#!/usr/bin/env python3 """ Monitor the inbox directory for Markdown files and convert them to DOCX/PDF outputs. The script runs indefinitely inside the container, polling the inbox for new files. When a Markdown file is found, pandoc generates DOCX and PDF outputs using the reference templates, places the results in a timestamped outbox path, and moves the original Markdown file into the processed directory. """ import logging import shutil import subprocess import time from datetime import datetime from pathlib import Path INBOX = Path("/data/inbox") OUTBOX = Path("/data/outbox") PROCESSED = Path("/data/processed") FAILED = Path("/data/failed") TEMPLATES = Path("/templates") TEMPLATE_CACHE = Path("/tmp/templates") DOCX_TEMPLATE = TEMPLATES / "resume-reference.docx" DOCX_TEMPLATE_EXAMPLE = TEMPLATES / "resume-reference.docx.example" TEX_TEMPLATE = TEMPLATES / "resume-template.tex" TEX_TEMPLATE_EXAMPLE = TEMPLATES / "resume-template.tex.example" RESOLVED_DOCX_TEMPLATE: Path | None = None RESOLVED_TEX_TEMPLATE: Path | None = None POLL_INTERVAL_SECONDS = 5 def resolve_template(primary: Path, example: Path, cache_dir: Path) -> Path: """Return the template path, copying .example into a writable cache if needed.""" if primary.exists(): return primary if example.exists(): cache_dir.mkdir(parents=True, exist_ok=True) cached = cache_dir / primary.name shutil.copy(example, cached) return cached raise FileNotFoundError(f"Template missing: {primary} (no example found)") def ensure_environment() -> None: """Verify required files and directories exist before processing starts.""" global RESOLVED_DOCX_TEMPLATE, RESOLVED_TEX_TEMPLATE missing = [] for path in (INBOX, OUTBOX, PROCESSED, FAILED, TEMPLATES): if not path.exists(): missing.append(str(path)) if missing: raise FileNotFoundError( "Required paths are missing inside the container: " + ", ".join(missing) ) RESOLVED_DOCX_TEMPLATE = resolve_template(DOCX_TEMPLATE, DOCX_TEMPLATE_EXAMPLE, TEMPLATE_CACHE) RESOLVED_TEX_TEMPLATE = resolve_template(TEX_TEMPLATE, TEX_TEMPLATE_EXAMPLE, TEMPLATE_CACHE) def run_pandoc(input_md: Path, output_docx: Path, output_pdf: Path) -> None: """Invoke pandoc twice to create DOCX and PDF artifacts.""" subprocess.run( [ "pandoc", str(input_md), "--from", "gfm", "--to", "docx", "--reference-doc", str(RESOLVED_DOCX_TEMPLATE), "--output", str(output_docx), ], check=True, ) subprocess.run( [ "pandoc", str(input_md), "--from", "gfm", "--pdf-engine", "xelatex", "--template", str(RESOLVED_TEX_TEMPLATE), "--output", str(output_pdf), ], check=True, ) def build_timestamp_dir(base: Path, timestamp: datetime) -> Path: """Create (if needed) and return the timestamped directory under base.""" subdir = ( base / timestamp.strftime("%Y") / timestamp.strftime("%m") / timestamp.strftime("%d") / timestamp.strftime("%H%M") ) subdir.mkdir(parents=True, exist_ok=True) return subdir def process_markdown(md_file: Path) -> None: """Convert the Markdown file and move it into the processed directory.""" timestamp = datetime.now().astimezone() out_dir = build_timestamp_dir(OUTBOX, timestamp) processed_dir = build_timestamp_dir(PROCESSED, timestamp) stem = md_file.stem output_docx = out_dir / f"{stem}.docx" output_pdf = out_dir / f"{stem}.pdf" logging.info("Processing %s", md_file.name) try: run_pandoc(md_file, output_docx, output_pdf) except subprocess.CalledProcessError: output_docx.unlink(missing_ok=True) output_pdf.unlink(missing_ok=True) raise processed_target = processed_dir / md_file.name counter = 1 while processed_target.exists(): processed_target = processed_dir / f"{stem}_{counter}.md" counter += 1 shutil.move(str(md_file), processed_target) logging.info("Completed %s -> %s (processed archived at %s)", md_file.name, out_dir, processed_target) def move_to_failed(md_file: Path) -> None: """Move the markdown file into the failed directory to avoid repeated retries.""" if not md_file.exists(): return stem = md_file.stem failed_target = FAILED / md_file.name counter = 1 while failed_target.exists(): failed_target = FAILED / f"{stem}_{counter}.md" counter += 1 FAILED.mkdir(parents=True, exist_ok=True) shutil.move(str(md_file), failed_target) logging.info("Archived %s in failed directory at %s", md_file.name, failed_target) def main() -> None: logging.basicConfig( level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s", ) ensure_environment() logging.info("Resume customizer watcher started") while True: md_files = sorted(INBOX.glob("*.md")) if not md_files: time.sleep(POLL_INTERVAL_SECONDS) continue for md_file in md_files: try: process_markdown(md_file) except subprocess.CalledProcessError as exc: logging.error("Pandoc failed for %s: %s", md_file.name, exc) move_to_failed(md_file) except Exception as exc: # noqa: BLE001 logging.exception("Unexpected error while processing %s: %s", md_file.name, exc) time.sleep(POLL_INTERVAL_SECONDS) if __name__ == "__main__": main()