160 lines
		
	
	
		
			4.6 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			160 lines
		
	
	
		
			4.6 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
	
	
| #!/usr/bin/env python3
 | |
| """
 | |
| Monitor the inbox directory for Markdown files and convert them to DOCX/PDF outputs.
 | |
| 
 | |
| The script runs indefinitely inside the container, polling the inbox for new files.
 | |
| When a Markdown file is found, pandoc generates DOCX and PDF outputs using the
 | |
| reference templates, places the results in a timestamped outbox path, and moves the
 | |
| original Markdown file into the processed directory.
 | |
| """
 | |
| 
 | |
| import logging
 | |
| import shutil
 | |
| import subprocess
 | |
| import time
 | |
| from datetime import datetime
 | |
| from pathlib import Path
 | |
| 
 | |
# Working directories used by the watcher: Markdown files are picked up from
# INBOX, converted outputs land under OUTBOX, successfully converted sources
# are archived under PROCESSED, and sources whose conversion failed go to FAILED.
INBOX = Path("/data", "inbox")
OUTBOX = Path("/data", "outbox")
PROCESSED = Path("/data", "processed")
FAILED = Path("/data", "failed")

# pandoc templates: the DOCX file is passed as --reference-doc and the TeX file
# as --template for the PDF run.
TEMPLATES = Path("/templates")
DOCX_TEMPLATE = TEMPLATES.joinpath("resume-reference.docx")
TEX_TEMPLATE = TEMPLATES.joinpath("resume-template.tex")

# Seconds to sleep between two scans of the inbox.
POLL_INTERVAL_SECONDS = 5
 | |
| 
 | |
| 
 | |
def ensure_environment() -> None:
    """Fail fast when a path the watcher depends on is absent.

    Checks the inbox, outbox, processed and failed paths as well as both
    pandoc templates.

    Raises:
        FileNotFoundError: If at least one required path does not exist; the
            message lists every missing path.
    """
    required = (INBOX, OUTBOX, PROCESSED, FAILED, DOCX_TEMPLATE, TEX_TEMPLATE)
    absent = [str(candidate) for candidate in required if not candidate.exists()]
    if not absent:
        return

    raise FileNotFoundError(
        "Required paths are missing inside the container: " + ", ".join(absent)
    )
 | |
| 
 | |
| 
 | |
def run_pandoc(input_md: Path, output_docx: Path, output_pdf: Path) -> None:
    """Render one Markdown file to DOCX and then to PDF by calling pandoc.

    Args:
        input_md: Markdown source, read as GitHub-flavoured Markdown (gfm).
        output_docx: Destination of the DOCX rendering.
        output_pdf: Destination of the PDF rendering.

    Raises:
        subprocess.CalledProcessError: If either pandoc run exits with a
            non-zero status. The PDF run is not attempted when the DOCX run
            fails.
    """
    source = str(input_md)

    # DOCX rendering, styled by the reference document.
    docx_command = [
        "pandoc",
        source,
        "--from",
        "gfm",
        "--to",
        "docx",
        "--reference-doc",
        str(DOCX_TEMPLATE),
        "--output",
        str(output_docx),
    ]
    # PDF rendering through xelatex with the TeX template.
    pdf_command = [
        "pandoc",
        source,
        "--from",
        "gfm",
        "--pdf-engine",
        "xelatex",
        "--template",
        str(TEX_TEMPLATE),
        "--output",
        str(output_pdf),
    ]

    for command in (docx_command, pdf_command):
        subprocess.run(command, check=True)
 | |
| 
 | |
| 
 | |
def build_timestamp_dir(base: Path, timestamp: datetime) -> Path:
    """Return ``base/YYYY/MM/DD/HHMM`` for *timestamp*, creating it if absent.

    Args:
        base: Directory under which the dated hierarchy is created.
        timestamp: Moment whose year, month, day and hour-minute name the
            nested directories.

    Returns:
        The path of the innermost (hour-minute) directory.
    """
    components = [timestamp.strftime(fmt) for fmt in ("%Y", "%m", "%d", "%H%M")]
    target = base.joinpath(*components)
    # Calling this twice within the same minute must not fail.
    target.mkdir(parents=True, exist_ok=True)
    return target
 | |
| 
 | |
| 
 | |
def process_markdown(md_file: Path) -> None:
    """Convert one Markdown file, then archive the source as processed.

    The DOCX and PDF outputs are written to a timestamped directory under the
    outbox, named after the source file's stem. After a successful conversion
    the source is moved into the matching timestamped directory under the
    processed path; a numeric suffix is appended when a file of the same name
    is already archived there.

    Args:
        md_file: Markdown file to convert.

    Raises:
        subprocess.CalledProcessError: Propagated from pandoc when a
            conversion fails; the source file is then left where it was.
    """
    now = datetime.now().astimezone()
    # Both dated directories are created up front, before pandoc runs.
    out_dir = build_timestamp_dir(OUTBOX, now)
    archive_dir = build_timestamp_dir(PROCESSED, now)

    base_name = md_file.stem
    logging.info("Processing %s", md_file.name)
    run_pandoc(
        md_file,
        out_dir / f"{base_name}.docx",
        out_dir / f"{base_name}.pdf",
    )

    # Pick an archive name that does not clobber an earlier source file.
    archive_path = archive_dir / md_file.name
    attempt = 0
    while archive_path.exists():
        attempt += 1
        archive_path = archive_dir / f"{base_name}_{attempt}.md"

    shutil.move(str(md_file), archive_path)
    logging.info("Completed %s -> %s (processed archived at %s)", md_file.name, out_dir, archive_path)
 | |
| 
 | |
| 
 | |
def move_to_failed(md_file: Path) -> None:
    """Quarantine a Markdown file in the failed directory.

    Moving the file out of the inbox keeps the watcher from retrying it on
    every poll. Does nothing when the file no longer exists. A numeric suffix
    is appended when the failed directory already holds a file of that name.

    Args:
        md_file: Markdown file whose conversion failed.
    """
    if md_file.exists():
        base_name = md_file.stem

        # Find a destination name that is still free.
        destination = FAILED / md_file.name
        attempt = 0
        while destination.exists():
            attempt += 1
            destination = FAILED / f"{base_name}_{attempt}.md"

        FAILED.mkdir(parents=True, exist_ok=True)
        shutil.move(str(md_file), destination)
        logging.info("Archived %s in failed directory at %s", md_file.name, destination)
 | |
| 
 | |
| 
 | |
def main() -> None:
    """Run the watcher: verify the environment, then poll the inbox forever.

    On every pass each ``*.md`` file in the inbox is converted in sorted
    order. A file whose pandoc conversion fails is moved to the failed
    directory; a file that hits any other error is logged and left in the
    inbox, so it is picked up again on the next pass. The loop sleeps
    ``POLL_INTERVAL_SECONDS`` between passes and never returns.

    Raises:
        FileNotFoundError: From ``ensure_environment`` when a required path
            is missing at start-up.
    """
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s [%(levelname)s] %(message)s",
    )

    ensure_environment()
    logging.info("Resume customizer watcher started")

    while True:
        # An empty inbox simply yields no iterations before the sleep below.
        for md_file in sorted(INBOX.glob("*.md")):
            try:
                process_markdown(md_file)
            except subprocess.CalledProcessError as exc:
                logging.error("Pandoc failed for %s: %s", md_file.name, exc)
                try:
                    move_to_failed(md_file)
                except OSError:
                    # An error raised inside an except block is not caught by
                    # the sibling handler below; without this guard a failed
                    # quarantine move (shutil.Error is an OSError) would
                    # terminate the whole watcher.
                    logging.exception(
                        "Could not move %s to the failed directory", md_file.name
                    )
            except Exception as exc:  # noqa: BLE001
                logging.exception("Unexpected error while processing %s: %s", md_file.name, exc)

        time.sleep(POLL_INTERVAL_SECONDS)
 | |
| 
 | |
| 
 | |
# Start the watcher only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
 |