Files
ResumeCustomizer/output/Docker/watch_and_convert.py

160 lines
4.6 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Monitor the inbox directory for Markdown files and convert them to DOCX/PDF outputs.
The script runs indefinitely inside the container, polling the inbox for new files.
When a Markdown file is found, pandoc generates DOCX and PDF outputs using the
reference templates, places the results in a timestamped outbox path, and moves the
original Markdown file into the processed directory.
"""
import logging
import shutil
import subprocess
import time
from datetime import datetime
from pathlib import Path
# Working directories (absolute paths inside the container).
# INBOX is polled for "*.md" files; results are written under OUTBOX;
# converted sources are archived under PROCESSED; sources whose pandoc run
# failed are moved to FAILED.
INBOX = Path("/data/inbox")
OUTBOX = Path("/data/outbox")
PROCESSED = Path("/data/processed")
FAILED = Path("/data/failed")
# Template files handed to pandoc: the DOCX one via --reference-doc, the
# TeX one via --template.
TEMPLATES = Path("/templates")
DOCX_TEMPLATE = TEMPLATES / "resume-reference.docx"
TEX_TEMPLATE = TEMPLATES / "resume-template.tex"
# Seconds the main loop sleeps between scans of INBOX.
POLL_INTERVAL_SECONDS = 5
def ensure_environment() -> None:
    """Fail fast when a required directory or template is absent.

    Checks INBOX, OUTBOX, PROCESSED, FAILED and both pandoc templates with
    ``Path.exists()``.

    Raises:
        FileNotFoundError: if at least one path does not exist; the message
            lists every missing path, comma-separated.
    """
    required = (INBOX, OUTBOX, PROCESSED, FAILED, DOCX_TEMPLATE, TEX_TEMPLATE)
    absent = [str(candidate) for candidate in required if not candidate.exists()]
    if not absent:
        return
    raise FileNotFoundError(
        "Required paths are missing inside the container: " + ", ".join(absent)
    )
def run_pandoc(input_md: Path, output_docx: Path, output_pdf: Path) -> None:
    """Render *input_md* to a DOCX file and then to a PDF file with pandoc.

    Both runs read the input as ``gfm``. The DOCX run passes ``--to docx``
    and styles the result with DOCX_TEMPLATE as the reference document. The
    PDF run passes no ``--to``; it selects the ``xelatex`` PDF engine and
    TEX_TEMPLATE as the template.

    Args:
        input_md: Markdown source file.
        output_docx: Destination path of the DOCX artifact.
        output_pdf: Destination path of the PDF artifact.

    Raises:
        subprocess.CalledProcessError: if either pandoc run exits non-zero.
            The DOCX run comes first, so a DOCX failure skips the PDF run.
    """
    shared_prefix = ["pandoc", str(input_md), "--from", "gfm"]
    # (format-specific options, destination) for each artifact, DOCX first.
    jobs = (
        (["--to", "docx", "--reference-doc", str(DOCX_TEMPLATE)], output_docx),
        (["--pdf-engine", "xelatex", "--template", str(TEX_TEMPLATE)], output_pdf),
    )
    for format_options, destination in jobs:
        command = [*shared_prefix, *format_options, "--output", str(destination)]
        subprocess.run(command, check=True)
def build_timestamp_dir(base: Path, timestamp: datetime) -> Path:
    """Return ``base/YYYY/MM/DD/HHMM`` for *timestamp*, creating it if absent.

    Args:
        base: Root directory under which the dated hierarchy is built.
        timestamp: Moment whose year, month, day and hour+minute name the
            four nested directory levels.

    Returns:
        The path of the (now existing) innermost directory.
    """
    # One strftime call yields all four components, separated by spaces.
    year, month, day, hour_minute = timestamp.strftime("%Y %m %d %H%M").split()
    target = base.joinpath(year, month, day, hour_minute)
    target.mkdir(parents=True, exist_ok=True)
    return target
def process_markdown(md_file: Path) -> None:
    """Convert one Markdown file and archive the source afterwards.

    A single timezone-aware local timestamp selects the dated directory
    under both OUTBOX (for ``<stem>.docx`` / ``<stem>.pdf``) and PROCESSED
    (for the source). The source is moved only after pandoc has succeeded;
    if a file of the same name is already archived there, ``_1``, ``_2``, ...
    is appended to the stem until a free name is found.

    Args:
        md_file: Markdown file to convert.

    Raises:
        subprocess.CalledProcessError: propagated from ``run_pandoc``; in
            that case the source file is left where it was.
    """
    now = datetime.now().astimezone()
    artifacts_dir = build_timestamp_dir(OUTBOX, now)
    archive_dir = build_timestamp_dir(PROCESSED, now)
    base_name = md_file.stem

    logging.info("Processing %s", md_file.name)
    run_pandoc(
        md_file,
        artifacts_dir.joinpath(f"{base_name}.docx"),
        artifacts_dir.joinpath(f"{base_name}.pdf"),
    )

    # Pick an archive name that does not clobber an earlier submission.
    archive_path = archive_dir / md_file.name
    attempt = 0
    while archive_path.exists():
        attempt += 1
        archive_path = archive_dir / f"{base_name}_{attempt}.md"

    shutil.move(str(md_file), archive_path)
    logging.info(
        "Completed %s -> %s (processed archived at %s)",
        md_file.name,
        artifacts_dir,
        archive_path,
    )
def move_to_failed(md_file: Path) -> None:
    """Relocate *md_file* into FAILED so the watcher stops retrying it.

    Does nothing when the file no longer exists. If FAILED already holds a
    file of the same name, ``_1``, ``_2``, ... is appended to the stem until
    a free name is found. FAILED is created on demand before the move.

    Args:
        md_file: Markdown file whose conversion failed.
    """
    if md_file.exists():
        original_name = md_file.name
        destination = FAILED / original_name
        attempt = 0
        while destination.exists():
            attempt += 1
            destination = FAILED / f"{md_file.stem}_{attempt}.md"
        FAILED.mkdir(parents=True, exist_ok=True)
        shutil.move(str(md_file), destination)
        logging.info(
            "Archived %s in failed directory at %s", original_name, destination
        )
def main() -> None:
    """Run the watcher: poll INBOX forever and convert each Markdown file found.

    Logging is configured at INFO level and ``ensure_environment`` runs once
    before the first poll, so a missing directory or template aborts startup
    with ``FileNotFoundError``. Each poll processes the ``*.md`` files in
    INBOX in sorted order and then sleeps for POLL_INTERVAL_SECONDS.

    Per-file error handling:
      * ``subprocess.CalledProcessError`` (pandoc exited non-zero): the file
        is moved to FAILED so it is not retried. If that move itself fails
        with ``OSError``, the failure is logged and the loop carries on
        instead of terminating the watcher.
      * any other exception: logged with a traceback; the file stays in
        INBOX and is picked up again on the next poll.

    The loop has no exit condition; the function does not return normally.
    """
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s [%(levelname)s] %(message)s",
    )
    ensure_environment()
    logging.info("Resume customizer watcher started")
    while True:
        md_files = sorted(INBOX.glob("*.md"))
        if not md_files:
            time.sleep(POLL_INTERVAL_SECONDS)
            continue
        for md_file in md_files:
            try:
                process_markdown(md_file)
            except subprocess.CalledProcessError as exc:
                logging.error("Pandoc failed for %s: %s", md_file.name, exc)
                # An exception raised inside this handler would bypass the
                # generic handler below and kill the watcher, so the archive
                # step is guarded on its own.
                try:
                    move_to_failed(md_file)
                except OSError:
                    logging.exception(
                        "Could not archive %s in failed directory", md_file.name
                    )
            except Exception as exc:  # noqa: BLE001
                logging.exception("Unexpected error while processing %s: %s", md_file.name, exc)
        time.sleep(POLL_INTERVAL_SECONDS)
if __name__ == "__main__":
main()