#!/usr/bin/env python3
|
|
"""
Monitor the inbox directory for Markdown files and convert them to DOCX/PDF outputs.

The script runs indefinitely inside the container, polling the inbox for new files.
When a Markdown file is found, pandoc generates DOCX and PDF outputs using the
reference templates, places the results in a timestamped outbox path, and moves the
original Markdown file into the processed directory.
"""
|
|
|
|
import logging
|
|
import shutil
|
|
import subprocess
|
|
import time
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
|
|
# Working directories inside the container. Each one must already exist
# when the watcher starts (see ensure_environment).
INBOX = Path("/data/inbox")          # polled for incoming *.md files
OUTBOX = Path("/data/outbox")        # generated DOCX/PDF artifacts
PROCESSED = Path("/data/processed")  # archive of successfully converted sources
FAILED = Path("/data/failed")        # sources whose pandoc conversion failed
TEMPLATES = Path("/templates")

# Pandoc templates: reference document for DOCX, LaTeX template for PDF.
DOCX_TEMPLATE = TEMPLATES / "resume-reference.docx"
TEX_TEMPLATE = TEMPLATES / "resume-template.tex"

# Seconds to wait between two scans of the inbox.
POLL_INTERVAL_SECONDS = 5
|
|
|
|
|
|
def ensure_environment() -> None:
    """Fail fast when a required directory or template file is absent.

    Checks the four working directories and both pandoc templates.

    Raises:
        FileNotFoundError: if one or more of the paths do not exist; the
            message lists every missing path, comma-separated.
    """
    required = (INBOX, OUTBOX, PROCESSED, FAILED, DOCX_TEMPLATE, TEX_TEMPLATE)
    absent = [str(candidate) for candidate in required if not candidate.exists()]
    if not absent:
        return

    raise FileNotFoundError(
        "Required paths are missing inside the container: " + ", ".join(absent)
    )
|
|
|
|
|
|
def run_pandoc(input_md: Path, output_docx: Path, output_pdf: Path) -> None:
    """Render ``input_md`` to DOCX and then to PDF with two pandoc runs.

    Args:
        input_md: Markdown source, read as GitHub-Flavored Markdown (``gfm``).
        output_docx: Destination of the DOCX, styled with ``DOCX_TEMPLATE``.
        output_pdf: Destination of the PDF, built with the ``xelatex`` engine
            and ``TEX_TEMPLATE``.

    Raises:
        subprocess.CalledProcessError: if either pandoc run exits non-zero.
            The DOCX run comes first, so a DOCX failure skips the PDF run.
    """
    shared_prefix = ["pandoc", str(input_md), "--from", "gfm"]

    docx_command = [
        *shared_prefix,
        "--to",
        "docx",
        "--reference-doc",
        str(DOCX_TEMPLATE),
        "--output",
        str(output_docx),
    ]
    pdf_command = [
        *shared_prefix,
        "--pdf-engine",
        "xelatex",
        "--template",
        str(TEX_TEMPLATE),
        "--output",
        str(output_pdf),
    ]

    for command in (docx_command, pdf_command):
        subprocess.run(command, check=True)
|
|
|
|
|
|
def build_timestamp_dir(base: Path, timestamp: datetime) -> Path:
    """Return ``base/YYYY/MM/DD/HHMM`` for ``timestamp``, creating it if absent.

    Args:
        base: Root directory under which the dated tree is built.
        timestamp: Moment whose year, month, day and hour+minute name the
            nested directories.

    Returns:
        The (now existing) minute-level directory.
    """
    components = (timestamp.strftime(fmt) for fmt in ("%Y", "%m", "%d", "%H%M"))
    target = base.joinpath(*components)
    target.mkdir(parents=True, exist_ok=True)
    return target
|
|
|
|
|
|
def process_markdown(md_file: Path) -> None:
    """Convert one Markdown file and archive the source in the processed tree.

    The DOCX and PDF are written to a timestamped directory under ``OUTBOX``
    and named after the source file's stem. After a successful conversion the
    source is moved into the matching timestamped directory under
    ``PROCESSED``; a numeric suffix is appended if that name is already taken.

    Args:
        md_file: Path of the Markdown file to convert.

    Raises:
        subprocess.CalledProcessError: if a pandoc run fails. The source file
            is left where it was so the caller can decide what to do with it.
    """
    timestamp = datetime.now().astimezone()
    out_dir = build_timestamp_dir(OUTBOX, timestamp)

    stem = md_file.stem
    output_docx = out_dir / f"{stem}.docx"
    output_pdf = out_dir / f"{stem}.pdf"

    logging.info("Processing %s", md_file.name)
    run_pandoc(md_file, output_docx, output_pdf)

    # Create the archive directory only once conversion has succeeded, so a
    # failed run does not leave an empty timestamp directory behind.
    processed_dir = build_timestamp_dir(PROCESSED, timestamp)

    processed_target = processed_dir / md_file.name
    counter = 1
    while processed_target.exists():
        # Keep the source's own extension rather than assuming ".md".
        processed_target = processed_dir / f"{stem}_{counter}{md_file.suffix}"
        counter += 1

    shutil.move(str(md_file), processed_target)
    logging.info("Completed %s -> %s (processed archived at %s)", md_file.name, out_dir, processed_target)
|
|
|
|
|
|
def move_to_failed(md_file: Path) -> None:
    """Park ``md_file`` in the failed directory so it is not picked up again.

    Does nothing when the file no longer exists. If a file of the same name
    is already in ``FAILED``, ``_1``, ``_2``, ... is appended to the stem
    until a free name is found.
    """
    if not md_file.exists():
        return

    FAILED.mkdir(parents=True, exist_ok=True)

    base_name = md_file.stem
    destination = FAILED / md_file.name
    attempt = 0
    while destination.exists():
        attempt += 1
        destination = FAILED / f"{base_name}_{attempt}.md"

    shutil.move(str(md_file), destination)
    logging.info("Archived %s in failed directory at %s", md_file.name, destination)
|
|
|
|
|
|
def main() -> None:
    """Run the watcher: poll the inbox forever and convert each Markdown file.

    Configures logging, validates the environment once, then scans ``INBOX``
    for ``*.md`` files every ``POLL_INTERVAL_SECONDS`` seconds. Files are
    handled in sorted name order. This function never returns normally.

    Raises:
        FileNotFoundError: from ``ensure_environment`` when a required path
            is missing at start-up.
    """
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s [%(levelname)s] %(message)s",
    )

    ensure_environment()
    logging.info("Resume customizer watcher started")

    while True:
        # An empty scan simply skips the loop body and falls through to the sleep.
        for md_file in sorted(INBOX.glob("*.md")):
            try:
                process_markdown(md_file)
            except subprocess.CalledProcessError as exc:
                logging.error("Pandoc failed for %s: %s", md_file.name, exc)
                # An error raised inside this handler would bypass the
                # `except Exception` below and terminate the watcher, so a
                # failure to park the file is logged instead of propagated.
                try:
                    move_to_failed(md_file)
                except OSError:
                    logging.exception(
                        "Could not move %s to the failed directory", md_file.name
                    )
            except Exception as exc:  # noqa: BLE001
                # The file stays in the inbox and is retried on the next poll.
                logging.exception("Unexpected error while processing %s: %s", md_file.name, exc)

        time.sleep(POLL_INTERVAL_SECONDS)
|
|
|
|
|
|
# Start the watcher only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|