feat(output): add wrapper and failed conversion handling
This commit is contained in:
159
output/Docker/watch_and_convert.py
Executable file
159
output/Docker/watch_and_convert.py
Executable file
@@ -0,0 +1,159 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Monitor the inbox directory for Markdown files and convert them to DOCX/PDF outputs.
|
||||
|
||||
The script runs indefinitely inside the container, polling the inbox for new files.
|
||||
When a Markdown file is found, pandoc generates DOCX and PDF outputs using the
|
||||
reference templates, places the results in a timestamped outbox path, and moves the
|
||||
original Markdown file into the processed directory.
|
||||
"""
|
||||
|
||||
import logging
import shutil
import subprocess
import time
from datetime import datetime
from pathlib import Path
from typing import Optional
|
||||
|
||||
# Working directories inside the container.
INBOX = Path("/data/inbox")          # Markdown files waiting to be converted
OUTBOX = Path("/data/outbox")        # root for generated DOCX/PDF outputs
PROCESSED = Path("/data/processed")  # root for sources that converted successfully
FAILED = Path("/data/failed")        # sources whose conversion failed

# Pandoc templates.
TEMPLATES = Path("/templates")
DOCX_TEMPLATE = TEMPLATES.joinpath("resume-reference.docx")
TEX_TEMPLATE = TEMPLATES.joinpath("resume-template.tex")

# Seconds to sleep between scans of the inbox.
POLL_INTERVAL_SECONDS = 5
|
||||
|
||||
|
||||
def ensure_environment() -> None:
    """Fail fast when a required directory or template is absent.

    Checks the inbox, outbox, processed and failed directories and both
    pandoc templates.

    Raises:
        FileNotFoundError: If at least one of those paths does not exist; the
            message lists every missing path.
    """
    required = (INBOX, OUTBOX, PROCESSED, FAILED, DOCX_TEMPLATE, TEX_TEMPLATE)
    absent = [str(candidate) for candidate in required if not candidate.exists()]
    if not absent:
        return

    raise FileNotFoundError(
        "Required paths are missing inside the container: " + ", ".join(absent)
    )
|
||||
|
||||
|
||||
def run_pandoc(input_md: Path, output_docx: Path, output_pdf: Path) -> None:
    """Render one Markdown file to DOCX and then to PDF with pandoc.

    Args:
        input_md: Markdown source, read as GitHub-flavoured Markdown.
        output_docx: Destination of the DOCX, styled by DOCX_TEMPLATE.
        output_pdf: Destination of the PDF, built with xelatex and TEX_TEMPLATE.

    Raises:
        subprocess.CalledProcessError: If either pandoc run exits non-zero.
            The PDF run is not attempted when the DOCX run fails.
    """
    source = str(input_md)

    docx_command = [
        "pandoc",
        source,
        "--from",
        "gfm",
        "--to",
        "docx",
        "--reference-doc",
        str(DOCX_TEMPLATE),
        "--output",
        str(output_docx),
    ]
    pdf_command = [
        "pandoc",
        source,
        "--from",
        "gfm",
        "--pdf-engine",
        "xelatex",
        "--template",
        str(TEX_TEMPLATE),
        "--output",
        str(output_pdf),
    ]

    # check=True turns a non-zero pandoc exit status into CalledProcessError.
    for command in (docx_command, pdf_command):
        subprocess.run(command, check=True)
|
||||
|
||||
|
||||
def build_timestamp_dir(base: Path, timestamp: datetime) -> Path:
    """Return ``base/YYYY/MM/DD/HHMM`` for *timestamp*, creating it if absent.

    Args:
        base: Root directory under which the dated tree is built.
        timestamp: Moment whose year, month, day and hour-minute name the
            nested directories.

    Returns:
        The (now existing) directory for that minute.
    """
    components = [timestamp.strftime(fmt) for fmt in ("%Y", "%m", "%d", "%H%M")]
    target = base.joinpath(*components)
    target.mkdir(parents=True, exist_ok=True)
    return target
|
||||
|
||||
|
||||
def process_markdown(md_file: Path) -> None:
    """Convert one Markdown file and archive the source on success.

    The DOCX and PDF outputs, named after the file's stem, are written to a
    timestamped directory under OUTBOX. The source is then moved into the
    matching timestamped directory under PROCESSED, gaining a numeric suffix
    when a file of the same name is already archived there.

    When conversion fails, any output this attempt created is removed so the
    outbox never holds a half-converted document, nothing is created under
    PROCESSED, and the error propagates to the caller.

    Args:
        md_file: Markdown file to convert.

    Raises:
        subprocess.CalledProcessError: If pandoc exits with a non-zero status.
        OSError: If a directory cannot be created or the source cannot be moved.
    """
    timestamp = datetime.now().astimezone()
    out_dir = build_timestamp_dir(OUTBOX, timestamp)

    stem = md_file.stem
    output_docx = out_dir / f"{stem}.docx"
    output_pdf = out_dir / f"{stem}.pdf"
    outputs = (output_docx, output_pdf)
    # Remember what an earlier run already published in this minute's
    # directory so cleanup after a failure never deletes it.
    preexisting = {path for path in outputs if path.exists()}

    logging.info("Processing %s", md_file.name)
    try:
        run_pandoc(md_file, output_docx, output_pdf)
    except Exception:
        # e.g. DOCX succeeded but PDF failed: do not leave the lone DOCX
        # published for a source that is about to be treated as failed.
        for path in outputs:
            if path in preexisting:
                continue
            try:
                path.unlink()
            except FileNotFoundError:
                pass  # pandoc never got as far as creating this output
            except OSError:
                logging.warning("Could not remove partial output %s", path)
        raise

    # Created only after a successful conversion so failed files do not leave
    # empty dated directories behind in the processed tree.
    processed_dir = build_timestamp_dir(PROCESSED, timestamp)
    suffix = md_file.suffix
    processed_target = processed_dir / md_file.name
    counter = 1
    while processed_target.exists():
        # Keep the source's own suffix instead of assuming ".md".
        processed_target = processed_dir / f"{stem}_{counter}{suffix}"
        counter += 1

    shutil.move(str(md_file), processed_target)
    logging.info("Completed %s -> %s (processed archived at %s)", md_file.name, out_dir, processed_target)
|
||||
|
||||
|
||||
def move_to_failed(md_file: Path, failed_dir: Optional[Path] = None) -> None:
    """Move a file that could not be converted out of the inbox.

    Parking the file in the failed directory stops the polling loop from
    retrying it on every pass. If a file of the same name is already there,
    a numeric suffix is inserted before the extension (``name_1.md``,
    ``name_2.md``, ...).

    Args:
        md_file: File to archive. Nothing happens when it no longer exists.
        failed_dir: Destination directory; defaults to the module-level
            FAILED directory. It is created when missing.

    Raises:
        OSError: If the directory cannot be created or the file cannot be moved.
    """
    if not md_file.exists():
        return

    target_dir = FAILED if failed_dir is None else failed_dir
    # Create the directory before probing it for name collisions.
    target_dir.mkdir(parents=True, exist_ok=True)

    stem, suffix = md_file.stem, md_file.suffix
    failed_target = target_dir / md_file.name
    counter = 1
    while failed_target.exists():
        # Keep the file's own suffix instead of assuming ".md".
        failed_target = target_dir / f"{stem}_{counter}{suffix}"
        counter += 1

    shutil.move(str(md_file), failed_target)
    logging.info("Archived %s in failed directory at %s", md_file.name, failed_target)
|
||||
|
||||
|
||||
def _archive_failed(md_file: Path) -> None:
    """Move md_file to the failed directory, logging instead of raising on I/O errors."""
    try:
        move_to_failed(md_file)
    except OSError:
        # An exception raised inside an ``except`` handler is not caught by
        # the sibling handlers of the same ``try``; without this guard a
        # failed move would terminate the watcher.
        logging.exception("Could not move %s into the failed directory", md_file.name)


def main() -> None:
    """Run the watcher: poll the inbox forever and convert every ``*.md`` file.

    Logging is configured at INFO level and the environment is validated
    once at start-up. Each pass processes the inbox files in sorted order and
    then sleeps for POLL_INTERVAL_SECONDS.

    A file whose pandoc run fails is moved to the failed directory. Any other
    error is logged with its traceback and the file is left in the inbox, so
    it is picked up again on the next pass.

    Raises:
        FileNotFoundError: From the start-up check when a required path is missing.
    """
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s [%(levelname)s] %(message)s",
    )

    ensure_environment()
    logging.info("Resume customizer watcher started")

    while True:
        for md_file in sorted(INBOX.glob("*.md")):
            try:
                process_markdown(md_file)
            except subprocess.CalledProcessError as exc:
                logging.error("Pandoc failed for %s: %s", md_file.name, exc)
                _archive_failed(md_file)
            except Exception as exc:  # noqa: BLE001
                # Keep the daemon alive; the file stays in the inbox and is
                # retried on the next pass.
                logging.exception("Unexpected error while processing %s: %s", md_file.name, exc)

        # One sleep per pass, whether or not any file was found.
        time.sleep(POLL_INTERVAL_SECONDS)


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user