feat(output): add wrapper and failed conversion handling

This commit is contained in:
2025-10-15 13:39:37 -05:00
parent 7afec09482
commit d7682ee87d
11 changed files with 324 additions and 0 deletions

26
output/Docker/Dockerfile Normal file
View File

@@ -0,0 +1,26 @@
# Image for the watcher that converts Markdown files to DOCX/PDF with pandoc.
FROM debian:bookworm
# Keep apt non-interactive during the build; make Python skip .pyc files and
# write stdout/stderr unbuffered so log lines reach `docker logs` immediately.
ENV DEBIAN_FRONTEND=noninteractive \
PYTHONDONTWRITEBYTECODE=1 \
PYTHONUNBUFFERED=1
# Runtime dependencies: python3 runs watch_and_convert.py, gosu is used by
# entrypoint.sh to drop privileges, pandoc performs the conversions, and
# texlive-full supplies the xelatex engine the watcher passes to pandoc.
# NOTE(review): python3-venv is not referenced by the files copied below - confirm it is needed.
# The apt caches are removed in the same layer to keep them out of the image.
RUN apt-get update \
&& apt-get install --yes --no-install-recommends \
python3 \
python3-venv \
gosu \
pandoc \
texlive-full \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
WORKDIR /app
# Both files are copied into /app and marked executable.
COPY watch_and_convert.py entrypoint.sh ./
RUN chmod +x /app/entrypoint.sh /app/watch_and_convert.py
# Default uid/gid the entrypoint switches to; the compose file overrides them.
ENV PUID=1000 \
PGID=1000
# entrypoint.sh chowns /data and then execs the watcher through gosu.
ENTRYPOINT ["/app/entrypoint.sh"]

View File

@@ -0,0 +1,19 @@
# Compose project for the Markdown -> DOCX/PDF output processor.
#
# The Compose specification only allows lowercase letters, digits, dashes and
# underscores in a project name, so the project name is spelled in lowercase.
# The human-friendly mixed-case label is kept on container_name, where
# uppercase characters are permitted.
name: resumecustomizer-outputprocessor
services:
  resumecustomizer-outputprocessor:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: ResumeCustomizer-OutputProcessor
    restart: always
    environment:
      # uid/gid the entrypoint drops to. LOCAL_UID / LOCAL_GID are read from
      # the environment compose is started in; both default to 1000.
      PUID: "${LOCAL_UID:-1000}"
      PGID: "${LOCAL_GID:-1000}"
    volumes:
      # Working directories used by the watcher inside the container.
      - ../ForRelease/inbox:/data/inbox
      - ../ForRelease/outbox:/data/outbox
      - ../ForRelease/processed:/data/processed
      - ../ForRelease/failed:/data/failed
      # Pandoc reference templates, mounted read-only.
      - ../../input/templates:/templates:ro
      # Share the host's local time definition; the watcher names its output
      # directories from the local time.
      - /etc/localtime:/etc/localtime:ro

18
output/Docker/entrypoint.sh Executable file
View File

@@ -0,0 +1,18 @@
#!/usr/bin/env bash
# Container entrypoint: hand /data to the requested uid/gid, then start the
# watcher as that user through gosu.
set -euo pipefail

# Fall back to 1000:1000 when the ids are unset or empty.
: "${PUID:=1000}"
: "${PGID:=1000}"
run_as="${PUID}:${PGID}"

# gosu performs the privilege drop below, so refuse to start without it.
command -v gosu >/dev/null 2>&1 || {
    echo "gosu is required but not installed" >&2
    exit 1
}

# Only touch ownership when /data is actually present.
[ ! -d /data ] || chown -R "${run_as}" /data

# Provide a HOME value when none is set.
export HOME="${HOME:-/tmp}"

# Replace this shell with the watcher running as the target user.
exec gosu "${run_as}" python3 /app/watch_and_convert.py

View File

@@ -0,0 +1,28 @@
#!/usr/bin/env bash
# Runs docker compose with LOCAL_UID / LOCAL_GID set to the caller's ids so
# files generated by the container stay writable for the invoking user.
set -euo pipefail

# True when the given command can be found on PATH.
have() {
    command -v "$1" >/dev/null 2>&1
}

if ! have docker; then
    echo "Error: docker is not installed or not on PATH." >&2
    exit 1
fi

# Prefer the compose plugin; fall back to the standalone docker-compose binary.
compose=()
if docker compose version >/dev/null 2>&1; then
    compose=(docker compose)
elif have docker-compose; then
    compose=(docker-compose)
fi

if [ "${#compose[@]}" -eq 0 ]; then
    echo "Error: docker compose plugin or docker-compose binary is required." >&2
    exit 1
fi

uid=$(id -u)
gid=$(id -g)
here=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)

# Work from the script's own directory so compose finds the bundled yaml.
# The subshell keeps both the directory change and the exported ids local
# to the compose invocation.
(
    cd "${here}"
    export LOCAL_UID="${uid}"
    export LOCAL_GID="${gid}"
    "${compose[@]}" "$@"
)

View File

@@ -0,0 +1,159 @@
#!/usr/bin/env python3
"""
Monitor the inbox directory for Markdown files and convert them to DOCX/PDF outputs.
The script runs indefinitely inside the container, polling the inbox for new files.
When a Markdown file is found, pandoc generates DOCX and PDF outputs using the
reference templates, places the results in a timestamped outbox path, and moves the
original Markdown file into the processed directory.
"""
import logging
import shutil
import subprocess
import time
from datetime import datetime
from pathlib import Path
# Directory scanned for incoming "*.md" files.
INBOX = Path("/data/inbox")
# Root under which timestamped directories with the DOCX/PDF results are created.
OUTBOX = Path("/data/outbox")
# Root under which successfully converted Markdown files are archived.
PROCESSED = Path("/data/processed")
# Destination for Markdown files whose pandoc conversion failed.
FAILED = Path("/data/failed")
# Directory holding the pandoc templates referenced below.
TEMPLATES = Path("/templates")
# Passed to pandoc as --reference-doc for the DOCX output.
DOCX_TEMPLATE = TEMPLATES / "resume-reference.docx"
# Passed to pandoc as --template for the PDF output.
TEX_TEMPLATE = TEMPLATES / "resume-template.tex"
# Seconds the main loop sleeps between inbox scans.
POLL_INTERVAL_SECONDS = 5
def ensure_environment() -> None:
    """Fail fast when a directory or template the watcher relies on is absent.

    Raises:
        FileNotFoundError: If one or more required paths do not exist. The
            message lists every missing path, not just the first one.
    """
    required = (INBOX, OUTBOX, PROCESSED, FAILED, DOCX_TEMPLATE, TEX_TEMPLATE)
    absent = [str(candidate) for candidate in required if not candidate.exists()]
    if not absent:
        return
    raise FileNotFoundError(
        "Required paths are missing inside the container: " + ", ".join(absent)
    )
def run_pandoc(input_md: Path, output_docx: Path, output_pdf: Path) -> None:
    """Render one Markdown file to DOCX and then to PDF using pandoc.

    Args:
        input_md: Markdown source, read as GitHub-flavoured Markdown.
        output_docx: Destination of the DOCX, styled via ``DOCX_TEMPLATE``.
        output_pdf: Destination of the PDF, built with xelatex and
            ``TEX_TEMPLATE``.

    Raises:
        subprocess.CalledProcessError: If either pandoc run exits non-zero.
            The DOCX run comes first, so a failing PDF run leaves the DOCX
            that was already written in place.
    """
    # Arguments common to both invocations.
    shared = ["pandoc", str(input_md), "--from", "gfm"]
    docx_options = [
        "--to",
        "docx",
        "--reference-doc",
        str(DOCX_TEMPLATE),
        "--output",
        str(output_docx),
    ]
    pdf_options = [
        "--pdf-engine",
        "xelatex",
        "--template",
        str(TEX_TEMPLATE),
        "--output",
        str(output_pdf),
    ]
    for options in (docx_options, pdf_options):
        subprocess.run(shared + options, check=True)
def build_timestamp_dir(base: Path, timestamp: datetime) -> Path:
    """Return ``base/YYYY/MM/DD/HHMM`` for *timestamp*, creating it if absent.

    Args:
        base: Root directory the dated hierarchy is created under.
        timestamp: Moment whose year, month, day and hour+minute name the
            four nested directory levels.

    Returns:
        The path of the innermost (``HHMM``) directory.
    """
    levels = [timestamp.strftime(fmt) for fmt in ("%Y", "%m", "%d", "%H%M")]
    target = base.joinpath(*levels)
    target.mkdir(parents=True, exist_ok=True)
    return target
def process_markdown(md_file: Path) -> None:
    """Convert one Markdown file and archive the source afterwards.

    The DOCX and PDF are written to a directory under ``OUTBOX`` named after
    the current local time. After a successful conversion the Markdown file
    is moved to the matching directory under ``PROCESSED``; if a file with
    the same name is already archived there, ``_1``, ``_2``, ... is appended
    to the stem until the name is free.

    Args:
        md_file: Markdown file to convert.

    Raises:
        subprocess.CalledProcessError: Propagated from ``run_pandoc`` when a
            conversion fails; the source file is not moved in that case.
    """
    now = datetime.now().astimezone()
    out_dir = build_timestamp_dir(OUTBOX, now)
    archive_dir = build_timestamp_dir(PROCESSED, now)
    name = md_file.name
    stem = md_file.stem

    logging.info("Processing %s", name)
    run_pandoc(md_file, out_dir / f"{stem}.docx", out_dir / f"{stem}.pdf")

    # Pick an archive name that does not clash with an earlier run.
    archive_path = archive_dir / name
    suffix = 0
    while archive_path.exists():
        suffix += 1
        archive_path = archive_dir / f"{stem}_{suffix}.md"

    shutil.move(str(md_file), archive_path)
    logging.info("Completed %s -> %s (processed archived at %s)", name, out_dir, archive_path)
def move_to_failed(md_file: Path) -> None:
    """Park a Markdown file in ``FAILED`` so it is not picked up again.

    Does nothing when the file no longer exists. If a file with the same
    name is already present in ``FAILED``, ``_1``, ``_2``, ... is appended to
    the stem until the name is free.

    Args:
        md_file: Markdown file to move out of the inbox.
    """
    if md_file.exists():
        name = md_file.name
        destination = FAILED / name
        attempt = 0
        while destination.exists():
            attempt += 1
            destination = FAILED / f"{md_file.stem}_{attempt}.md"
        FAILED.mkdir(parents=True, exist_ok=True)
        shutil.move(str(md_file), destination)
        logging.info("Archived %s in failed directory at %s", name, destination)
def main() -> None:
    """Run the watcher: scan the inbox forever and convert each Markdown file.

    Configures logging, checks the required paths with
    ``ensure_environment`` and then loops without end. Each pass converts
    every ``*.md`` file in ``INBOX`` in sorted order and then sleeps for
    ``POLL_INTERVAL_SECONDS``.

    Error handling per file:
        * A pandoc failure is logged and the file is moved to the failed
          directory. If that move itself fails, the error is logged and the
          loop carries on instead of terminating the watcher.
        * Any other exception is logged with its traceback; the file stays
          in the inbox and is attempted again on the next pass.

    Raises:
        FileNotFoundError: From ``ensure_environment`` when a required path
            is missing at start-up.
    """
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s [%(levelname)s] %(message)s",
    )
    ensure_environment()
    logging.info("Resume customizer watcher started")
    while True:
        # An empty inbox simply skips the loop body and falls through to the sleep.
        for md_file in sorted(INBOX.glob("*.md")):
            try:
                process_markdown(md_file)
            except subprocess.CalledProcessError as exc:
                logging.error("Pandoc failed for %s: %s", md_file.name, exc)
                # An exception raised inside this handler would bypass the
                # generic handler below and end the watcher, so guard the move.
                try:
                    move_to_failed(md_file)
                except OSError:
                    logging.exception(
                        "Could not move %s to the failed directory", md_file.name
                    )
            except Exception as exc:  # noqa: BLE001
                logging.exception("Unexpected error while processing %s: %s", md_file.name, exc)
        time.sleep(POLL_INTERVAL_SECONDS)


if __name__ == "__main__":
    main()