#!/usr/bin/env python3
"""
Monitor the customization inbox, normalize messy job descriptions, and run the
Codex CLI to produce tailored resumes.

The watcher expects exactly one base resume Markdown file and processes one job
file at a time. After Codex succeeds, the generated resume is written into a
timestamped outbox folder using the pattern {company}-{job-title}.md, while the
original job file is archived under processed/. Failures move the job
description into failed/.
"""

from __future__ import annotations

import logging
import os
import shutil
import subprocess
import time
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Sequence

# Working directories the watcher reads from and writes to.
INBOX = Path("/workspace/inbox")
OUTBOX = Path("/workspace/outbox")
PROCESSED = Path("/workspace/processed")
FAILED = Path("/workspace/failed")
RESUME_DIR = Path("/workspace/resume")

# Prompt templates. The customization prompt may fall back to the bundled
# ".example" file, which is copied into TEMPLATE_CACHE before use.
TEMPLATES_DIR = Path("/templates")
TEMPLATE_CACHE = Path("/tmp/templates")
PROMPT_TEMPLATE = TEMPLATES_DIR / "ResumeCustomizerPrompt.md"
PROMPT_TEMPLATE_EXAMPLE = TEMPLATES_DIR / "ResumeCustomizerPrompt.md.example"
NORMALIZER_TEMPLATE = Path("/app/JobDescriptionNormalizerPrompt.md")

# Tunables, overridable through environment variables of the same name.
POLL_INTERVAL_SECONDS = int(os.environ.get("POLL_INTERVAL_SECONDS", "5"))
CODEX_TIMEOUT_SECONDS = int(os.environ.get("CODEX_TIMEOUT_SECONDS", "600"))

# Populated by ensure_environment(); None until startup validation has run.
RESOLVED_PROMPT_TEMPLATE: Path | None = None
RESOLVED_NORMALIZER_TEMPLATE: Path | None = None


class FatalConfigurationError(RuntimeError):
    """Raised when the watcher encounters a non-recoverable configuration problem."""


@dataclass(frozen=True)
class NormalizedJobDescription:
    """Structured result of normalizing a raw job description."""

    # Company name taken from the "Company:" line ("Company" if blank).
    company: str
    # Role name taken from the "Job Title:" line ("Role" if blank).
    job_title: str
    # Markdown body that follows the metadata header.
    description_markdown: str


def ensure_environment() -> None:
    """Verify required directories and template assets exist.

    On success the module-level RESOLVED_PROMPT_TEMPLATE and
    RESOLVED_NORMALIZER_TEMPLATE are set.

    Raises:
        FatalConfigurationError: if a required directory is missing (or is not
            a directory), or if a prompt template cannot be located.
    """
    global RESOLVED_PROMPT_TEMPLATE, RESOLVED_NORMALIZER_TEMPLATE

    # is_dir() rather than exists(): every one of these paths is later used as
    # a directory (iterdir/glob/mkdir below it), so a plain file with the same
    # name must be rejected at startup instead of failing mid-job.
    missing = [
        str(path)
        for path in (
            INBOX,
            OUTBOX,
            PROCESSED,
            FAILED,
            RESUME_DIR,
            TEMPLATES_DIR,
        )
        if not path.is_dir()
    ]
    if missing:
        raise FatalConfigurationError(
            "Input pipeline is missing required paths: " + ", ".join(missing)
        )

    RESOLVED_PROMPT_TEMPLATE = resolve_template(
        PROMPT_TEMPLATE,
        PROMPT_TEMPLATE_EXAMPLE,
        TEMPLATE_CACHE,
        "Resume customization prompt",
    )

    RESOLVED_NORMALIZER_TEMPLATE = NORMALIZER_TEMPLATE
    if not RESOLVED_NORMALIZER_TEMPLATE.exists():
        raise FatalConfigurationError(
            f"Job description normalizer prompt missing at {RESOLVED_NORMALIZER_TEMPLATE}"
        )


def resolve_template(
    primary: Path,
    example: Path,
    cache_dir: Path,
    description: str,
) -> Path:
    """Return the template path, copying the example if needed.

    Args:
        primary: Preferred template location.
        example: Fallback template used when ``primary`` does not exist.
        cache_dir: Directory that receives a copy of ``example`` (created if
            missing); the copy is named after ``primary``.
        description: Human-readable label used in the error message.

    Returns:
        ``primary`` when it exists, otherwise the cached copy of ``example``.

    Raises:
        FatalConfigurationError: if neither ``primary`` nor ``example`` exists.
    """
    if primary.exists():
        return primary

    if example.exists():
        cache_dir.mkdir(parents=True, exist_ok=True)
        cached = cache_dir / primary.name
        shutil.copy(example, cached)
        return cached

    raise FatalConfigurationError(
        f"{description} missing: {primary} (no example found at {example})"
    )


def ensure_single_resume() -> Path:
    """Return the single resume markdown file or raise if conditions are not met.

    Raises:
        FatalConfigurationError: if RESUME_DIR contains zero or more than one
            ``*.md`` file.
    """
    resumes = sorted(RESUME_DIR.glob("*.md"))
    if not resumes:
        raise FatalConfigurationError(
            f"No resume Markdown file found in {RESUME_DIR}. Exactly one is required."
        )
    if len(resumes) > 1:
        raise FatalConfigurationError(
            f"Multiple resume Markdown files found in {RESUME_DIR}: "
            + ", ".join(r.name for r in resumes)
        )
    return resumes[0]


def ensure_single_job(paths: Sequence[Path]) -> Path | None:
    """Validate there is at most one job description file (any extension).

    Args:
        paths: Candidate inbox entries. Non-files and dot-files are ignored.

    Returns:
        The single visible job file, or None when there is none.

    Raises:
        FatalConfigurationError: if more than one visible file is present.
    """
    visible = [
        path for path in paths if path.is_file() and not path.name.startswith(".")
    ]
    if not visible:
        return None
    if len(visible) > 1:
        names = ", ".join(p.name for p in visible)
        raise FatalConfigurationError(
            f"Multiple job description files detected in inbox: {names} "
            "— expected exactly one."
        )
    return visible[0]


def build_prompt_text(resume: Path, job_markdown: str, prompt_template: Path) -> str:
    """Return the combined prompt string fed to the Codex CLI.

    Args:
        resume: Path to the base resume; read as UTF-8.
        job_markdown: Normalized job description text.
        prompt_template: Path to the instruction template; read as UTF-8.

    Returns:
        A Markdown document with "Instructions", "Job Description" and
        "Current Resume" sections, each input stripped of surrounding
        whitespace.
    """
    resume_text = resume.read_text(encoding="utf-8").strip()
    instructions_text = prompt_template.read_text(encoding="utf-8").strip()
    return (
        "# Resume Customization Request\n\n"
        "## Instructions\n"
        f"{instructions_text}\n\n"
        "---\n\n"
        "## Job Description\n"
        f"{job_markdown.strip()}\n\n"
        "---\n\n"
        "## Current Resume\n"
        f"{resume_text}\n"
    )


def build_timestamp_dir(base: Path, timestamp: datetime) -> Path:
    """Create (if missing) and return the timestamped directory path.

    The layout is ``base/YYYY/MM/DD/HHMM``.
    """
    path = (
        base
        / timestamp.strftime("%Y")
        / timestamp.strftime("%m")
        / timestamp.strftime("%d")
        / timestamp.strftime("%H%M")
    )
    path.mkdir(parents=True, exist_ok=True)
    return path


def sanitize_stem(stem: str) -> str:
    """Replace characters that could interfere with filesystem operations.

    Every non-alphanumeric character becomes ``_``; an empty input yields
    ``"resume"``.
    """
    return "".join(ch if ch.isalnum() else "_" for ch in stem) or "resume"


def slugify(component: str) -> str:
    """Turn a free-form string into a filesystem-friendly slug.

    Alphanumeric characters are lower-cased, every other character acts as a
    separator, and runs of separators collapse into a single ``-``. Returns an
    empty string when the input contains no alphanumeric characters.
    """
    normalized = "".join(
        ch.lower() if ch.isalnum() else "-" for ch in component.strip()
    )
    parts = [part for part in normalized.split("-") if part]
    return "-".join(parts)


def run_codex(prompt_text: str, output_path: Path) -> None:
    """Execute the Codex CLI with the provided prompt text.

    The prompt is sent on stdin and the CLI is asked to write its last message
    to ``output_path``.

    Args:
        prompt_text: Full prompt, encoded as UTF-8 before being piped in.
        output_path: File the CLI should write; its parent is created if
            missing.

    Raises:
        FatalConfigurationError: if the ``codex`` executable is not on PATH.
        subprocess.CalledProcessError: if the CLI exits with a non-zero status.
        RuntimeError: if the CLI times out, or finishes without producing a
            non-empty output file.
    """
    output_path.parent.mkdir(parents=True, exist_ok=True)
    command = [
        "codex",
        "exec",
        "-",
        "--output-last-message",
        str(output_path),
        "--skip-git-repo-check",
        "--sandbox",
        "read-only",
    ]
    logging.info("Running Codex CLI command: %s", " ".join(command))
    try:
        subprocess.run(
            command,
            input=prompt_text.encode("utf-8"),
            check=True,
            timeout=CODEX_TIMEOUT_SECONDS,
            env=os.environ.copy(),
        )
    except FileNotFoundError as exc:
        raise FatalConfigurationError(
            "Codex CLI executable 'codex' not found in PATH"
        ) from exc
    except subprocess.TimeoutExpired as exc:
        raise RuntimeError("Codex CLI timed out") from exc

    if not output_path.exists():
        raise RuntimeError(
            f"Codex CLI completed but expected output file {output_path} is missing."
        )
    # A zero-byte result is never a usable resume or normalization; fail here
    # so the job ends up in failed/ instead of being archived as a success.
    if output_path.stat().st_size == 0:
        raise RuntimeError(
            f"Codex CLI completed but output file {output_path} is empty."
        )


def build_normalizer_prompt(raw_text: str) -> str:
    """Construct the prompt for normalizing the raw job description.

    Args:
        raw_text: Unprocessed job description; embedded inside a fenced block.

    Raises:
        FatalConfigurationError: if ensure_environment() has not resolved the
            normalizer template.
    """
    if RESOLVED_NORMALIZER_TEMPLATE is None:
        raise FatalConfigurationError(
            "Normalizer template was not resolved during startup."
        )
    instructions = RESOLVED_NORMALIZER_TEMPLATE.read_text(encoding="utf-8").strip()
    return (
        f"{instructions}\n\n"
        "---\n\n"
        "## Raw Job Description\n"
        "```\n"
        f"{raw_text.strip()}\n"
        "```\n"
    )


def parse_normalized_output(text: str) -> NormalizedJobDescription:
    """Parse the Codex-normalized output into structured pieces.

    The first non-empty line must start with ``Company:`` and the next
    non-empty line with ``Job Title:`` (both matched case-insensitively).
    Everything after the header block is the description.

    Raises:
        RuntimeError: if a header line is missing or malformed, or if no
            description text follows the header.
    """
    lines = text.splitlines()
    idx = 0

    def next_non_empty(start: int) -> tuple[int, str]:
        """Return (index, stripped text) of the first non-blank line at/after start."""
        pos = start
        while pos < len(lines):
            content = lines[pos].strip()
            if content:
                return pos, content
            pos += 1
        raise RuntimeError("Normalized output is missing expected lines.")

    idx, company_line = next_non_empty(idx)
    if not company_line.lower().startswith("company:"):
        raise RuntimeError(f"Expected 'Company:' line, found: {company_line!r}")
    company = company_line[len("company:") :].strip()

    idx, job_title_line = next_non_empty(idx + 1)
    if not job_title_line.lower().startswith("job title:"):
        raise RuntimeError(f"Expected 'Job Title:' line, found: {job_title_line!r}")
    job_title = job_title_line[len("job title:") :].strip()

    idx += 1
    # Skip whatever non-blank lines immediately follow the Job Title line, then
    # the blank separator line(s).
    # NOTE(review): if the description starts without a separating blank line,
    # its first paragraph is consumed here — confirm this is intended.
    while idx < len(lines) and lines[idx].strip():
        idx += 1
    while idx < len(lines) and not lines[idx].strip():
        idx += 1

    description = "\n".join(lines[idx:]).strip()
    if not description:
        raise RuntimeError(
            "Normalized output did not include a job description section."
        )

    return NormalizedJobDescription(
        company=company or "Company",
        job_title=job_title or "Role",
        description_markdown=description,
    )


def normalize_job_description(job_file: Path) -> NormalizedJobDescription:
    """Use Codex to clean and extract metadata from the raw job description.

    Args:
        job_file: Raw job description; read as UTF-8 with undecodable bytes
            dropped.

    Raises:
        RuntimeError: if the file is empty after trimming, or if the Codex
            output cannot be parsed (see parse_normalized_output / run_codex).
    """
    raw_text = job_file.read_text(encoding="utf-8", errors="ignore").strip()
    if not raw_text:
        raise RuntimeError(
            f"Job description file {job_file.name} is empty after trimming."
        )

    prompt_text = build_normalizer_prompt(raw_text)
    with TemporaryDirectory() as tmp_dir_str:
        tmp_dir = Path(tmp_dir_str)
        output_path = tmp_dir / "normalize_output.md"
        run_codex(prompt_text, output_path)
        normalized_text = output_path.read_text(encoding="utf-8").strip()

    return parse_normalized_output(normalized_text)


def move_with_unique_target(source: Path, destination_dir: Path) -> Path:
    """Move source into destination_dir, avoiding collisions with numeric suffixes.

    If ``destination_dir/source.name`` already exists, ``_1``, ``_2``, ... is
    appended to the stem until a free name is found.

    Returns:
        The final path of the moved file.
    """
    destination_dir.mkdir(parents=True, exist_ok=True)
    target = destination_dir / source.name
    stem = source.stem
    suffix = source.suffix
    counter = 1
    while target.exists():
        target = destination_dir / f"{stem}_{counter}{suffix}"
        counter += 1
    shutil.move(str(source), target)
    return target


def process_job(job_file: Path) -> None:
    """Normalize the job description, run Codex, and archive outputs.

    Writes the generated resume, the prompt that produced it, and the
    normalized job description into a timestamped OUTBOX folder, then moves
    ``job_file`` into the matching PROCESSED folder.

    Raises:
        FatalConfigurationError: if the resume or prompt template setup is
            invalid, or the Codex CLI is not installed.
        subprocess.CalledProcessError: if the Codex CLI exits non-zero.
        RuntimeError: for other Codex or parsing failures.
    """
    timestamp = datetime.now().astimezone()
    out_dir = build_timestamp_dir(OUTBOX, timestamp)
    processed_dir = build_timestamp_dir(PROCESSED, timestamp)

    resume_path = ensure_single_resume()
    # Validate configuration before the (slow) normalization call so a missing
    # template does not cost a Codex round-trip first.
    if RESOLVED_PROMPT_TEMPLATE is None:
        raise FatalConfigurationError(
            "Prompt template was not resolved during startup."
        )

    normalized = normalize_job_description(job_file)

    prompt_text = build_prompt_text(
        resume_path,
        normalized.description_markdown,
        RESOLVED_PROMPT_TEMPLATE,
    )

    safe_company = slugify(normalized.company)
    safe_title = slugify(normalized.job_title)
    if safe_company and safe_title:
        output_stem = f"{safe_company}-{safe_title}"
    else:
        # Fall back to the job file's own name when either slug is empty.
        output_stem = sanitize_stem(job_file.stem)
    output_filename = f"{output_stem}.md"

    with TemporaryDirectory() as tmp_dir_str:
        # Codex writes straight to the final file name inside the temp dir so
        # the shared collision-avoiding mover can place it in the outbox.
        output_path = Path(tmp_dir_str) / output_filename
        run_codex(prompt_text, output_path)
        generated_output = move_with_unique_target(output_path, out_dir)

    logging.info(
        "Generated customized resume for %s - %s at %s",
        normalized.company,
        normalized.job_title,
        generated_output,
    )

    prompt_archive = out_dir / f"prompt-{generated_output.stem}.md"
    prompt_archive.write_text(prompt_text, encoding="utf-8")

    normalized_archive = out_dir / f"job-description-{generated_output.stem}.md"
    normalized_archive.write_text(
        (
            f"Company: {normalized.company}\n"
            f"Job Title: {normalized.job_title}\n\n"
            "# Job Description\n"
            f"{normalized.description_markdown}\n"
        ),
        encoding="utf-8",
    )

    processed_target = move_with_unique_target(job_file, processed_dir)
    logging.info(
        "Archived job description %s to %s",
        job_file.name,
        processed_target,
    )


def move_job_to_failed(job_file: Path) -> None:
    """Move the job description into the failed directory.

    Does nothing when ``job_file`` no longer exists.
    """
    if not job_file.exists():
        return
    failed_target = move_with_unique_target(job_file, FAILED)
    logging.info(
        "Moved job description %s into failed directory at %s",
        job_file.name,
        failed_target,
    )


def main() -> None:
    """Run the watcher loop: poll the inbox and process one job at a time.

    Exits with status 2 on any FatalConfigurationError. Other per-job failures
    are logged, the job file is moved to FAILED, and polling continues.
    """
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s [%(levelname)s] %(message)s",
    )

    try:
        ensure_environment()
    except FatalConfigurationError as exc:
        logging.error("Fatal configuration error: %s", exc)
        raise SystemExit(2) from exc

    logging.info("Resume customizer watcher started")

    while True:
        job_paths = sorted(INBOX.iterdir())
        try:
            job_file = ensure_single_job(job_paths)
        except FatalConfigurationError as exc:
            logging.error("Fatal configuration error: %s", exc)
            raise SystemExit(2) from exc

        if job_file is None:
            time.sleep(POLL_INTERVAL_SECONDS)
            continue

        logging.info("Processing job description %s", job_file.name)
        try:
            process_job(job_file)
        except FatalConfigurationError as exc:
            logging.error("Fatal configuration error: %s", exc)
            move_job_to_failed(job_file)
            raise SystemExit(2) from exc
        except subprocess.CalledProcessError as exc:
            logging.error("Codex CLI failed with non-zero exit status: %s", exc)
            move_job_to_failed(job_file)
        except Exception as exc:  # noqa: BLE001
            # Top-level boundary: keep the watcher alive for the next job.
            logging.exception(
                "Unexpected error while processing %s: %s", job_file.name, exc
            )
            move_job_to_failed(job_file)

        time.sleep(POLL_INTERVAL_SECONDS)


if __name__ == "__main__":
    main()