feat(input): add codex automation stack
This commit is contained in:
342
input/Docker/watch_and_customize.py
Executable file
342
input/Docker/watch_and_customize.py
Executable file
@@ -0,0 +1,342 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Monitor the customization inbox for job description Markdown files and run the Codex CLI
|
||||
to produce tailored resumes.
|
||||
|
||||
The script expects exactly one base resume Markdown file and processes one job file at a
|
||||
time. After a successful Codex run, the generated resume is written into a timestamped
|
||||
outbox folder and the job description is archived under processed/. Failures move the
|
||||
job description into failed/.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import shlex
|
||||
import shutil
|
||||
import subprocess
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from tempfile import TemporaryDirectory
|
||||
from typing import Sequence
|
||||
|
||||
# Directory polled for incoming job description Markdown files.
INBOX = Path("/workspace/inbox")
# Root under which generated resumes are written in timestamped subfolders.
OUTBOX = Path("/workspace/outbox")
# Root under which successfully handled job descriptions are archived.
PROCESSED = Path("/workspace/processed")
# Destination for job descriptions whose processing failed.
FAILED = Path("/workspace/failed")
# Directory that must contain exactly one base resume Markdown file.
RESUME_DIR = Path("/workspace/resume")
# Directory holding the prompt template assets.
TEMPLATES_DIR = Path("/templates")
# Receives a copy of the example template when the primary template is absent.
TEMPLATE_CACHE = Path("/tmp/templates")
PROMPT_TEMPLATE = TEMPLATES_DIR / "ResumeCustomizerPrompt.md"
PROMPT_TEMPLATE_EXAMPLE = TEMPLATES_DIR / "ResumeCustomizerPrompt.md.example"

# Seconds to sleep between inbox scans; overridable through the environment.
# A non-integer value raises ValueError at import time.
POLL_INTERVAL_SECONDS = int(os.environ.get("POLL_INTERVAL_SECONDS", "5"))
# Command line used to invoke the Codex CLI. The {prompt} and {output}
# placeholders are filled with the prompt file path and the expected output
# file path via str.format.
CODEX_COMMAND_TEMPLATE = os.environ.get(
    "CODEX_COMMAND_TEMPLATE",
    "codex prompt --input {prompt} --output {output} --format markdown",
)
# Upper bound, in seconds, passed as the subprocess timeout for one Codex run.
CODEX_TIMEOUT_SECONDS = int(os.environ.get("CODEX_TIMEOUT_SECONDS", "600"))

# Set by ensure_environment(); stays None until startup validation has run.
RESOLVED_PROMPT_TEMPLATE: Path | None = None
|
||||
|
||||
|
||||
class FatalConfigurationError(RuntimeError):
    """Raised when the watcher encounters a non-recoverable configuration problem.

    ``main`` logs this error and exits the process with status 2 instead of
    continuing to poll.
    """
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class MarkdownInputs:
    """Immutable bundle of the Markdown files combined into one Codex prompt."""

    # Base resume whose text fills the "Current Resume" section of the prompt.
    resume: Path
    # Job description file picked up from the inbox.
    job_description: Path
    # File whose text fills the "Instructions" section of the prompt.
    prompt_template: Path
|
||||
|
||||
|
||||
def ensure_environment() -> None:
    """Verify required directories exist and resolve the prompt template.

    On success the module-level ``RESOLVED_PROMPT_TEMPLATE`` is set to the
    template path returned by ``resolve_prompt_template``.

    Raises:
        FatalConfigurationError: If any required path is absent or is not a
            directory, or if no prompt template can be resolved.
    """
    global RESOLVED_PROMPT_TEMPLATE

    required_dirs = (INBOX, OUTBOX, PROCESSED, FAILED, RESUME_DIR, TEMPLATES_DIR)
    # is_dir() rather than exists(): a regular file sitting on one of these
    # paths would pass an existence check, yet every later glob/mkdir against
    # it would misbehave.
    missing = [str(path) for path in required_dirs if not path.is_dir()]
    if missing:
        raise FatalConfigurationError(
            "Input pipeline is missing required paths: " + ", ".join(missing)
        )

    RESOLVED_PROMPT_TEMPLATE = resolve_prompt_template(
        PROMPT_TEMPLATE,
        PROMPT_TEMPLATE_EXAMPLE,
        TEMPLATE_CACHE,
    )
|
||||
|
||||
|
||||
def resolve_prompt_template(primary: Path, example: Path, cache_dir: Path) -> Path:
    """Locate the prompt template, falling back to a cached copy of the example.

    Args:
        primary: Preferred template location; returned untouched when present.
        example: Fallback template copied into ``cache_dir`` when ``primary``
            is absent.
        cache_dir: Directory (created on demand) that receives the copy, which
            is named after ``primary``.

    Returns:
        ``primary`` if it exists, otherwise the path of the cached copy.

    Raises:
        FatalConfigurationError: If neither ``primary`` nor ``example`` exists.
    """
    if primary.exists():
        return primary

    if not example.exists():
        raise FatalConfigurationError(
            f"Prompt template missing: {primary} (no example found at {example})"
        )

    cache_dir.mkdir(parents=True, exist_ok=True)
    fallback_copy = cache_dir / primary.name
    shutil.copy(example, fallback_copy)
    return fallback_copy
|
||||
|
||||
|
||||
def ensure_single_resume() -> Path:
    """Return the only ``*.md`` file found directly inside ``RESUME_DIR``.

    Raises:
        FatalConfigurationError: If the directory holds no Markdown file or
            more than one.
    """
    candidates = sorted(RESUME_DIR.glob("*.md"))

    if not candidates:
        raise FatalConfigurationError(
            f"No resume Markdown file found in {RESUME_DIR}. Exactly one is required."
        )

    if len(candidates) > 1:
        listing = ", ".join(entry.name for entry in candidates)
        raise FatalConfigurationError(
            f"Multiple resume Markdown files found in {RESUME_DIR}: " + listing
        )

    (only_resume,) = candidates
    return only_resume
|
||||
|
||||
|
||||
def ensure_single_job(md_files: Sequence[Path]) -> Path | None:
    """Pick the job description to process from the inbox listing.

    Args:
        md_files: Markdown files currently present in the inbox.

    Returns:
        The sole entry of ``md_files``, or ``None`` when it is empty.

    Raises:
        FatalConfigurationError: If more than one file is present.
    """
    count = len(md_files)
    if count == 0:
        return None
    if count == 1:
        return md_files[0]

    names = ", ".join(entry.name for entry in md_files)
    raise FatalConfigurationError(
        f"Multiple job description files detected in inbox: {names} "
        "— expected exactly one."
    )
|
||||
|
||||
|
||||
def read_inputs(job_file: Path) -> MarkdownInputs:
    """Collect the resume, job description and prompt template paths.

    Args:
        job_file: Job description selected from the inbox.

    Returns:
        A ``MarkdownInputs`` referencing the three files.

    Raises:
        FatalConfigurationError: If the resume directory does not hold exactly
            one Markdown file, if ``job_file`` no longer exists, or if the
            prompt template was never resolved at startup.
    """
    base_resume = ensure_single_resume()

    # The job file was listed a moment ago but may have been removed since.
    if not job_file.exists():
        raise FatalConfigurationError(
            "Required files disappeared before processing: " + str(job_file)
        )

    template = RESOLVED_PROMPT_TEMPLATE
    if template is None:
        raise FatalConfigurationError("Prompt template was not resolved during startup.")

    return MarkdownInputs(
        resume=base_resume,
        job_description=job_file,
        prompt_template=template,
    )
|
||||
|
||||
|
||||
def build_prompt_text(inputs: MarkdownInputs) -> str:
    """Assemble the Markdown prompt handed to the Codex CLI.

    Each input file is read as UTF-8 and stripped of surrounding whitespace.
    The result has three sections — instructions, job description, current
    resume — separated by horizontal rules.
    """

    def _load(path: Path) -> str:
        return path.read_text(encoding="utf-8").strip()

    # Read order matches the dataclass field order: resume, job, template.
    resume_body = _load(inputs.resume)
    job_body = _load(inputs.job_description)
    guidance = _load(inputs.prompt_template)

    sections = [
        "# Resume Customization Request\n\n",
        "## Instructions\n",
        f"{guidance}\n\n",
        "---\n\n",
        "## Job Description\n",
        f"{job_body}\n\n",
        "---\n\n",
        "## Current Resume\n",
        f"{resume_body}\n",
    ]
    return "".join(sections)
|
||||
|
||||
|
||||
def build_timestamp_dir(base: Path, timestamp: datetime) -> Path:
    """Ensure ``base/YYYY/MM/DD/HHMM`` exists for ``timestamp`` and return it.

    Missing parent directories are created; an already existing directory is
    reused.
    """
    segments = (timestamp.strftime(part) for part in ("%Y", "%m", "%d", "%H%M"))
    target = base.joinpath(*segments)
    target.mkdir(parents=True, exist_ok=True)
    return target
|
||||
|
||||
|
||||
def sanitize_stem(stem: str) -> str:
    """Turn ``stem`` into a filename-safe token.

    Every character that is not alphanumeric (per ``str.isalnum``) becomes an
    underscore. An empty ``stem`` yields the placeholder ``"resume"``.
    """

    def _safe(char: str) -> str:
        return char if char.isalnum() else "_"

    cleaned = "".join(map(_safe, stem))
    if not cleaned:
        return "resume"
    return cleaned
|
||||
|
||||
|
||||
def run_codex(prompt_path: Path, output_path: Path) -> None:
    """Execute the Codex CLI using the configured command template.

    ``CODEX_COMMAND_TEMPLATE`` is tokenised first and the ``{prompt}`` /
    ``{output}`` placeholders are substituted per argument afterwards. A path
    containing whitespace or quote characters therefore stays a single
    argument instead of being re-split by the shell-style parser.

    Args:
        prompt_path: File containing the prompt text to feed to Codex.
        output_path: File the command is expected to create.

    Raises:
        FatalConfigurationError: If the template cannot be tokenised, is
            empty, references an unknown placeholder, or names an executable
            that cannot be found.
        subprocess.CalledProcessError: If the command exits with a non-zero
            status.
        RuntimeError: If the command times out or finishes without creating
            ``output_path``.
    """
    try:
        template_args = shlex.split(CODEX_COMMAND_TEMPLATE)
    except ValueError as exc:
        raise FatalConfigurationError(
            f"Unable to parse CODEX_COMMAND_TEMPLATE into arguments: {exc}"
        ) from exc

    if not template_args:
        raise FatalConfigurationError(
            "CODEX_COMMAND_TEMPLATE does not contain a command."
        )

    try:
        command = [
            arg.format(prompt=str(prompt_path), output=str(output_path))
            for arg in template_args
        ]
    except (KeyError, IndexError, ValueError, AttributeError) as exc:
        # str.format raises these for unknown names, positional fields,
        # unbalanced braces and attribute lookups respectively.
        raise FatalConfigurationError(
            f"CODEX_COMMAND_TEMPLATE contains an invalid placeholder: {exc!r}"
        ) from exc

    logging.info(
        "Running Codex CLI command: %s",
        " ".join(shlex.quote(arg) for arg in command),
    )

    try:
        subprocess.run(
            command,
            check=True,
            timeout=CODEX_TIMEOUT_SECONDS,
            env=os.environ.copy(),
        )
    except FileNotFoundError as exc:
        raise FatalConfigurationError(
            f"Executable not found while running Codex CLI command: {command[0]}"
        ) from exc
    except subprocess.TimeoutExpired as exc:
        raise RuntimeError("Codex CLI timed out") from exc

    # A zero exit status alone does not prove the CLI wrote its result.
    if not output_path.exists():
        raise RuntimeError(
            f"Codex CLI completed but expected output file {output_path} is missing."
        )
|
||||
|
||||
|
||||
def move_with_unique_target(source: Path, destination_dir: Path) -> Path:
    """Move ``source`` into ``destination_dir`` without overwriting anything.

    The directory is created if needed. When a file with the same name is
    already there, ``_1``, ``_2``, ... is appended to the stem until a free
    name is found.

    Returns:
        The path the file was moved to.
    """
    destination_dir.mkdir(parents=True, exist_ok=True)

    candidate = destination_dir / source.name
    attempt = 0
    while candidate.exists():
        attempt += 1
        candidate = destination_dir / f"{source.stem}_{attempt}{source.suffix}"

    shutil.move(str(source), candidate)
    return candidate
|
||||
|
||||
|
||||
def process_job(job_file: Path) -> None:
    """Combine inputs, run Codex, and archive outputs.

    The generated resume and a copy of the prompt that produced it are written
    to a timestamped folder under ``OUTBOX``. The job description is then moved
    to the matching timestamped folder under ``PROCESSED``.

    The resume and its prompt archive share one collision suffix, so a rerun
    of the same job within the same minute never overwrites the earlier
    prompt and each prompt file stays paired with its resume.

    Args:
        job_file: Job description Markdown file taken from the inbox.

    Raises:
        FatalConfigurationError: Propagated from ``read_inputs`` or
            ``run_codex`` for configuration problems.
        subprocess.CalledProcessError: Propagated from ``run_codex`` when the
            CLI exits with a non-zero status.
        RuntimeError: Propagated from ``run_codex`` on timeout or missing
            output.
    """
    timestamp = datetime.now().astimezone()
    # Created before Codex runs, so a mkdir failure surfaces before the CLI
    # is invoked.
    out_dir = build_timestamp_dir(OUTBOX, timestamp)
    processed_dir = build_timestamp_dir(PROCESSED, timestamp)

    inputs = read_inputs(job_file)
    prompt_text = build_prompt_text(inputs)

    safe_resume_stem = sanitize_stem(inputs.resume.stem)
    safe_job_stem = sanitize_stem(job_file.stem)
    output_stem = f"{safe_resume_stem}-for-{safe_job_stem}"

    with TemporaryDirectory() as tmp_dir_str:
        tmp_dir = Path(tmp_dir_str)
        prompt_path = tmp_dir / "prompt.md"
        prompt_path.write_text(prompt_text, encoding="utf-8")
        output_path = tmp_dir / "codex_output.md"

        run_codex(prompt_path, output_path)

        # Find one suffix under which BOTH target names are free.
        counter = 0
        collision_suffix = ""
        while True:
            generated_output = out_dir / f"{output_stem}{collision_suffix}.md"
            prompt_archive = out_dir / f"prompt-{safe_job_stem}{collision_suffix}.md"
            if not generated_output.exists() and not prompt_archive.exists():
                break
            counter += 1
            collision_suffix = f"_{counter}"

        # Must happen before the temporary directory is cleaned up.
        shutil.move(str(output_path), generated_output)

    logging.info("Generated customized resume at %s", generated_output)
    prompt_archive.write_text(prompt_text, encoding="utf-8")

    processed_target = move_with_unique_target(job_file, processed_dir)
    logging.info(
        "Archived job description %s to %s",
        job_file.name,
        processed_target,
    )
|
||||
|
||||
|
||||
def move_job_to_failed(job_file: Path) -> None:
    """Relocate ``job_file`` into ``FAILED`` and log where it ended up.

    Does nothing when the file no longer exists.
    """
    if job_file.exists():
        failed_target = move_with_unique_target(job_file, FAILED)
        logging.info(
            "Moved job description %s into failed directory at %s",
            job_file.name,
            failed_target,
        )
|
||||
|
||||
|
||||
def main() -> None:
    """Validate the environment, then poll the inbox forever.

    Each cycle handles at most one job description. Configuration errors stop
    the process with exit status 2. Any other processing failure moves the
    job description into the failed directory and polling continues.
    """
    logging.basicConfig(
        format="%(asctime)s [%(levelname)s] %(message)s",
        level=logging.INFO,
    )

    try:
        ensure_environment()
    except FatalConfigurationError as exc:
        logging.error("Fatal configuration error: %s", exc)
        raise SystemExit(2) from exc

    logging.info("Resume customizer watcher started")

    while True:
        try:
            pending_job = ensure_single_job(sorted(INBOX.glob("*.md")))
        except FatalConfigurationError as exc:
            logging.error("Fatal configuration error: %s", exc)
            raise SystemExit(2) from exc

        if pending_job is not None:
            logging.info("Processing job description %s", pending_job.name)
            try:
                process_job(pending_job)
            except FatalConfigurationError as exc:
                logging.error("Fatal configuration error: %s", exc)
                move_job_to_failed(pending_job)
                raise SystemExit(2) from exc
            except subprocess.CalledProcessError as exc:
                logging.error("Codex CLI failed with non-zero exit status: %s", exc)
                move_job_to_failed(pending_job)
            except Exception as exc:  # noqa: BLE001
                logging.exception(
                    "Unexpected error while processing %s: %s", pending_job.name, exc
                )
                move_job_to_failed(pending_job)

        # Sleep once per cycle, whether or not a job was handled.
        time.sleep(POLL_INTERVAL_SECONDS)
|
||||
|
||||
|
||||
# Start the watcher only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user