diff --git a/input/AGENTS.md b/input/AGENTS.md index 77cc08c..c6f160b 100644 --- a/input/AGENTS.md +++ b/input/AGENTS.md @@ -1,15 +1,16 @@ # Input Agent Guide ## Mission -Automate the upstream resume customization workflow. Monitor the job-description inbox, combine the base resume and prompt template into a Codex-friendly request, invoke the Codex CLI, and deposit the generated Markdown in a timestamped outbox for human review. +Automate the upstream resume customization workflow. Monitor the job-description inbox, clean up messy recruiter-sourced text, combine the normalized description with the base resume and prompt templates, invoke the Codex CLI, and deposit the generated Markdown in a timestamped outbox for human review. ## Directory Responsibilities - `resume/` – contains exactly one Markdown resume. Any other count is a fatal configuration error. -- `ForCustomizing/inbox/` – drop one job-description Markdown at a time to trigger processing. -- `ForCustomizing/outbox/YYYY/MM/DD/HHMM/` – timestamped folders containing Codex output Markdown (and a copy of the prompt used). +- `ForCustomizing/inbox/` – drop one plain-text or Markdown job description at a time to trigger processing. +- `ForCustomizing/outbox/YYYY/MM/DD/HHMM/` – timestamped folders containing Codex output Markdown (`<company>-<job-title>.md`) along with the exact prompt and the cleaned job description that fed Codex. - `ForCustomizing/processed/YYYY/MM/DD/HHMM/` – archives of job descriptions that Codex processed successfully. - `ForCustomizing/failed/` – captures job descriptions when Codex errors or a recoverable issue occurs. Fatal configuration errors still exit the container. -- `templates/ResumeCustomizerPrompt.md.example` – default instruction block; copy to `ResumeCustomizerPrompt.md` (same folder) to override locally. +- `templates/JobDescriptionNormalizerPrompt.md.example` – default instructions that scrub recruiter chatter and extract metadata; copy to `JobDescriptionNormalizerPrompt.md` to override. 
+- `templates/ResumeCustomizerPrompt.md.example` – default resume-customization instructions; copy to `ResumeCustomizerPrompt.md` to override. ## Running the Input Processor Launch the stack with the wrapper so files inherit your UID/GID and your local Codex credentials mount in: @@ -20,7 +21,8 @@ cd input/Docker ``` Environment variables you can pass before the command: -- `CODEX_COMMAND_TEMPLATE` – override the Codex CLI invocation (defaults to `codex prompt --input {prompt} --output {output} --format markdown`). +- `CODEX_COMMAND_TEMPLATE` – override the Codex CLI invocation for the resume customization run (defaults to `codex prompt --input {prompt} --output {output} --format markdown`). +- `CODEX_NORMALIZER_COMMAND_TEMPLATE` – optional override for the job-description normalization run (defaults to the same value as `CODEX_COMMAND_TEMPLATE`). - `POLL_INTERVAL_SECONDS` – watcher polling cadence (default `5`). - `CODEX_TIMEOUT_SECONDS` – hard timeout for Codex calls (default `600`). - `CODEX_CONFIG_DIR` – optional override for the host directory that should mount into `/home/codex/.codex`. 
diff --git a/input/Docker/docker-compose.yml b/input/Docker/docker-compose.yml index 01b3576..016fa3b 100644 --- a/input/Docker/docker-compose.yml +++ b/input/Docker/docker-compose.yml @@ -13,6 +13,7 @@ services: POLL_INTERVAL_SECONDS: "${POLL_INTERVAL_SECONDS:-5}" CODEX_TIMEOUT_SECONDS: "${CODEX_TIMEOUT_SECONDS:-600}" CODEX_COMMAND_TEMPLATE: "${CODEX_COMMAND_TEMPLATE:-codex prompt --input {prompt} --output {output} --format markdown}" + CODEX_NORMALIZER_COMMAND_TEMPLATE: "${CODEX_NORMALIZER_COMMAND_TEMPLATE:-codex prompt --input {prompt} --output {output} --format markdown}" volumes: - ../ForCustomizing/inbox:/workspace/inbox - ../ForCustomizing/outbox:/workspace/outbox diff --git a/input/Docker/watch_and_customize.py b/input/Docker/watch_and_customize.py old mode 100755 new mode 100644 index c960555..41eac67 --- a/input/Docker/watch_and_customize.py +++ b/input/Docker/watch_and_customize.py @@ -1,12 +1,12 @@ #!/usr/bin/env python3 """ -Monitor the customization inbox for job description Markdown files and run the Codex CLI +Monitor the customization inbox, normalize messy job descriptions, and run the Codex CLI to produce tailored resumes. -The script expects exactly one base resume Markdown file and processes one job file at a -time. After a successful Codex run, the generated resume is written into a timestamped -outbox folder and the job description is archived under processed/. Failures move the -job description into failed/. +The watcher expects exactly one base resume Markdown file and processes one job file at a +time. After Codex succeeds, the generated resume is written into a timestamped outbox +folder using the pattern -.md, while the original job file is archived +under processed/. Failures move the job description into failed/. 
""" from __future__ import annotations @@ -32,15 +32,22 @@ TEMPLATES_DIR = Path("/templates") TEMPLATE_CACHE = Path("/tmp/templates") PROMPT_TEMPLATE = TEMPLATES_DIR / "ResumeCustomizerPrompt.md" PROMPT_TEMPLATE_EXAMPLE = TEMPLATES_DIR / "ResumeCustomizerPrompt.md.example" +NORMALIZER_TEMPLATE = TEMPLATES_DIR / "JobDescriptionNormalizerPrompt.md" +NORMALIZER_TEMPLATE_EXAMPLE = TEMPLATES_DIR / "JobDescriptionNormalizerPrompt.md.example" POLL_INTERVAL_SECONDS = int(os.environ.get("POLL_INTERVAL_SECONDS", "5")) CODEX_COMMAND_TEMPLATE = os.environ.get( "CODEX_COMMAND_TEMPLATE", "codex prompt --input {prompt} --output {output} --format markdown", ) +CODEX_NORMALIZER_COMMAND_TEMPLATE = os.environ.get( + "CODEX_NORMALIZER_COMMAND_TEMPLATE", + CODEX_COMMAND_TEMPLATE, +) CODEX_TIMEOUT_SECONDS = int(os.environ.get("CODEX_TIMEOUT_SECONDS", "600")) RESOLVED_PROMPT_TEMPLATE: Path | None = None +RESOLVED_NORMALIZER_TEMPLATE: Path | None = None class FatalConfigurationError(RuntimeError): @@ -48,15 +55,15 @@ class FatalConfigurationError(RuntimeError): @dataclass(frozen=True) -class MarkdownInputs: - resume: Path - job_description: Path - prompt_template: Path +class NormalizedJobDescription: + company: str + job_title: str + description_markdown: str def ensure_environment() -> None: """Verify required directories and template assets exist.""" - global RESOLVED_PROMPT_TEMPLATE + global RESOLVED_PROMPT_TEMPLATE, RESOLVED_NORMALIZER_TEMPLATE missing = [ str(path) @@ -76,15 +83,27 @@ def ensure_environment() -> None: "Input pipeline is missing required paths: " + ", ".join(missing) ) - RESOLVED_PROMPT_TEMPLATE = resolve_prompt_template( + RESOLVED_PROMPT_TEMPLATE = resolve_template( PROMPT_TEMPLATE, PROMPT_TEMPLATE_EXAMPLE, TEMPLATE_CACHE, + "Resume customization prompt", + ) + RESOLVED_NORMALIZER_TEMPLATE = resolve_template( + NORMALIZER_TEMPLATE, + NORMALIZER_TEMPLATE_EXAMPLE, + TEMPLATE_CACHE, + "Job description normalizer prompt", ) -def resolve_prompt_template(primary: Path, 
example: Path, cache_dir: Path) -> Path: - """Return the prompt template path, copying the example if needed.""" +def resolve_template( + primary: Path, + example: Path, + cache_dir: Path, + description: str, +) -> Path: + """Return the template path, copying the example if needed.""" if primary.exists(): return primary @@ -95,7 +114,7 @@ def resolve_prompt_template(primary: Path, example: Path, cache_dir: Path) -> Pa return cached raise FatalConfigurationError( - f"Prompt template missing: {primary} (no example found at {example})" + f"{description} missing: {primary} (no example found at {example})" ) @@ -115,46 +134,26 @@ def ensure_single_resume() -> Path: return resumes[0] -def ensure_single_job(md_files: Sequence[Path]) -> Path | None: - """Validate there is at most one job description file.""" - if not md_files: +def ensure_single_job(paths: Sequence[Path]) -> Path | None: + """Validate there is at most one job description file (any extension).""" + visible = [path for path in paths if path.is_file() and not path.name.startswith(".")] + if not visible: return None - if len(md_files) > 1: - names = ", ".join(p.name for p in md_files) + if len(visible) > 1: + names = ", ".join(p.name for p in visible) raise FatalConfigurationError( f"Multiple job description files detected in inbox: {names} " "— expected exactly one." 
) - return md_files[0] + return visible[0] -def read_inputs(job_file: Path) -> MarkdownInputs: - """Gather and return all markdown inputs required for the prompt.""" - resume = ensure_single_resume() - - missing = [str(path) for path in (job_file,) if not path.exists()] - if missing: - raise FatalConfigurationError( - "Required files disappeared before processing: " + ", ".join(missing) - ) - - if RESOLVED_PROMPT_TEMPLATE is None: - raise FatalConfigurationError("Prompt template was not resolved during startup.") - - return MarkdownInputs( - resume=resume, - job_description=job_file, - prompt_template=RESOLVED_PROMPT_TEMPLATE, - ) - - -def build_prompt_text(inputs: MarkdownInputs) -> str: +def build_prompt_text(resume: Path, job_markdown: str, prompt_template: Path) -> str: """Return the combined prompt string fed to the Codex CLI.""" - resume_text = inputs.resume.read_text(encoding="utf-8").strip() - jd_text = inputs.job_description.read_text(encoding="utf-8").strip() - instructions_text = inputs.prompt_template.read_text(encoding="utf-8").strip() + resume_text = resume.read_text(encoding="utf-8").strip() + instructions_text = prompt_template.read_text(encoding="utf-8").strip() return ( "# Resume Customization Request\n\n" @@ -162,7 +161,7 @@ def build_prompt_text(inputs: MarkdownInputs) -> str: f"{instructions_text}\n\n" "---\n\n" "## Job Description\n" - f"{jd_text}\n\n" + f"{job_markdown.strip()}\n\n" "---\n\n" "## Current Resume\n" f"{resume_text}\n" @@ -187,9 +186,18 @@ def sanitize_stem(stem: str) -> str: return "".join(ch if ch.isalnum() else "_" for ch in stem) or "resume" -def run_codex(prompt_path: Path, output_path: Path) -> None: - """Execute the Codex CLI using the configured command template.""" - command_text = CODEX_COMMAND_TEMPLATE.format( +def slugify(component: str) -> str: + """Turn a free-form string into a filesystem-friendly slug.""" + normalized = "".join( + ch.lower() if ch.isalnum() else "-" for ch in component.strip() + ) + parts = [part 
for part in normalized.split("-") if part] + return "-".join(parts) + + +def run_codex(prompt_path: Path, output_path: Path, command_template: str) -> None: + """Execute the Codex CLI using the provided command template.""" + command_text = command_template.format( prompt=str(prompt_path), output=str(output_path), ) @@ -199,7 +207,7 @@ def run_codex(prompt_path: Path, output_path: Path) -> None: command = shlex.split(command_text) except ValueError as exc: raise FatalConfigurationError( - f"Unable to parse CODEX_COMMAND_TEMPLATE into arguments: {exc}" + f"Unable to parse Codex command template into arguments: {exc}" ) from exc try: @@ -222,6 +230,85 @@ def run_codex(prompt_path: Path, output_path: Path) -> None: ) +def build_normalizer_prompt(raw_text: str) -> str: + """Construct the prompt for normalizing the raw job description.""" + if RESOLVED_NORMALIZER_TEMPLATE is None: + raise FatalConfigurationError("Normalizer template was not resolved during startup.") + + instructions = RESOLVED_NORMALIZER_TEMPLATE.read_text(encoding="utf-8").strip() + return ( + f"{instructions}\n\n" + "---\n\n" + "## Raw Job Description\n" + "```\n" + f"{raw_text.strip()}\n" + "```\n" + ) + + +def parse_normalized_output(text: str) -> NormalizedJobDescription: + """Parse the Codex-normalized output into structured pieces.""" + lines = text.splitlines() + idx = 0 + + def next_non_empty(start: int) -> tuple[int, str]: + pos = start + while pos < len(lines): + content = lines[pos].strip() + if content: + return pos, content + pos += 1 + raise RuntimeError("Normalized output is missing expected lines.") + + idx, company_line = next_non_empty(idx) + if not company_line.lower().startswith("company:"): + raise RuntimeError(f"Expected 'Company:' line, found: {company_line!r}") + company = company_line[len("company:") :].strip() + + idx, job_title_line = next_non_empty(idx + 1) + if not job_title_line.lower().startswith("job title:"): + raise RuntimeError(f"Expected 'Job Title:' line, found: 
{job_title_line!r}") + job_title = job_title_line[len("job title:") :].strip() + + idx += 1 + while idx < len(lines) and lines[idx].strip(): + idx += 1 + + while idx < len(lines) and not lines[idx].strip(): + idx += 1 + + description_lines = lines[idx:] + description = "\n".join(description_lines).strip() + if not description: + raise RuntimeError("Normalized output did not include a job description section.") + + return NormalizedJobDescription( + company=company or "Company", + job_title=job_title or "Role", + description_markdown=description, + ) + + +def normalize_job_description(job_file: Path) -> NormalizedJobDescription: + """Use Codex to clean and extract metadata from the raw job description.""" + raw_text = job_file.read_text(encoding="utf-8", errors="ignore").strip() + if not raw_text: + raise RuntimeError(f"Job description file {job_file.name} is empty after trimming.") + + prompt_text = build_normalizer_prompt(raw_text) + + with TemporaryDirectory() as tmp_dir_str: + tmp_dir = Path(tmp_dir_str) + prompt_path = tmp_dir / "normalize_prompt.md" + prompt_path.write_text(prompt_text, encoding="utf-8") + + output_path = tmp_dir / "normalize_output.md" + run_codex(prompt_path, output_path, CODEX_NORMALIZER_COMMAND_TEMPLATE) + normalized_text = output_path.read_text(encoding="utf-8").strip() + + return parse_normalized_output(normalized_text) + + def move_with_unique_target(source: Path, destination_dir: Path) -> Path: """Move source into destination_dir, avoiding collisions with numeric suffixes.""" destination_dir.mkdir(parents=True, exist_ok=True) @@ -240,17 +327,30 @@ def move_with_unique_target(source: Path, destination_dir: Path) -> Path: def process_job(job_file: Path) -> None: - """Combine inputs, run Codex, and archive outputs.""" + """Normalize the job description, run Codex, and archive outputs.""" timestamp = datetime.now().astimezone() out_dir = build_timestamp_dir(OUTBOX, timestamp) processed_dir = build_timestamp_dir(PROCESSED, timestamp) - 
inputs = read_inputs(job_file) - prompt_text = build_prompt_text(inputs) + resume_path = ensure_single_resume() + normalized = normalize_job_description(job_file) - safe_resume_stem = sanitize_stem(inputs.resume.stem) - safe_job_stem = sanitize_stem(job_file.stem) - output_filename = f"{safe_resume_stem}-for-{safe_job_stem}.md" + if RESOLVED_PROMPT_TEMPLATE is None: + raise FatalConfigurationError("Prompt template was not resolved during startup.") + + prompt_text = build_prompt_text( + resume_path, + normalized.description_markdown, + RESOLVED_PROMPT_TEMPLATE, + ) + + safe_company = slugify(normalized.company) + safe_title = slugify(normalized.job_title) + if safe_company and safe_title: + output_stem = f"{safe_company}-{safe_title}" + else: + output_stem = sanitize_stem(job_file.stem) + output_filename = f"{output_stem}.md" with TemporaryDirectory() as tmp_dir_str: tmp_dir = Path(tmp_dir_str) @@ -258,21 +358,36 @@ def process_job(job_file: Path) -> None: prompt_path.write_text(prompt_text, encoding="utf-8") output_path = tmp_dir / "codex_output.md" - - run_codex(prompt_path, output_path) + run_codex(prompt_path, output_path, CODEX_COMMAND_TEMPLATE) generated_output = out_dir / output_filename counter = 1 while generated_output.exists(): - generated_output = out_dir / f"{safe_resume_stem}-for-{safe_job_stem}_{counter}.md" + generated_output = out_dir / f"{output_stem}_{counter}.md" counter += 1 shutil.move(str(output_path), generated_output) - logging.info("Generated customized resume at %s", generated_output) + logging.info( + "Generated customized resume for %s - %s at %s", + normalized.company, + normalized.job_title, + generated_output, + ) - prompt_archive = out_dir / f"prompt-{safe_job_stem}.md" + prompt_archive = out_dir / f"prompt-{generated_output.stem}.md" prompt_archive.write_text(prompt_text, encoding="utf-8") + normalized_archive = out_dir / f"job-description-{generated_output.stem}.md" + normalized_archive.write_text( + ( + f"Company: 
{normalized.company}\n" + f"Job Title: {normalized.job_title}\n\n" + "# Job Description\n" + f"{normalized.description_markdown}\n" + ), + encoding="utf-8", + ) + processed_target = move_with_unique_target(job_file, processed_dir) logging.info( "Archived job description %s to %s", @@ -309,10 +424,10 @@ def main() -> None: logging.info("Resume customizer watcher started") while True: - job_files = sorted(INBOX.glob("*.md")) + job_paths = sorted(INBOX.iterdir()) try: - job_file = ensure_single_job(job_files) + job_file = ensure_single_job(job_paths) except FatalConfigurationError as exc: logging.error("Fatal configuration error: %s", exc) raise SystemExit(2) from exc diff --git a/input/README.md b/input/README.md index 3406c32..0f190d2 100644 --- a/input/README.md +++ b/input/README.md @@ -4,27 +4,29 @@ The input side of ResumeCustomizer prepares job-specific Markdown resumes by sti ## Workflow Recap 1. Ensure `input/resume/` contains exactly one Markdown resume. -2. Drop a single job-description Markdown into `input/ForCustomizing/inbox/`. +2. Drop a single job-description file (plain text or Markdown) into `input/ForCustomizing/inbox/`. 3. Start the watcher stack (`input/Docker/run-input-processor.sh up -d`). -4. The watcher combines the resume, job description, and the resolved instruction prompt (defaulting to `templates/ResumeCustomizerPrompt.md.example`) into a prompt, runs the Codex CLI, and writes the generated resume to `ForCustomizing/outbox/YYYY/MM/DD/HHMM/`. -5. Successful runs archive the job description under `ForCustomizing/processed/` and copy the prompt used into the same outbox folder. Failures move the job description into `ForCustomizing/failed/` for review. +4. 
The watcher normalizes the messy job description via Codex (using `templates/JobDescriptionNormalizerPrompt.md.example` by default), then combines the cleaned Markdown, the base resume, and the resolved customization prompt into a second Codex run that writes the generated resume to `ForCustomizing/outbox/YYYY/MM/DD/HHMM/<company>-<job-title>.md`. +5. Successful runs archive the job description under `ForCustomizing/processed/`, copy both the prompt and the cleaned job description into the same outbox folder, and leave the Codex output for human review. Failures move the job description into `ForCustomizing/failed/`. The human operator reviews the Codex output Markdown, makes any edits, and then manually hands it off to the output pipeline for document rendering. ## Container Stack The watcher lives in `input/Docker/`: - `Dockerfile` – builds a Node/Python base image, installs gosu, and prepares a non-root `codex` user. -- `watch_and_customize.py` – polls the inbox, validates preconditions, resolves the prompt template (`ResumeCustomizerPrompt.md` or its `.example` fallback), constructs prompts, runs Codex, and routes files. +- `watch_and_customize.py` – polls the inbox, validates preconditions, resolves both templates (normalizer and customization), cleans the job description, builds prompts, runs Codex twice, and routes files. - `entrypoint.sh` – maps the container user to the caller’s UID/GID and ensures shared directories exist. - `run-input-processor.sh` – wrapper around `docker compose` that mounts your `~/.codex` directory and forwards CLI arguments. - `docker-compose.yml` – defines the container, volumes, environment variables, and restart policy (`no` so fatal errors halt the stack). ### Templates -- `templates/ResumeCustomizerPrompt.md.example` ships with default Codex instructions. -- To customize, copy the `.example` file to `templates/ResumeCustomizerPrompt.md` (the `.gitignore` keeps your local overrides out of version control). 
+- `templates/JobDescriptionNormalizerPrompt.md.example` ships with default instructions that clean recruiter chatter and extract company/role details. Copy it to `JobDescriptionNormalizerPrompt.md` to override. +- `templates/ResumeCustomizerPrompt.md.example` ships with default resume-customization instructions. Copy it to `ResumeCustomizerPrompt.md` to override. +- The `.gitignore` in `templates/` keeps local overrides out of version control. ### Key Environment Variables -- `CODEX_COMMAND_TEMPLATE` – format string for invoking Codex (placeholders: `{prompt}`, `{output}`). +- `CODEX_COMMAND_TEMPLATE` – format string for the resume-customization Codex run (placeholders: `{prompt}`, `{output}`). +- `CODEX_NORMALIZER_COMMAND_TEMPLATE` – optional override for the normalization Codex run (defaults to `CODEX_COMMAND_TEMPLATE`). - `POLL_INTERVAL_SECONDS` – watch loop delay (defaults to 5). - `CODEX_TIMEOUT_SECONDS` – wall-clock timeout for each Codex call (defaults to 600). - `CODEX_CONFIG_DIR` – host path to mount as `/home/codex/.codex` (defaults to `${HOME}/.codex` via the wrapper). diff --git a/input/templates/.gitignore b/input/templates/.gitignore index 7b5c605..064cd9a 100644 --- a/input/templates/.gitignore +++ b/input/templates/.gitignore @@ -1,2 +1,4 @@ ResumeCustomizerPrompt.md +JobDescriptionNormalizerPrompt.md !ResumeCustomizerPrompt.md.example +!JobDescriptionNormalizerPrompt.md.example diff --git a/input/templates/JobDescriptionNormalizerPrompt.md.example b/input/templates/JobDescriptionNormalizerPrompt.md.example new file mode 100644 index 0000000..160ebd7 --- /dev/null +++ b/input/templates/JobDescriptionNormalizerPrompt.md.example @@ -0,0 +1,27 @@ +# Default instructions that clean messy job descriptions before resume customization. +# Copy to `JobDescriptionNormalizerPrompt.md` (same directory) to override locally. + +You are an expert technical recruiter. 
+ +Given the following “job description” input, which may contain greetings, recruiter chatter, +email signatures, address blocks, or forwarding artifacts, produce cleaned Markdown with +this exact structure: + +``` +Company: <company name> +Job Title: <job title> + +# Job Description +<cleaned job description in Markdown> +``` + +Guidelines: +- Strip all chatter that is not part of the actual role description. +- If multiple companies or roles appear, pick the primary one the candidate should target. +- Normalize whitespace, headings, and bullet lists in the Markdown output. +- Omit personal identifiers for recruiters or candidates unless they are essential job facts. +- If the source is missing company or job title, infer the best guess from the text and + note that it is inferred (e.g., “<company name> (inferred)”). +- Never add commentary outside the format shown above. + +The raw content follows.