From 144e5130685f41f2b1ed8d8ba46753e27bcea7c7 Mon Sep 17 00:00:00 2001 From: Anish Sarkar <104695310+AnishSarkar22@users.noreply.github.com> Date: Wed, 22 Apr 2026 18:46:17 +0530 Subject: [PATCH] feat: implement resume page limit functionality --- .../app/agents/new_chat/system_prompt.py | 15 +- .../app/agents/new_chat/tools/resume.py | 225 +++++++++++++++--- .../new_chat/tools/test_resume_page_limits.py | 213 +++++++++++++++++ .../components/tool-ui/generate-resume.tsx | 1 + 4 files changed, 411 insertions(+), 43 deletions(-) create mode 100644 surfsense_backend/tests/unit/agents/new_chat/tools/test_resume_page_limits.py diff --git a/surfsense_backend/app/agents/new_chat/system_prompt.py b/surfsense_backend/app/agents/new_chat/system_prompt.py index b7b3d6b33..9b8a7e0f9 100644 --- a/surfsense_backend/app/agents/new_chat/system_prompt.py +++ b/surfsense_backend/app/agents/new_chat/system_prompt.py @@ -450,6 +450,9 @@ _TOOL_INSTRUCTIONS["generate_resume"] = """ - WHEN NOT TO CALL: General career advice, resume tips, cover letters, or reviewing a resume without making changes. For cover letters, use generate_report instead. - The tool produces Typst source code that is compiled to a PDF preview automatically. + - PAGE POLICY: + - Default behavior is ONE PAGE. For new resume creation, set max_pages=1 unless the user explicitly asks for more. + - If the user requests a longer resume (e.g., "make it 2 pages"), set max_pages to that value. - Args: - user_info: The user's resume content — work experience, education, skills, contact info, etc. Can be structured or unstructured text. @@ -465,6 +468,7 @@ _TOOL_INSTRUCTIONS["generate_resume"] = """ "keep it to one page"). For revisions, describe what to change. - parent_report_id: Set this when the user wants to MODIFY an existing resume from this conversation. Use the report_id from a previous generate_resume result. + - max_pages: Maximum resume length in pages (integer 1-5). Default is 1. 
- Returns: Dict with status, report_id, title, and content_type. - After calling: Give a brief confirmation. Do NOT paste resume content in chat. Do NOT mention report_id or any internal IDs — the resume card is shown automatically. - VERSIONING: Same rules as generate_report — set parent_report_id for modifications @@ -473,17 +477,20 @@ _TOOL_INSTRUCTIONS["generate_resume"] = """ _TOOL_EXAMPLES["generate_resume"] = """ - User: "Build me a resume. I'm John Doe, engineer at Acme Corp..." - - Call: `generate_resume(user_info="John Doe, engineer at Acme Corp...")` + - Call: `generate_resume(user_info="John Doe, engineer at Acme Corp...", max_pages=1)` - WHY: Has creation verb "build" + resume → call the tool. - User: "Create my CV with this info: [experience, education, skills]" - - Call: `generate_resume(user_info="[experience, education, skills]")` + - Call: `generate_resume(user_info="[experience, education, skills]", max_pages=1)` - User: "Build me a resume" (and there is a resume/CV document in the conversation context) - Extract the FULL content from the document in context, then call: - `generate_resume(user_info="Name: John Doe\\nEmail: john@example.com\\n\\nExperience:\\n- Senior Engineer at Acme Corp (2020-2024)\\n Led team of 5...\\n\\nEducation:\\n- BS Computer Science, MIT (2016-2020)\\n\\nSkills: Python, TypeScript, AWS...")` + `generate_resume(user_info="Name: John Doe\\nEmail: john@example.com\\n\\nExperience:\\n- Senior Engineer at Acme Corp (2020-2024)\\n Led team of 5...\\n\\nEducation:\\n- BS Computer Science, MIT (2016-2020)\\n\\nSkills: Python, TypeScript, AWS...", max_pages=1)` - WHY: Document content is available in context — extract ALL of it into user_info. Do NOT ignore referenced documents. 
- User: (after resume generated) "Change my title to Senior Engineer" - - Call: `generate_resume(user_info="", user_instructions="Change the job title to Senior Engineer", parent_report_id=<report_id>)` + - Call: `generate_resume(user_info="", user_instructions="Change the job title to Senior Engineer", parent_report_id=<report_id>, max_pages=1)` - WHY: Modification verb "change" + refers to existing resume → set parent_report_id. +- User: (after resume generated) "Make this 2 pages and expand projects" + - Call: `generate_resume(user_info="", user_instructions="Expand projects and keep this to at most 2 pages", parent_report_id=<report_id>, max_pages=2)` + - WHY: Explicit page increase request → set max_pages to 2. - User: "How should I structure my resume?" - Do NOT call generate_resume. Answer in chat with advice. - WHY: No creation/modification verb. diff --git a/surfsense_backend/app/agents/new_chat/tools/resume.py b/surfsense_backend/app/agents/new_chat/tools/resume.py index b1962f8d1..02ce1604a 100644 --- a/surfsense_backend/app/agents/new_chat/tools/resume.py +++ b/surfsense_backend/app/agents/new_chat/tools/resume.py @@ -13,11 +13,13 @@ Uses the same short-lived session pattern as generate_report so no DB connection is held during the long LLM call. """ +import io import logging import re from datetime import UTC, datetime from typing import Any +import pypdf import typst from langchain_core.callbacks import dispatch_custom_event from langchain_core.messages import HumanMessage @@ -188,11 +190,15 @@ RULES: - Keep content professional, concise, and achievement-oriented. - Use action verbs for bullet points (Led, Built, Designed, Reduced, etc.). - This template works for ALL professions — adapt sections to the user's field. +- Default behavior should prioritize concise one-page content. 
""", }, } DEFAULT_TEMPLATE = "classic" +MIN_RESUME_PAGES = 1 +MAX_RESUME_PAGES = 5 +MAX_COMPRESSION_ATTEMPTS = 2 # ─── Template Helpers ───────────────────────────────────────────────────────── @@ -315,6 +321,8 @@ You are an expert resume writer. Generate professional resume content as Typst m **User Information:** {user_info} +**Target Maximum Pages:** {max_pages} + {user_instructions_section} Generate the resume content now (starting with = Full Name): @@ -326,6 +334,8 @@ Apply ONLY the requested changes — do NOT rewrite sections that are not affect {llm_reference} +**Target Maximum Pages:** {max_pages} + **Modification Instructions:** {user_instructions} **EXISTING RESUME CONTENT:** @@ -352,6 +362,28 @@ The resume content you generated failed to compile. Fix the error while preservi (starting with = Full Name), NOT the #import or #show rule:** """ +_COMPRESS_TO_PAGE_LIMIT_PROMPT = """\ +The resume compiles, but it exceeds the maximum allowed page count. +Compress the resume while preserving high-impact accomplishments and role relevance. + +{llm_reference} + +**Target Maximum Pages:** {max_pages} +**Current Page Count:** {actual_pages} +**Compression Attempt:** {attempt_number} + +Compression priorities (in this order): +1) Keep recent, high-impact, role-relevant bullets. +2) Remove low-impact or redundant bullets. +3) Shorten verbose wording while preserving meaning. +4) Trim older or less relevant details before recent ones. + +Return the complete updated Typst content (starting with = Full Name), and keep it at or below the target pages. 
+ +**EXISTING RESUME CONTENT:** +{previous_content} +""" + # ─── Helpers ───────────────────────────────────────────────────────────────── @@ -373,6 +405,24 @@ def _compile_typst(source: str) -> bytes: return typst.compile(source.encode("utf-8")) +def _count_pdf_pages(pdf_bytes: bytes) -> int: + """Count the number of pages in compiled PDF bytes.""" + with io.BytesIO(pdf_bytes) as pdf_stream: + reader = pypdf.PdfReader(pdf_stream) + return len(reader.pages) + + +def _validate_max_pages(max_pages: int) -> int: + """Validate and normalize max_pages input.""" + if MIN_RESUME_PAGES <= max_pages <= MAX_RESUME_PAGES: + return max_pages + msg = ( + f"max_pages must be between {MIN_RESUME_PAGES} and " + f"{MAX_RESUME_PAGES}. Received: {max_pages}" + ) + raise ValueError(msg) + + # ─── Tool Factory ─────────────────────────────────────────────────────────── @@ -394,6 +444,7 @@ def create_generate_resume_tool( user_info: str, user_instructions: str | None = None, parent_report_id: int | None = None, + max_pages: int = 1, ) -> dict[str, Any]: """ Generate a professional resume as a Typst document. @@ -426,6 +477,8 @@ def create_generate_resume_tool( "use a modern style"). For revisions, describe what to change. parent_report_id: ID of a previous resume to revise (creates new version in the same version group). + max_pages: Maximum number of pages for the generated resume. + Defaults to 1. Allowed range: 1-5. Returns: Dict with status, report_id, title, and content_type. 
@@ -469,6 +522,19 @@ def create_generate_resume_tool( return None try: + try: + validated_max_pages = _validate_max_pages(max_pages) + except ValueError as e: + error_msg = str(e) + report_id = await _save_failed_report(error_msg) + return { + "status": "failed", + "error": error_msg, + "report_id": report_id, + "title": "Resume", + "content_type": "typst", + } + # ── Phase 1: READ ───────────────────────────────────────────── async with shielded_async_session() as read_session: if parent_report_id: @@ -512,6 +578,7 @@ def create_generate_resume_tool( parent_body = _strip_header(parent_content) prompt = _REVISION_PROMPT.format( llm_reference=llm_reference, + max_pages=validated_max_pages, user_instructions=user_instructions or "Improve and refine the resume.", previous_content=parent_body, @@ -524,6 +591,7 @@ def create_generate_resume_tool( prompt = _RESUME_PROMPT.format( llm_reference=llm_reference, user_info=user_info, + max_pages=validated_max_pages, user_instructions_section=user_instructions_section, ) @@ -551,49 +619,116 @@ def create_generate_resume_tool( ) name = _extract_name(body) or "Resume" - header = _build_header(template, name) - typst_source = header + body + typst_source = "" + actual_pages = 0 + compression_attempts = 0 + target_page_met = False - compile_error: str | None = None - for attempt in range(2): - try: - _compile_typst(typst_source) - compile_error = None - break - except Exception as e: - compile_error = str(e) - logger.warning( - f"[generate_resume] Compile attempt {attempt + 1} failed: {compile_error}" + for compression_round in range(MAX_COMPRESSION_ATTEMPTS + 1): + header = _build_header(template, name) + typst_source = header + body + compile_error: str | None = None + pdf_bytes: bytes | None = None + + for compile_attempt in range(2): + try: + pdf_bytes = _compile_typst(typst_source) + compile_error = None + break + except Exception as e: + compile_error = str(e) + logger.warning( + "[generate_resume] Compile attempt %s failed: 
%s", + compile_attempt + 1, + compile_error, + ) + + if compile_attempt == 0: + dispatch_custom_event( + "report_progress", + { + "phase": "fixing", + "message": "Fixing compilation issue...", + }, + ) + fix_prompt = _FIX_COMPILE_PROMPT.format( + llm_reference=llm_reference, + error=compile_error, + full_source=typst_source, + ) + fix_response = await llm.ainvoke( + [HumanMessage(content=fix_prompt)] + ) + if fix_response.content and isinstance( + fix_response.content, str + ): + body = _strip_typst_fences(fix_response.content) + body = _strip_imports(body) + name = _extract_name(body) or name + header = _build_header(template, name) + typst_source = header + body + + if compile_error or not pdf_bytes: + error_msg = ( + "Typst compilation failed after 2 attempts: " + f"{compile_error or 'Unknown compile error'}" ) + report_id = await _save_failed_report(error_msg) + return { + "status": "failed", + "error": error_msg, + "report_id": report_id, + "title": "Resume", + "content_type": "typst", + } - if attempt == 0: - dispatch_custom_event( - "report_progress", - { - "phase": "fixing", - "message": "Fixing compilation issue...", - }, - ) - fix_prompt = _FIX_COMPILE_PROMPT.format( - llm_reference=llm_reference, - error=compile_error, - full_source=typst_source, - ) - fix_response = await llm.ainvoke( - [HumanMessage(content=fix_prompt)] - ) - if fix_response.content and isinstance( - fix_response.content, str - ): - body = _strip_typst_fences(fix_response.content) - body = _strip_imports(body) - name = _extract_name(body) or name - header = _build_header(template, name) - typst_source = header + body + actual_pages = _count_pdf_pages(pdf_bytes) + if actual_pages <= validated_max_pages: + target_page_met = True + break - if compile_error: + if compression_round >= MAX_COMPRESSION_ATTEMPTS: + break + + compression_attempts += 1 + dispatch_custom_event( + "report_progress", + { + "phase": "compressing", + "message": f"Condensing resume to {validated_max_pages} 
page(s)...", + }, + ) + compress_prompt = _COMPRESS_TO_PAGE_LIMIT_PROMPT.format( + llm_reference=llm_reference, + max_pages=validated_max_pages, + actual_pages=actual_pages, + attempt_number=compression_attempts, + previous_content=body, + ) + compress_response = await llm.ainvoke( + [HumanMessage(content=compress_prompt)] + ) + if not compress_response.content or not isinstance( + compress_response.content, str + ): + error_msg = "LLM returned empty content while compressing resume" + report_id = await _save_failed_report(error_msg) + return { + "status": "failed", + "error": error_msg, + "report_id": report_id, + "title": "Resume", + "content_type": "typst", + } + + body = _strip_typst_fences(compress_response.content) + body = _strip_imports(body) + name = _extract_name(body) or name + + if actual_pages > MAX_RESUME_PAGES: error_msg = ( - f"Typst compilation failed after 2 attempts: {compile_error}" + "Resume exceeds hard page limit after compression retries. " + f"Hard limit: <= {MAX_RESUME_PAGES} page(s), actual: {actual_pages}." ) report_id = await _save_failed_report(error_msg) return { @@ -616,6 +751,11 @@ def create_generate_resume_tool( "status": "ready", "word_count": len(typst_source.split()), "char_count": len(typst_source), + "target_max_pages": validated_max_pages, + "actual_page_count": actual_pages, + "page_limit_enforced": True, + "compression_attempts": compression_attempts, + "target_page_met": target_page_met, } async with shielded_async_session() as write_session: @@ -647,7 +787,14 @@ def create_generate_resume_tool( "title": resume_title, "content_type": "typst", "is_revision": bool(parent_content), - "message": f"Resume generated successfully: {resume_title}", + "message": ( + f"Resume generated successfully: {resume_title}" + if target_page_met + else ( + f"Resume generated, but could not fit the target of <= {validated_max_pages} " + f"page(s). Final length: {actual_pages} page(s)." 
+ ) + ), } except Exception as e: diff --git a/surfsense_backend/tests/unit/agents/new_chat/tools/test_resume_page_limits.py b/surfsense_backend/tests/unit/agents/new_chat/tools/test_resume_page_limits.py new file mode 100644 index 000000000..4f93ad732 --- /dev/null +++ b/surfsense_backend/tests/unit/agents/new_chat/tools/test_resume_page_limits.py @@ -0,0 +1,213 @@ +"""Unit tests for resume page-limit helpers and enforcement flow.""" + +import io +from types import SimpleNamespace +from unittest.mock import AsyncMock + +import pypdf +import pytest + +from app.agents.new_chat.tools import resume as resume_tool + +pytestmark = pytest.mark.unit + + +class _FakeReport: + _next_id = 1000 + + def __init__(self, **kwargs): + for key, value in kwargs.items(): + setattr(self, key, value) + self.id = None + + +class _FakeSession: + def __init__(self, parent_report=None): + self.parent_report = parent_report + self.added: list[_FakeReport] = [] + + async def get(self, _model, _id): + return self.parent_report + + def add(self, report): + self.added.append(report) + + async def commit(self): + for report in self.added: + if getattr(report, "id", None) is None: + report.id = _FakeReport._next_id + _FakeReport._next_id += 1 + + async def refresh(self, _report): + return None + + +class _SessionContext: + def __init__(self, session): + self.session = session + + async def __aenter__(self): + return self.session + + async def __aexit__(self, exc_type, exc, tb): + return False + + +class _SessionFactory: + def __init__(self, sessions): + self._sessions = list(sessions) + + def __call__(self): + if not self._sessions: + raise RuntimeError("No fake sessions left") + return _SessionContext(self._sessions.pop(0)) + + +def _make_pdf_with_pages(page_count: int) -> bytes: + writer = pypdf.PdfWriter() + for _ in range(page_count): + writer.add_blank_page(width=612, height=792) + output = io.BytesIO() + writer.write(output) + return output.getvalue() + + +def 
test_count_pdf_pages_reads_compiled_bytes() -> None: + pdf_bytes = _make_pdf_with_pages(2) + assert resume_tool._count_pdf_pages(pdf_bytes) == 2 + + +def test_validate_max_pages_rejects_out_of_range() -> None: + with pytest.raises(ValueError): + resume_tool._validate_max_pages(0) + with pytest.raises(ValueError): + resume_tool._validate_max_pages(6) + + +@pytest.mark.asyncio +async def test_generate_resume_defaults_to_one_page_target(monkeypatch) -> None: + read_session = _FakeSession() + write_session = _FakeSession() + session_factory = _SessionFactory([read_session, write_session]) + monkeypatch.setattr(resume_tool, "shielded_async_session", session_factory) + monkeypatch.setattr(resume_tool, "Report", _FakeReport) + + prompts: list[str] = [] + + async def _llm_invoke(messages): + prompts.append(messages[0].content) + return SimpleNamespace(content="= Jane Doe\n== Experience\n- Built systems") + + llm = SimpleNamespace(ainvoke=AsyncMock(side_effect=_llm_invoke)) + monkeypatch.setattr( + resume_tool, + "get_document_summary_llm", + AsyncMock(return_value=llm), + ) + monkeypatch.setattr(resume_tool, "_compile_typst", lambda _source: b"pdf") + monkeypatch.setattr(resume_tool, "_count_pdf_pages", lambda _pdf: 1) + + tool = resume_tool.create_generate_resume_tool(search_space_id=1, thread_id=1) + result = await tool.ainvoke({"user_info": "Jane Doe experience"}) + + assert result["status"] == "ready" + assert prompts + assert "**Target Maximum Pages:** 1" in prompts[0] + + +@pytest.mark.asyncio +async def test_generate_resume_compresses_when_over_limit(monkeypatch) -> None: + read_session = _FakeSession() + write_session = _FakeSession() + session_factory = _SessionFactory([read_session, write_session]) + monkeypatch.setattr(resume_tool, "shielded_async_session", session_factory) + monkeypatch.setattr(resume_tool, "Report", _FakeReport) + + responses = [ + SimpleNamespace(content="= Jane Doe\n== Experience\n- Detailed bullet 1"), + SimpleNamespace(content="= Jane 
Doe\n== Experience\n- Condensed bullet"), + ] + llm = SimpleNamespace(ainvoke=AsyncMock(side_effect=responses)) + monkeypatch.setattr( + resume_tool, + "get_document_summary_llm", + AsyncMock(return_value=llm), + ) + monkeypatch.setattr(resume_tool, "_compile_typst", lambda _source: b"pdf") + page_counts = iter([2, 1]) + monkeypatch.setattr(resume_tool, "_count_pdf_pages", lambda _pdf: next(page_counts)) + + tool = resume_tool.create_generate_resume_tool(search_space_id=1, thread_id=1) + result = await tool.ainvoke({"user_info": "Jane Doe experience", "max_pages": 1}) + + assert result["status"] == "ready" + assert write_session.added, "Expected successful report write" + metadata = write_session.added[0].report_metadata + assert metadata["target_max_pages"] == 1 + assert metadata["actual_page_count"] == 1 + assert metadata["compression_attempts"] == 1 + assert metadata["page_limit_enforced"] is True + + +@pytest.mark.asyncio +async def test_generate_resume_returns_ready_when_target_not_met(monkeypatch) -> None: + read_session = _FakeSession() + write_session = _FakeSession() + session_factory = _SessionFactory([read_session, write_session]) + monkeypatch.setattr(resume_tool, "shielded_async_session", session_factory) + monkeypatch.setattr(resume_tool, "Report", _FakeReport) + + responses = [ + SimpleNamespace(content="= Jane Doe\n== Experience\n- Long detail"), + SimpleNamespace(content="= Jane Doe\n== Experience\n- Still long"), + SimpleNamespace(content="= Jane Doe\n== Experience\n- Still too long"), + ] + llm = SimpleNamespace(ainvoke=AsyncMock(side_effect=responses)) + monkeypatch.setattr( + resume_tool, + "get_document_summary_llm", + AsyncMock(return_value=llm), + ) + monkeypatch.setattr(resume_tool, "_compile_typst", lambda _source: b"pdf") + page_counts = iter([3, 3, 2]) + monkeypatch.setattr(resume_tool, "_count_pdf_pages", lambda _pdf: next(page_counts)) + + tool = resume_tool.create_generate_resume_tool(search_space_id=1, thread_id=1) + result = await 
tool.ainvoke({"user_info": "Jane Doe experience", "max_pages": 1}) + + assert result["status"] == "ready" + assert "could not fit the target" in (result["message"] or "").lower() + metadata = write_session.added[0].report_metadata + assert metadata["target_page_met"] is False + assert metadata["actual_page_count"] == 2 + + +@pytest.mark.asyncio +async def test_generate_resume_fails_when_hard_limit_exceeded(monkeypatch) -> None: + read_session = _FakeSession() + failed_session = _FakeSession() + session_factory = _SessionFactory([read_session, failed_session]) + monkeypatch.setattr(resume_tool, "shielded_async_session", session_factory) + monkeypatch.setattr(resume_tool, "Report", _FakeReport) + + responses = [ + SimpleNamespace(content="= Jane Doe\n== Experience\n- Long detail"), + SimpleNamespace(content="= Jane Doe\n== Experience\n- Still long"), + SimpleNamespace(content="= Jane Doe\n== Experience\n- Still too long"), + ] + llm = SimpleNamespace(ainvoke=AsyncMock(side_effect=responses)) + monkeypatch.setattr( + resume_tool, + "get_document_summary_llm", + AsyncMock(return_value=llm), + ) + monkeypatch.setattr(resume_tool, "_compile_typst", lambda _source: b"pdf") + page_counts = iter([7, 6, 6]) + monkeypatch.setattr(resume_tool, "_count_pdf_pages", lambda _pdf: next(page_counts)) + + tool = resume_tool.create_generate_resume_tool(search_space_id=1, thread_id=1) + result = await tool.ainvoke({"user_info": "Jane Doe experience", "max_pages": 1}) + + assert result["status"] == "failed" + assert "hard page limit" in (result["error"] or "").lower() + assert failed_session.added, "Expected failed report persistence" diff --git a/surfsense_web/components/tool-ui/generate-resume.tsx b/surfsense_web/components/tool-ui/generate-resume.tsx index f329ff95d..1290a70ea 100644 --- a/surfsense_web/components/tool-ui/generate-resume.tsx +++ b/surfsense_web/components/tool-ui/generate-resume.tsx @@ -20,6 +20,7 @@ const GenerateResumeArgsSchema = z.object({ user_info: z.string(), 
user_instructions: z.string().nullish(), parent_report_id: z.number().nullish(), + max_pages: z.number().int().min(1).max(5).optional(), }); const GenerateResumeResultSchema = z.object({