Merge pull request #826 from AnishSarkar22/fix/report-artifact

fix: improve report artifact & enhance revision handling
2026-07-08 22:22:17 +02:00 · 2026-02-19 19:10:21 -08:00 · 2026-02-19 19:10:21 -08:00 · f15833fec6
commit f15833fec6
parent 9aef655566 0ead9fc0cc
6 changed files with 956 additions and 181 deletions
--- a/surfsense_backend/app/agents/new_chat/system_prompt.py
+++ b/surfsense_backend/app/agents/new_chat/system_prompt.py
@ -23,6 +23,8 @@ Today's date (UTC): {resolved_today}

 When writing mathematical formulas or equations, ALWAYS use LaTeX notation. NEVER use backtick code spans or Unicode symbols for math.

+NEVER expose internal tool parameter names, backend IDs, or implementation details to the user. Always use natural, user-friendly language instead.
+
 </system_instruction>
 """

@ -37,6 +39,8 @@ Today's date (UTC): {resolved_today}

 When writing mathematical formulas or equations, ALWAYS use LaTeX notation. NEVER use backtick code spans or Unicode symbols for math.

+NEVER expose internal tool parameter names, backend IDs, or implementation details to the user. Always use natural, user-friendly language instead.
+
 </system_instruction>
 """

@ -96,41 +100,43 @@ You have access to the following tools:
  - IMPORTANT: Only one podcast can be generated at a time. If a podcast is already being generated, the tool will return status "already_generating".
  - After calling this tool, inform the user that podcast generation has started and they will see the player when it's ready (takes 3-5 minutes).

-3. generate_report: Generate a structured Markdown report from provided content.
-  - Use this when the user asks to create, generate, write, produce, draft, or summarize into a report-style deliverable.
-  - DECISION RULE (HIGH PRIORITY): If the user asks for a report in any form, call `generate_report` instead of writing the full report directly in chat.
-  - Only skip `generate_report` if the user explicitly asks for chat-only output (e.g., "just answer in chat", "no report card", "don't generate a report").
-  - Trigger classes include:
-    * Direct trigger words: report, document, memo, letter, template
-    * Creation-intent phrases: "write a document/report/post/article"
-    * File-intent words: requests containing "save", "file", or "document" when intent is to create a report-like deliverable
-    * Word-doc specific triggers: professional report-style deliverable, professional document, Word doc, .docx
-    * Other report-like output intents: one-pager, blog post, article, standalone written content, comprehensive guide
-    * General artifact-style intents: analysis / writing as substantial deliverables
-  - Trigger phrases include:
-    * "generate a report about", "write a report", "produce a report"
-    * "create a detailed report about", "make a research report on"
-    * "summarize this into a report", "turn this into a report"
-    * "write a report/document", "draft a report"
-    * "create an executive summary", "make a briefing note", "write a one-pager"
-    * "write a blog post", "write an article", "create a comprehensive guide"
-    * "create a small report", "write a short report", "make a quick report", "brief report for class"
+3. generate_report: Generate or revise a structured Markdown report artifact.
+  - WHEN TO CALL THIS TOOL — the message must contain a creation or modification VERB directed at producing a deliverable:
+    * Creation verbs: write, create, generate, draft, produce, summarize into, turn into, make
+    * Modification verbs: revise, update, expand, add (a section), rewrite, make (it shorter/longer/formal)
+    * Example triggers: "generate a report about...", "write a document on...", "add a section about budget", "make the report shorter", "rewrite in formal tone"
+  - WHEN NOT TO CALL THIS TOOL (answer in chat instead):
+    * Questions or discussion about the report: "What can we add?", "What's missing?", "Is the data accurate?", "How could this be improved?"
+    * Suggestions or brainstorming: "What other topics could be covered?", "What else could be added?", "What would make this better?"
+    * Asking for explanations: "Can you explain section 2?", "Why did you include that?", "What does this part mean?"
+    * Quick follow-ups or critiques: "Is the conclusion strong enough?", "Are there any gaps?", "What about the competitors?"
+    * THE TEST: Does the message contain a creation/modification VERB (from the list above) directed at producing or changing a deliverable? If NO verb → answer conversationally in chat. Do NOT assume the user wants a revision just because a report exists in the conversation.
  - IMPORTANT FORMAT RULE: Reports are ALWAYS generated in Markdown.
  - Args:
-    - topic: The main topic or title of the report
-    - source_content: The text content to base the report on. This MUST be comprehensive and include:
-      * If discussing the current conversation: Include a detailed summary of the FULL chat history (all user questions and your responses)
-      * If based on knowledge base search: Include the key findings and insights from the search results
-      * You can combine both: conversation context + search results for richer reports
-      * The more detailed the source_content, the better the report quality
-    - report_style: Optional style. Options: "detailed" (default), "executive_summary", "deep_research", "brief"
-    - user_instructions: Optional specific instructions (e.g., "focus on financial impacts", "include recommendations")
+    - topic: Short title for the report (max ~8 words).
+    - source_content: The text content to base the report on.
+      * For source_strategy="conversation" or "provided": Include a comprehensive summary of the relevant content.
+      * For source_strategy="kb_search": Can be empty or minimal — the tool handles searching internally.
+      * For source_strategy="auto": Include what you have; the tool searches KB if it's not enough.
+    - source_strategy: Controls how the tool collects source material. One of:
+      * "conversation" — The conversation already contains enough context (prior Q&A, discussion, pasted text, scraped pages). Pass a thorough summary as source_content. Do NOT call search_knowledge_base separately.
+      * "kb_search" — The tool will search the knowledge base internally. Provide search_queries with 1-5 targeted queries. Do NOT call search_knowledge_base separately.
+      * "auto" — Use source_content if sufficient, otherwise fall back to internal KB search using search_queries.
+      * "provided" — Use only what is in source_content (default, backward-compatible).
+    - search_queries: When source_strategy is "kb_search" or "auto", provide 1-5 specific search queries for the knowledge base. These should be precise, not just the topic name repeated.
+    - report_style: Controls report depth. Options: "detailed" (DEFAULT), "deep_research", "brief".
+      Use "brief" ONLY when the user explicitly asks for a short/concise/one-page report (e.g., "one page", "keep it short", "brief report", "500 words"). Default to "detailed" for all other requests.
+    - user_instructions: Optional specific instructions (e.g., "focus on financial impacts", "include recommendations"). When revising (parent_report_id set), describe WHAT TO CHANGE. If the user mentions a length preference (e.g., "one page", "500 words", "2 pages"), include that VERBATIM here AND set report_style="brief".
+    - parent_report_id: Set this to the report_id from a previous generate_report result when the user wants to MODIFY an existing report. Do NOT set it for new reports or questions about reports.
  - Returns: A dictionary with status "ready" or "failed", report_id, title, and word_count.
  - The report is generated immediately in Markdown and displayed inline in the chat.
  - Export/download formats (e.g., PDF/DOCX) are produced from the generated Markdown report.
-  - SOURCE-COLLECTION RULE:
-    * If the user already provided enough source material (current chat content, uploaded files, pasted text, or a summarized video/article), generate the report directly from that.
-    * Use search_knowledge_base first when additional context is needed or the user asks for information beyond what is already available in the conversation.
+  - SOURCE STRATEGY DECISION (HIGH PRIORITY — follow this exactly):
+    * If the conversation already has substantive Q&A / discussion on the topic → use source_strategy="conversation" with a comprehensive summary as source_content. Do NOT call search_knowledge_base first.
+    * If the user wants a report on a topic not yet discussed → use source_strategy="kb_search" with targeted search_queries. Do NOT call search_knowledge_base first.
+    * If you have some content but might need more → use source_strategy="auto" with both source_content and search_queries.
+    * When revising an existing report (parent_report_id set) and the conversation has relevant context → use source_strategy="conversation". The revision will use the previous report content plus your source_content.
+    * NEVER call search_knowledge_base and then pass its results to generate_report. The tool handles KB search internally.
  - AFTER CALLING THIS TOOL: Do NOT repeat, summarize, or reproduce the report content in the chat. The report is already displayed as an interactive card that the user can open, read, copy, and export. Simply confirm that the report was generated (e.g., "I've generated your report on [topic]. You can view the Markdown report now, and export to PDF/DOCX from the card."). NEVER write out the report text in the chat.

 4. link_preview: Fetch metadata for a URL to display a rich preview card.
@ -363,15 +369,36 @@ _TOOLS_INSTRUCTIONS_EXAMPLES_COMMON = """
  - Then: `generate_podcast(source_content="Key insights about quantum computing from the knowledge base:\\n\\n[Comprehensive summary of all relevant search results with key facts, concepts, and findings]", podcast_title="Quantum Computing Explained")`

 - User: "Generate a report about AI trends"
-  - First search: `search_knowledge_base(query="AI trends")`
-  - Then: `generate_report(topic="AI Trends Report", source_content="Key insights about AI trends from the knowledge base:\\n\\n[Comprehensive summary of all relevant search results with key facts, concepts, and findings]", report_style="detailed")`
+  - Call: `generate_report(topic="AI Trends Report", source_strategy="kb_search", search_queries=["AI trends recent developments", "artificial intelligence industry trends", "AI market growth and predictions"], report_style="detailed")`
+  - WHY: Has creation verb "generate" → call the tool. No prior discussion → use kb_search.

 - User: "Write a research report from this conversation"
-  - Call: `generate_report(topic="Research Report", source_content="Complete conversation summary:\\n\\nUser asked about [topic 1]:\\n[Your detailed response]\\n\\nUser then asked about [topic 2]:\\n[Your detailed response]\\n\\n[Continue for all exchanges in the conversation]", report_style="deep_research")`
+  - Call: `generate_report(topic="Research Report", source_strategy="conversation", source_content="Complete conversation summary:\\n\\nUser asked about [topic 1]:\\n[Your detailed response]\\n\\nUser then asked about [topic 2]:\\n[Your detailed response]\\n\\n[Continue for all exchanges in the conversation]", report_style="deep_research")`
+  - WHY: Has creation verb "write" → call the tool. Conversation has the content → use source_strategy="conversation".

 - User: "Create a brief executive summary about our project progress"
-  - First search: `search_knowledge_base(query="project progress updates")`
-  - Then: `generate_report(topic="Project Progress Executive Summary", source_content="[Combined search results and conversation context]", report_style="executive_summary", user_instructions="Focus on milestones achieved and upcoming deadlines")`
+  - Call: `generate_report(topic="Project Progress Executive Summary", source_strategy="kb_search", search_queries=["project progress updates", "project milestones completed", "upcoming project deadlines"], report_style="brief", user_instructions="Focus on milestones achieved and upcoming deadlines")`
+  - WHY: Has creation verb "create" → call the tool. New topic → use kb_search.
+
+- User: (after extensive Q&A about React performance) "Turn this into a report"
+  - Call: `generate_report(topic="React Performance Optimization Guide", source_strategy="conversation", source_content="[Thorough summary of all Q&A from this conversation about React performance...]", report_style="detailed")`
+  - WHY: Has creation verb "turn into" → call the tool. Conversation has the content → use source_strategy="conversation".
+
+- User: (after a report on Climate Change was generated) "Add a section about carbon capture technologies"
+  - Call: `generate_report(topic="Climate Crisis: Causes, Impacts, and Solutions", source_strategy="conversation", source_content="[summary of conversation context if any]", parent_report_id=<previous_report_id>, user_instructions="Add a new section about carbon capture technologies")`
+  - WHY: Has modification verb "add" + specific deliverable target → call the tool with parent_report_id. Use source_strategy="conversation" since the report already exists.
+
+- User: (after a report was generated) "What else could we add to have more depth?"
+  - Do NOT call generate_report. Answer in chat with suggestions, e.g.: "Here are some areas we could expand: 1. ... 2. ... 3. ... Would you like me to add any of these to the report?"
+  - WHY: No creation/modification verb directed at producing a deliverable. This is a question asking for suggestions.
+
+- User: (after a report was generated) "Is the conclusion strong enough?"
+  - Do NOT call generate_report. Answer in chat, e.g.: "The conclusion covers X and Y well, but could be strengthened by adding Z. Want me to revise it?"
+  - WHY: This is a question/critique, not a modification request.
+
+- User: (after a report was generated) "What's missing from this report?"
+  - Do NOT call generate_report. Answer in chat with analysis of gaps.
+  - WHY: This is a question. The user is asking you to identify gaps, not to fix them yet.

 - User: "Check out https://dev.to/some-article"
  - Call: `link_preview(url="https://dev.to/some-article")`
--- a/surfsense_backend/app/agents/new_chat/tools/registry.py
+++ b/surfsense_backend/app/agents/new_chat/tools/registry.py
@ -130,14 +130,20 @@ BUILTIN_TOOLS: list[ToolDefinition] = [
        requires=["search_space_id", "db_session", "thread_id"],
    ),
    # Report generation tool (inline, short-lived sessions for DB ops)
+    # Supports internal KB search via source_strategy so the agent doesn't
+    # need to call search_knowledge_base separately before generating.
    ToolDefinition(
        name="generate_report",
        description="Generate a structured Markdown report from provided content",
        factory=lambda deps: create_generate_report_tool(
            search_space_id=deps["search_space_id"],
            thread_id=deps["thread_id"],
+            connector_service=deps.get("connector_service"),
+            available_connectors=deps.get("available_connectors"),
        ),
        requires=["search_space_id", "thread_id"],
+        # connector_service and available_connectors are optional —
+        # when missing, source_strategy="kb_search" degrades gracefully to "provided"
    ),
    # Link preview tool - fetches Open Graph metadata for URLs
    ToolDefinition(
--- a/surfsense_backend/app/agents/new_chat/tools/report.py
+++ b/surfsense_backend/app/agents/new_chat/tools/report.py
--- a/surfsense_backend/app/tasks/chat/stream_new_chat.py
+++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py
@ -28,7 +28,7 @@ from app.agents.new_chat.llm_config import (
    load_agent_config,
    load_llm_config_from_yaml,
 )
-from app.db import ChatVisibility, Document, SurfsenseDocsDocument
+from app.db import ChatVisibility, Document, Report, SurfsenseDocsDocument
 from app.prompts import TITLE_GENERATION_PROMPT_TEMPLATE
 from app.services.chat_session_state_service import (
    clear_ai_responding,
@ -226,6 +226,7 @@ async def _stream_agent_events(
    last_active_step_title: str = initial_step_title
    last_active_step_items: list[str] = initial_step_items or []
    just_finished_tool: bool = False
+    active_tool_depth: int = 0  # Track nesting: >0 means we're inside a tool

    def next_thinking_step_id() -> str:
        nonlocal thinking_step_counter
@ -250,6 +251,8 @@ async def _stream_agent_events(
        event_type = event.get("event", "")

        if event_type == "on_chat_model_stream":
+            if active_tool_depth > 0:
+                continue  # Suppress inner-tool LLM tokens from leaking into chat
            chunk = event.get("data", {}).get("chunk")
            if chunk and hasattr(chunk, "content"):
                content = chunk.content
@ -269,6 +272,7 @@ async def _stream_agent_events(
                    accumulated_text += content

        elif event_type == "on_tool_start":
+            active_tool_depth += 1
            tool_name = event.get("name", "unknown_tool")
            run_id = event.get("run_id", "")
            tool_input = event.get("data", {}).get("input", {})
@ -383,26 +387,18 @@ async def _stream_agent_events(
                    if isinstance(tool_input, dict)
                    else "Report"
                )
-                report_style = (
-                    tool_input.get("report_style", "detailed")
-                    if isinstance(tool_input, dict)
-                    else "detailed"
+                is_revision = bool(
+                    isinstance(tool_input, dict) and tool_input.get("parent_report_id")
                )
-                content_len = len(
-                    tool_input.get("source_content", "")
-                    if isinstance(tool_input, dict)
-                    else ""
-                )
-                last_active_step_title = "Generating report"
+                step_title = "Revising report" if is_revision else "Generating report"
+                last_active_step_title = step_title
                last_active_step_items = [
                    f"Topic: {report_topic}",
-                    f"Style: {report_style}",
-                    f"Source content: {content_len:,} characters",
-                    "Generating report with LLM...",
+                    "Analyzing source content...",
                ]
                yield streaming_service.format_thinking_step(
                    step_id=tool_step_id,
-                    title="Generating report",
+                    title=step_title,
                    status="in_progress",
                    items=last_active_step_items,
                )
@ -428,6 +424,7 @@ async def _stream_agent_events(
            )

        elif event_type == "on_tool_end":
+            active_tool_depth = max(0, active_tool_depth - 1)
            run_id = event.get("run_id", "")
            tool_name = event.get("name", "unknown_tool")
            raw_output = event.get("data", {}).get("output", "")
@ -589,12 +586,18 @@ async def _stream_agent_events(
                    if isinstance(tool_output, dict)
                    else 0
                )
+                is_revision = (
+                    tool_output.get("is_revision", False)
+                    if isinstance(tool_output, dict)
+                    else False
+                )
+                step_title = "Revising report" if is_revision else "Generating report"

                if report_status == "ready":
                    completed_items = [
-                        f"Title: {report_title}",
-                        f"Words: {word_count:,}",
-                        "Report generated successfully",
+                        f"Topic: {report_title}",
+                        f"{word_count:,} words",
+                        "Report ready",
                    ]
                elif report_status == "failed":
                    error_msg = (
@ -603,7 +606,7 @@ async def _stream_agent_events(
                        else "Unknown error"
                    )
                    completed_items = [
-                        f"Title: {report_title}",
+                        f"Topic: {report_title}",
                        f"Error: {error_msg[:50]}",
                    ]
                else:
@ -611,7 +614,7 @@ async def _stream_agent_events(

                yield streaming_service.format_thinking_step(
                    step_id=original_step_id,
-                    title="Generating report",
+                    title=step_title,
                    status="completed",
                    items=completed_items,
                )
@ -815,6 +818,43 @@ async def _stream_agent_events(
                    f"Tool {tool_name} completed", "success"
                )

+        elif event_type == "on_custom_event" and event.get("name") == "report_progress":
+            # Live progress updates from inside the generate_report tool
+            data = event.get("data", {})
+            message = data.get("message", "")
+            if message and last_active_step_id:
+                phase = data.get("phase", "")
+                # Always keep the "Topic: ..." line
+                topic_items = [
+                    item for item in last_active_step_items if item.startswith("Topic:")
+                ]
+
+                if phase in ("revising_section", "adding_section"):
+                    # During section-level ops: keep plan summary + show current op
+                    plan_items = [
+                        item
+                        for item in last_active_step_items
+                        if item.startswith("Topic:")
+                        or item.startswith("Modifying ")
+                        or item.startswith("Adding ")
+                        or item.startswith("Removing ")
+                    ]
+                    # Only keep plan_items that don't end with "..." (not progress lines)
+                    plan_items = [
+                        item for item in plan_items if not item.endswith("...")
+                    ]
+                    last_active_step_items = [*plan_items, message]
+                else:
+                    # Phase transitions: replace everything after topic
+                    last_active_step_items = [*topic_items, message]
+
+                yield streaming_service.format_thinking_step(
+                    step_id=last_active_step_id,
+                    title=last_active_step_title,
+                    status="in_progress",
+                    items=last_active_step_items,
+                )
+
        elif event_type in ("on_chain_end", "on_agent_end"):
            if current_text_id is not None:
                yield streaming_service.format_text_end(current_text_id)
@ -996,6 +1036,20 @@ async def stream_new_chat(
            )
            mentioned_surfsense_docs = list(result.scalars().all())

+        # Fetch the most recent report(s) in this thread so the LLM can
+        # easily find report_id for versioning decisions, instead of
+        # having to dig through conversation history.
+        recent_reports_result = await session.execute(
+            select(Report)
+            .filter(
+                Report.thread_id == chat_id,
+                Report.content.isnot(None),  # exclude failed reports
+            )
+            .order_by(Report.id.desc())
+            .limit(3)
+        )
+        recent_reports = list(recent_reports_result.scalars().all())
+
        # Format the user query with context (mentioned documents + SurfSense docs)
        final_query = user_query
        context_parts = []
@ -1010,6 +1064,27 @@ async def stream_new_chat(
                format_mentioned_surfsense_docs_as_context(mentioned_surfsense_docs)
            )

+        # Surface report IDs prominently so the LLM doesn't have to
+        # retrieve them from old tool responses in conversation history.
+        if recent_reports:
+            report_lines = []
+            for r in recent_reports:
+                report_lines.append(
+                    f'  - report_id={r.id}, title="{r.title}", '
+                    f'style="{r.report_style or "detailed"}"'
+                )
+            reports_listing = "\n".join(report_lines)
+            context_parts.append(
+                "<report_context>\n"
+                "Previously generated reports in this conversation:\n"
+                f"{reports_listing}\n\n"
+                "If the user wants to MODIFY, REVISE, UPDATE, or ADD to one of "
+                "these reports, set parent_report_id to the relevant report_id above.\n"
+                "If the user wants a completely NEW report on a different topic, "
+                "leave parent_report_id unset.\n"
+                "</report_context>"
+            )
+
        if context_parts:
            context = "\n\n".join(context_parts)
            final_query = f"{context}\n\n<user_query>{user_query}</user_query>"
--- a/surfsense_web/components/markdown-viewer.tsx
+++ b/surfsense_web/components/markdown-viewer.tsx
@ -24,8 +24,9 @@ interface MarkdownViewerProps {
 */
 function stripOuterMarkdownFence(content: string): string {
 	const trimmed = content.trim();
-	const match = trimmed.match(/^```(?:markdown|md)?\s*\n([\s\S]+?)\n```\s*$/);
-	return match ? match[1] : content;
+	// Match 3+ backtick fences (LLMs escalate to 4+ when content has triple-backtick blocks)
+	const match = trimmed.match(/^(`{3,})(?:markdown|md)?\s*\n([\s\S]+?)\n\1\s*$/);
+	return match ? match[2] : content;
 }

 /**
--- a/surfsense_web/components/new-chat/chat-share-button.tsx
+++ b/surfsense_web/components/new-chat/chat-share-button.tsx
@ -2,7 +2,7 @@

 import { useQuery, useQueryClient } from "@tanstack/react-query";
 import { useAtomValue, useSetAtom } from "jotai";
-import { Globe, User, Users } from "lucide-react";
+import { Earth, User, Users } from "lucide-react";
 import { useParams, useRouter } from "next/navigation";
 import { useCallback, useMemo, useState } from "react";
 import { toast } from "sonner";
@ -244,7 +244,7 @@ export function ChatShareButton({ thread, onVisibilityChange, className }: ChatS
 									)}
 								>
 									<div className="size-7 rounded-md shrink-0 grid place-items-center bg-muted">
-										<Globe className="size-4 block text-muted-foreground" />
+										<Earth className="size-4 block text-muted-foreground" />
 									</div>
 									<div className="flex-1 text-left min-w-0">
 										<div className="flex items-center gap-1.5">