diff --git a/surfsense_backend/app/agents/new_chat/system_prompt.py b/surfsense_backend/app/agents/new_chat/system_prompt.py index a3520dad6..392314116 100644 --- a/surfsense_backend/app/agents/new_chat/system_prompt.py +++ b/surfsense_backend/app/agents/new_chat/system_prompt.py @@ -50,7 +50,7 @@ def _get_system_instructions( return SURFSENSE_SYSTEM_INSTRUCTIONS.format(resolved_today=resolved_today) -# Tools 0-6 (common to both private and shared prompts) +# Tools 0-7 (common to both private and shared prompts) _TOOLS_INSTRUCTIONS_COMMON = """ You have access to the following tools: @@ -92,7 +92,23 @@ You have access to the following tools: - IMPORTANT: Only one podcast can be generated at a time. If a podcast is already being generated, the tool will return status "already_generating". - After calling this tool, inform the user that podcast generation has started and they will see the player when it's ready (takes 3-5 minutes). -3. link_preview: Fetch metadata for a URL to display a rich preview card. +3. generate_report: Generate a structured Markdown report from provided content. + - Use this when the user asks to create, generate, write, or produce a report. + - Trigger phrases: "generate a report about", "write a report", "create a detailed report about", "make a research report on", "summarize this into a report", "produce a report" + - Args: + - topic: The main topic or title of the report + - source_content: The text content to base the report on. This MUST be comprehensive and include: + * If discussing the current conversation: Include a detailed summary of the FULL chat history (all user questions and your responses) + * If based on knowledge base search: Include the key findings and insights from the search results + * You can combine both: conversation context + search results for richer reports + * The more detailed the source_content, the better the report quality + - report_style: Optional style. 
Options: "detailed" (default), "executive_summary", "deep_research", "brief" + - user_instructions: Optional specific instructions (e.g., "focus on financial impacts", "include recommendations") + - Returns: A dictionary with status "ready" or "failed", report_id, title, and word_count. + - The report is generated immediately and will be displayed inline in the chat with export options (PDF/DOCX). + - IMPORTANT: Always search the knowledge base first to gather comprehensive source_content before generating a report. + +4. link_preview: Fetch metadata for a URL to display a rich preview card. - IMPORTANT: Use this tool WHENEVER the user shares or mentions a URL/link in their message. - This fetches the page's Open Graph metadata (title, description, thumbnail) to show a preview card. - NOTE: This tool only fetches metadata, NOT the full page content. It cannot read the article text. @@ -105,7 +121,7 @@ You have access to the following tools: - Returns: A rich preview card with title, description, thumbnail, and domain - The preview card will automatically be displayed in the chat. -4. display_image: Display an image in the chat with metadata. +5. display_image: Display an image in the chat with metadata. - Use this tool ONLY when you have a valid public HTTP/HTTPS image URL to show. - This displays the image with an optional title, description, and source attribution. - Valid use cases: @@ -130,7 +146,7 @@ You have access to the following tools: - Returns: An image card with the image, title, and description - The image will automatically be displayed in the chat. -5. generate_image: Generate images from text descriptions using AI image models. +6. generate_image: Generate images from text descriptions using AI image models. - Use this when the user asks you to create, generate, draw, design, or make an image. 
- Trigger phrases: "generate an image of", "create a picture of", "draw me", "make an image", "design a logo", "create artwork" - Args: @@ -144,7 +160,7 @@ You have access to the following tools: expand and improve the prompt with specific details about style, lighting, composition, and mood. - If the user's request is vague (e.g., "make me an image of a cat"), enhance the prompt with artistic details. -6. scrape_webpage: Scrape and extract the main content from a webpage. +7. scrape_webpage: Scrape and extract the main content from a webpage. - Use this when the user wants you to READ and UNDERSTAND the actual content of a webpage. - IMPORTANT: This is different from link_preview: * link_preview: Only fetches metadata (title, description, thumbnail) for display @@ -169,9 +185,9 @@ You have access to the following tools: """ -# Private (user) memory: tools 7-8 + memory-specific examples +# Private (user) memory: tools 8-9 + memory-specific examples _TOOLS_INSTRUCTIONS_MEMORY_PRIVATE = """ -7. save_memory: Save facts, preferences, or context for personalized responses. +8. save_memory: Save facts, preferences, or context for personalized responses. - Use this when the user explicitly or implicitly shares information worth remembering. - Trigger scenarios: * User says "remember this", "keep this in mind", "note that", or similar @@ -194,7 +210,7 @@ _TOOLS_INSTRUCTIONS_MEMORY_PRIVATE = """ - IMPORTANT: Only save information that would be genuinely useful for future conversations. Don't save trivial or temporary information. -8. recall_memory: Retrieve relevant memories about the user for personalized responses. +9. recall_memory: Retrieve relevant memories about the user for personalized responses. - Use this to access stored information about the user. 
- Trigger scenarios: * You need user context to give a better, more personalized answer @@ -232,7 +248,7 @@ -# Shared (team) memory: tools 7-8 + team memory examples +# Shared (team) memory: tools 8-9 + team memory examples _TOOLS_INSTRUCTIONS_MEMORY_SHARED = """ -7. save_memory: Save a fact, preference, or context to the team's shared memory for future reference. +8. save_memory: Save a fact, preference, or context to the team's shared memory for future reference. - Use this when the user or a team member says "remember this", "keep this in mind", or similar in this shared chat. - Use when the team agrees on something to remember (e.g., decisions, conventions). - Someone shares a preference or fact that should be visible to the whole team. @@ -247,7 +263,7 @@ _TOOLS_INSTRUCTIONS_MEMORY_SHARED = """ - Returns: Confirmation of saved memory; returned context may include who added it (added_by). - IMPORTANT: Only save information that would be genuinely useful for future team conversations in this space. -8. recall_memory: Recall relevant team memories for this space to provide contextual responses. +9. recall_memory: Recall relevant team memories for this space to provide contextual responses. - Use when you need team context to answer (e.g., "where do we store X?", "what did we decide about Y?"). - Use when someone asks about something the team agreed to remember. - Use when team preferences or conventions would improve the response. 
@@ -321,6 +337,17 @@ _TOOLS_INSTRUCTIONS_EXAMPLES_COMMON = """ - First search: `search_knowledge_base(query="quantum computing")` - Then: `generate_podcast(source_content="Key insights about quantum computing from the knowledge base:\\n\\n[Comprehensive summary of all relevant search results with key facts, concepts, and findings]", podcast_title="Quantum Computing Explained")` +- User: "Generate a report about AI trends" + - First search: `search_knowledge_base(query="AI trends")` + - Then: `generate_report(topic="AI Trends Report", source_content="Key insights about AI trends from the knowledge base:\\n\\n[Comprehensive summary of all relevant search results with key facts, concepts, and findings]", report_style="detailed")` + +- User: "Write a research report from this conversation" + - Call: `generate_report(topic="Research Report", source_content="Complete conversation summary:\\n\\nUser asked about [topic 1]:\\n[Your detailed response]\\n\\nUser then asked about [topic 2]:\\n[Your detailed response]\\n\\n[Continue for all exchanges in the conversation]", report_style="deep_research")` + +- User: "Create a brief executive summary about our project progress" + - First search: `search_knowledge_base(query="project progress updates")` + - Then: `generate_report(topic="Project Progress Executive Summary", source_content="[Combined search results and conversation context]", report_style="executive_summary", user_instructions="Focus on milestones achieved and upcoming deadlines")` + - User: "Check out https://dev.to/some-article" - Call: `link_preview(url="https://dev.to/some-article")` - Call: `scrape_webpage(url="https://dev.to/some-article")` diff --git a/surfsense_backend/app/agents/new_chat/tools/registry.py b/surfsense_backend/app/agents/new_chat/tools/registry.py index 8092a6104..275b674ec 100644 --- a/surfsense_backend/app/agents/new_chat/tools/registry.py +++ b/surfsense_backend/app/agents/new_chat/tools/registry.py @@ -51,6 +51,7 @@ from .knowledge_base 
import create_search_knowledge_base_tool from .link_preview import create_link_preview_tool from .mcp_tool import load_mcp_tools from .podcast import create_generate_podcast_tool +from .report import create_generate_report_tool from .scrape_webpage import create_scrape_webpage_tool from .search_surfsense_docs import create_search_surfsense_docs_tool from .shared_memory import ( @@ -118,6 +119,17 @@ BUILTIN_TOOLS: list[ToolDefinition] = [ ), requires=["search_space_id", "db_session", "thread_id"], ), + # Report generation tool (inline, no Celery) + ToolDefinition( + name="generate_report", + description="Generate a structured Markdown report from provided content", + factory=lambda deps: create_generate_report_tool( + search_space_id=deps["search_space_id"], + db_session=deps["db_session"], + thread_id=deps["thread_id"], + ), + requires=["search_space_id", "db_session", "thread_id"], + ), # Link preview tool - fetches Open Graph metadata for URLs ToolDefinition( name="link_preview", diff --git a/surfsense_backend/app/agents/new_chat/tools/report.py b/surfsense_backend/app/agents/new_chat/tools/report.py new file mode 100644 index 000000000..d2cadb94e --- /dev/null +++ b/surfsense_backend/app/agents/new_chat/tools/report.py @@ -0,0 +1,211 @@ +""" +Report generation tool for the SurfSense agent. + +This module provides a factory function for creating the generate_report tool +that generates a structured Markdown report inline (no Celery). The LLM is +called within the tool, the result is saved to the database, and the tool +returns immediately with a ready status. + +This follows the same inline pattern as generate_image and display_image, +NOT the Celery-based podcast pattern. 
+""" + +import logging +import re +from typing import Any + +from langchain_core.tools import tool +from sqlalchemy.ext.asyncio import AsyncSession + +from app.db import Report +from app.services.llm_service import get_document_summary_llm + +logger = logging.getLogger(__name__) + +# Prompt template for report generation +_REPORT_PROMPT = """You are an expert report writer. Generate a well-structured, comprehensive Markdown report based on the provided information. + +**Topic:** {topic} + +**Report Style:** {report_style} + +{user_instructions_section} + +**Source Content:** +{source_content} + +--- + +**Instructions:** +1. Write the report in well-formatted Markdown. +2. Include a clear title (as a level-1 heading), an executive summary, and logically organized sections. +3. Use headings (##, ###), bullet points, numbered lists, bold/italic text, and tables where appropriate. +4. Cite specific facts, figures, and findings from the source content. +5. Be thorough and comprehensive — include all relevant information from the source content. +6. End with a conclusion or key takeaways section. +7. The report should be professional and ready to export. + +Write the report now: +""" + + +def _extract_metadata(content: str) -> dict[str, Any]: + """Extract metadata from generated Markdown content.""" + # Extract section headings + headings = re.findall(r"^(#{1,6})\s+(.+)$", content, re.MULTILINE) + sections = [ + {"level": len(h[0]), "title": h[1].strip()} for h in headings + ] + + # Word count + word_count = len(content.split()) + + # Character count + char_count = len(content) + + return { + "sections": sections, + "word_count": word_count, + "char_count": char_count, + "section_count": len(sections), + } + + +def create_generate_report_tool( + search_space_id: int, + db_session: AsyncSession, + thread_id: int | None = None, +): + """ + Factory function to create the generate_report tool with injected dependencies. 
+ + The tool generates a Markdown report inline using the search space's + document summary LLM, saves it to the database, and returns immediately. + + Args: + search_space_id: The user's search space ID + db_session: Database session for creating the report record + thread_id: The chat thread ID for associating the report + + Returns: + A configured tool function for generating reports + """ + + @tool + async def generate_report( + topic: str, + source_content: str, + report_style: str = "detailed", + user_instructions: str | None = None, + ) -> dict[str, Any]: + """ + Generate a structured Markdown report from provided content. + + Use this tool when the user asks to create, generate, or write a report. + Common triggers include phrases like: + - "Generate a report about this" + - "Write a report from this conversation" + - "Create a detailed report about..." + - "Make a research report on..." + - "Summarize this into a report" + + Args: + topic: The main topic or title of the report. + source_content: The text content to base the report on. This MUST be comprehensive and include: + * If discussing the current conversation: a detailed summary of the FULL chat history + * If based on knowledge base search: the key findings and insights from search results + * You can combine both: conversation context + search results for richer reports + * The more detailed the source_content, the better the report quality + report_style: Style of the report. Options: "detailed", "executive_summary", "deep_research", "brief". 
Default: "detailed" + user_instructions: Optional specific instructions for the report (e.g., "focus on financial impacts", "include recommendations") + + Returns: + A dictionary containing: + - status: "ready" or "failed" + - report_id: The report ID + - title: The report title + - word_count: Number of words in the report + - message: Status message (or "error" field if failed) + """ + try: + # Get the LLM instance for this search space + llm = await get_document_summary_llm(db_session, search_space_id) + if not llm: + return { + "status": "failed", + "error": "No LLM configured. Please configure a language model in Settings.", + "report_id": None, + "title": topic, + } + + # Build the prompt + user_instructions_section = "" + if user_instructions: + user_instructions_section = ( + f"**Additional Instructions:** {user_instructions}" + ) + + prompt = _REPORT_PROMPT.format( + topic=topic, + report_style=report_style, + user_instructions_section=user_instructions_section, + source_content=source_content[:100000], # Cap source content + ) + + # Call the LLM inline + from langchain_core.messages import HumanMessage + + response = await llm.ainvoke([HumanMessage(content=prompt)]) + report_content = response.content + + if not report_content or not isinstance(report_content, str): + return { + "status": "failed", + "error": "LLM returned empty or invalid content", + "report_id": None, + "title": topic, + } + + # Extract metadata + metadata = _extract_metadata(report_content) + + # Save to database + report = Report( + title=topic, + content=report_content, + report_metadata=metadata, + report_style=report_style, + search_space_id=search_space_id, + thread_id=thread_id, + ) + db_session.add(report) + await db_session.commit() + await db_session.refresh(report) + + logger.info( + f"[generate_report] Created report {report.id}: " + f"{metadata.get('word_count', 0)} words, " + f"{metadata.get('section_count', 0)} sections" + ) + + return { + "status": "ready", + 
"report_id": report.id, + "title": topic, + "word_count": metadata.get("word_count", 0), + "message": f"Report generated successfully: {topic}", + } + + except Exception as e: + error_message = str(e) + logger.exception(f"[generate_report] Error: {error_message}") + + return { + "status": "failed", + "error": error_message, + "report_id": None, + "title": topic, + } + + return generate_report + diff --git a/surfsense_backend/app/routes/__init__.py b/surfsense_backend/app/routes/__init__.py index d9353284c..3e949c687 100644 --- a/surfsense_backend/app/routes/__init__.py +++ b/surfsense_backend/app/routes/__init__.py @@ -32,6 +32,7 @@ from .notes_routes import router as notes_router from .notifications_routes import router as notifications_router from .notion_add_connector_route import router as notion_add_connector_router from .podcasts_routes import router as podcasts_router +from .reports_routes import router as reports_router from .public_chat_routes import router as public_chat_router from .rbac_routes import router as rbac_router from .search_source_connectors_routes import router as search_source_connectors_router @@ -50,6 +51,7 @@ router.include_router(notes_router) router.include_router(new_chat_router) # Chat with assistant-ui persistence router.include_router(chat_comments_router) router.include_router(podcasts_router) # Podcast task status and audio +router.include_router(reports_router) # Report CRUD and export (PDF/DOCX) router.include_router(image_generation_router) # Image generation via litellm router.include_router(search_source_connectors_router) router.include_router(google_calendar_add_connector_router) diff --git a/surfsense_backend/app/routes/reports_routes.py b/surfsense_backend/app/routes/reports_routes.py new file mode 100644 index 000000000..b4ee0e889 --- /dev/null +++ b/surfsense_backend/app/routes/reports_routes.py @@ -0,0 +1,250 @@ +""" +Report routes for CRUD operations and export (PDF/DOCX). 
+ +These routes support the report generation feature in new-chat. +Reports are generated inline by the agent tool and stored as Markdown. +Export to PDF/DOCX is on-demand via pypandoc. + +Authorization: lightweight search-space membership checks (no granular RBAC) +since reports are chat-generated artifacts, not standalone managed resources. +""" + +import asyncio +import io +import logging +from enum import Enum + +import pypandoc +from fastapi import APIRouter, Depends, HTTPException, Query +from fastapi.responses import StreamingResponse +from sqlalchemy import select +from sqlalchemy.exc import SQLAlchemyError +from sqlalchemy.ext.asyncio import AsyncSession + +from app.db import ( + Report, + SearchSpace, + SearchSpaceMembership, + User, + get_async_session, +) +from app.schemas import ReportContentRead, ReportRead +from app.users import current_active_user +from app.utils.rbac import check_search_space_access + +logger = logging.getLogger(__name__) + +router = APIRouter() + +MAX_REPORT_LIST_LIMIT = 500 + + +class ExportFormat(str, Enum): + PDF = "pdf" + DOCX = "docx" + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +async def _get_report_with_access( + report_id: int, + session: AsyncSession, + user: User, +) -> Report: + """Fetch a report and verify the user belongs to its search space. + + Raises HTTPException(404) if not found, HTTPException(403) if no access. + """ + result = await session.execute(select(Report).filter(Report.id == report_id)) + report = result.scalars().first() + + if not report: + raise HTTPException(status_code=404, detail="Report not found") + + # Lightweight membership check – no granular RBAC, just "is the user a + # member of the search space this report belongs to?" 
+ await check_search_space_access(session, user, report.search_space_id) + + return report + + +# --------------------------------------------------------------------------- +# Routes +# --------------------------------------------------------------------------- + + +@router.get("/reports", response_model=list[ReportRead]) +async def read_reports( + skip: int = Query(default=0, ge=0), + limit: int = Query(default=100, ge=1, le=MAX_REPORT_LIST_LIMIT), + search_space_id: int | None = None, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + """ + List reports the user has access to. + Filters by search space membership. + """ + try: + if search_space_id is not None: + # Verify the caller is a member of the requested search space + await check_search_space_access(session, user, search_space_id) + + result = await session.execute( + select(Report) + .filter(Report.search_space_id == search_space_id) + .order_by(Report.id.desc()) + .offset(skip) + .limit(limit) + ) + else: + result = await session.execute( + select(Report) + .join(SearchSpace) + .join(SearchSpaceMembership) + .filter(SearchSpaceMembership.user_id == user.id) + .order_by(Report.id.desc()) + .offset(skip) + .limit(limit) + ) + return result.scalars().all() + except HTTPException: + raise + except SQLAlchemyError: + raise HTTPException( + status_code=500, detail="Database error occurred while fetching reports" + ) from None + + +@router.get("/reports/{report_id}", response_model=ReportRead) +async def read_report( + report_id: int, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + """ + Get a specific report by ID (metadata only, no content). 
+ """ + try: + return await _get_report_with_access(report_id, session, user) + except HTTPException: + raise + except SQLAlchemyError: + raise HTTPException( + status_code=500, detail="Database error occurred while fetching report" + ) from None + + +@router.get("/reports/{report_id}/content", response_model=ReportContentRead) +async def read_report_content( + report_id: int, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + """ + Get full Markdown content of a report. + """ + try: + return await _get_report_with_access(report_id, session, user) + except HTTPException: + raise + except SQLAlchemyError: + raise HTTPException( + status_code=500, + detail="Database error occurred while fetching report content", + ) from None + + +@router.get("/reports/{report_id}/export") +async def export_report( + report_id: int, + format: ExportFormat = Query(ExportFormat.PDF, description="Export format: pdf or docx"), + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + """ + Export a report as PDF or DOCX. + """ + try: + report = await _get_report_with_access(report_id, session, user) + + if not report.content: + raise HTTPException( + status_code=400, detail="Report has no content to export" + ) + + # Convert Markdown to the requested format via pypandoc. + # pypandoc spawns a pandoc subprocess (blocking), so we run it in a + # thread executor to avoid blocking the async event loop. 
+ extra_args = ["--standalone"] + if format == ExportFormat.PDF: + extra_args.append("--pdf-engine=wkhtmltopdf") + + loop = asyncio.get_running_loop() + output = await loop.run_in_executor( + None, # default thread-pool + lambda: pypandoc.convert_text( + report.content, + format.value, + format="md", + extra_args=extra_args, + ), + ) + + # NOTE(review): pypandoc.convert_text returns str for text formats; binary targets (pdf, docx) require the outputfile argument and raise RuntimeError without it — confirm this path can ever yield bytes + if isinstance(output, str): + output = output.encode("utf-8") + + # Sanitize filename + safe_title = ( + "".join(c if c.isalnum() or c in " -_" else "_" for c in report.title) + .strip()[:80] + or "report" + ) + + media_types = { + ExportFormat.PDF: "application/pdf", + ExportFormat.DOCX: "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + } + + return StreamingResponse( + io.BytesIO(output), + media_type=media_types[format], + headers={ + "Content-Disposition": f'attachment; filename="{safe_title}.{format.value}"', + }, + ) + + except HTTPException: + raise + except Exception as e: + logger.exception("Report export failed") + raise HTTPException( + status_code=500, detail=f"Export failed: {e!s}" + ) from e + + +@router.delete("/reports/{report_id}", response_model=dict) +async def delete_report( + report_id: int, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + """ + Delete a report.
+ """ + try: + db_report = await _get_report_with_access(report_id, session, user) + + await session.delete(db_report) + await session.commit() + return {"message": "Report deleted successfully"} + except HTTPException: + raise + except SQLAlchemyError: + await session.rollback() + raise HTTPException( + status_code=500, detail="Database error occurred while deleting report" + ) from None diff --git a/surfsense_backend/app/schemas/__init__.py b/surfsense_backend/app/schemas/__init__.py index b015cf715..376b55407 100644 --- a/surfsense_backend/app/schemas/__init__.py +++ b/surfsense_backend/app/schemas/__init__.py @@ -59,6 +59,7 @@ from .new_llm_config import ( NewLLMConfigUpdate, ) from .podcasts import PodcastBase, PodcastCreate, PodcastRead, PodcastUpdate +from .reports import ReportBase, ReportContentRead, ReportRead from .rbac_schemas import ( InviteAcceptRequest, InviteAcceptResponse, @@ -185,6 +186,10 @@ __all__ = [ "PodcastUpdate", "RefreshTokenRequest", "RefreshTokenResponse", + # Report schemas + "ReportBase", + "ReportContentRead", + "ReportRead", "RoleCreate", "RoleRead", "RoleUpdate", diff --git a/surfsense_backend/app/schemas/reports.py b/surfsense_backend/app/schemas/reports.py new file mode 100644 index 000000000..90add2b04 --- /dev/null +++ b/surfsense_backend/app/schemas/reports.py @@ -0,0 +1,41 @@ +"""Report schemas for API responses.""" + +from datetime import datetime +from typing import Any + +from pydantic import BaseModel + + +class ReportBase(BaseModel): + """Base report schema.""" + + title: str + content: str | None = None + report_style: str | None = None + search_space_id: int + + +class ReportRead(BaseModel): + """Schema for reading a report (list view, no content).""" + + id: int + title: str + report_style: str | None = None + report_metadata: dict[str, Any] | None = None + created_at: datetime + + class Config: + from_attributes = True + + +class ReportContentRead(BaseModel): + """Schema for reading a report with full Markdown 
content.""" + + id: int + title: str + content: str | None = None + report_metadata: dict[str, Any] | None = None + + class Config: + from_attributes = True + diff --git a/surfsense_backend/app/tasks/chat/stream_new_chat.py b/surfsense_backend/app/tasks/chat/stream_new_chat.py index 31e67c7ff..636a7413b 100644 --- a/surfsense_backend/app/tasks/chat/stream_new_chat.py +++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py @@ -692,6 +692,35 @@ async def stream_new_chat( status="in_progress", items=last_active_step_items, ) + elif tool_name == "generate_report": + report_topic = ( + tool_input.get("topic", "Report") + if isinstance(tool_input, dict) + else "Report" + ) + report_style = ( + tool_input.get("report_style", "detailed") + if isinstance(tool_input, dict) + else "detailed" + ) + content_len = len( + tool_input.get("source_content", "") + if isinstance(tool_input, dict) + else "" + ) + last_active_step_title = "Generating report" + last_active_step_items = [ + f"Topic: {report_topic}", + f"Style: {report_style}", + f"Source content: {content_len:,} characters", + "Generating report with LLM...", + ] + yield streaming_service.format_thinking_step( + step_id=tool_step_id, + title="Generating report", + status="in_progress", + items=last_active_step_items, + ) # elif tool_name == "ls": # last_active_step_title = "Exploring files" # last_active_step_items = [] @@ -895,6 +924,49 @@ async def stream_new_chat( status="completed", items=completed_items, ) + elif tool_name == "generate_report": + # Build detailed completion items based on report status + report_status = ( + tool_output.get("status", "unknown") + if isinstance(tool_output, dict) + else "unknown" + ) + report_title = ( + tool_output.get("title", "Report") + if isinstance(tool_output, dict) + else "Report" + ) + word_count = ( + tool_output.get("word_count", 0) + if isinstance(tool_output, dict) + else 0 + ) + + if report_status == "ready": + completed_items = [ + f"Title: {report_title}", + f"Words: 
{word_count:,}", + "Report generated successfully", + ] + elif report_status == "failed": + error_msg = ( + tool_output.get("error", "Unknown error") + if isinstance(tool_output, dict) + else "Unknown error" + ) + completed_items = [ + f"Title: {report_title}", + f"Error: {error_msg[:50]}", + ] + else: + completed_items = last_active_step_items + + yield streaming_service.format_thinking_step( + step_id=original_step_id, + title="Generating report", + status="completed", + items=completed_items, + ) # elif tool_name == "write_todos": # Disabled for now # # Build completion items for planning/updating # if isinstance(tool_output, dict): @@ -1037,6 +1109,34 @@ async def stream_new_chat( f"Podcast generation failed: {error_msg}", "error", ) + elif tool_name == "generate_report": + # Stream the full report result so frontend can render the ReportViewer + yield streaming_service.format_tool_output_available( + tool_call_id, + tool_output + if isinstance(tool_output, dict) + else {"result": tool_output}, + ) + # Send appropriate terminal message based on status + if ( + isinstance(tool_output, dict) + and tool_output.get("status") == "ready" + ): + word_count = tool_output.get("word_count", 0) + yield streaming_service.format_terminal_info( + f"Report generated: {tool_output.get('title', 'Report')} ({word_count:,} words)", + "success", + ) + else: + error_msg = ( + tool_output.get("error", "Unknown error") + if isinstance(tool_output, dict) + else "Unknown error" + ) + yield streaming_service.format_terminal_info( + f"Report generation failed: {error_msg}", + "error", + ) elif tool_name == "link_preview": # Stream the full link preview result so frontend can render the MediaCard yield streaming_service.format_tool_output_available( diff --git a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx index bc5aca91e..355fb2211 100644 --- 
a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx @@ -35,6 +35,7 @@ import { ChatHeader } from "@/components/new-chat/chat-header"; import type { ThinkingStep } from "@/components/tool-ui/deepagent-thinking"; import { DisplayImageToolUI } from "@/components/tool-ui/display-image"; import { GeneratePodcastToolUI } from "@/components/tool-ui/generate-podcast"; +import { GenerateReportToolUI } from "@/components/tool-ui/generate-report"; import { LinkPreviewToolUI } from "@/components/tool-ui/link-preview"; import { ScrapeWebpageToolUI } from "@/components/tool-ui/scrape-webpage"; import { RecallMemoryToolUI, SaveMemoryToolUI } from "@/components/tool-ui/user-memory"; @@ -117,6 +118,7 @@ function extractMentionedDocuments(content: unknown): MentionedDocumentInfo[] { */ const TOOLS_WITH_UI = new Set([ "generate_podcast", + "generate_report", "link_preview", "display_image", "scrape_webpage", @@ -1427,6 +1429,7 @@ export default function NewChatPage() { return ( + diff --git a/surfsense_web/components/public-chat/public-chat-view.tsx b/surfsense_web/components/public-chat/public-chat-view.tsx index a1e6008ff..ad175128d 100644 --- a/surfsense_web/components/public-chat/public-chat-view.tsx +++ b/surfsense_web/components/public-chat/public-chat-view.tsx @@ -4,6 +4,7 @@ import { AssistantRuntimeProvider } from "@assistant-ui/react"; import { Navbar } from "@/components/homepage/navbar"; import { DisplayImageToolUI } from "@/components/tool-ui/display-image"; import { GeneratePodcastToolUI } from "@/components/tool-ui/generate-podcast"; +import { GenerateReportToolUI } from "@/components/tool-ui/generate-report"; import { LinkPreviewToolUI } from "@/components/tool-ui/link-preview"; import { ScrapeWebpageToolUI } from "@/components/tool-ui/scrape-webpage"; import { Spinner } from "@/components/ui/spinner"; @@ -42,6 +43,7 @@ export function PublicChatView({ 
shareToken }: PublicChatViewProps) { {/* Tool UIs for rendering tool results */} + diff --git a/surfsense_web/components/tool-ui/generate-report.tsx b/surfsense_web/components/tool-ui/generate-report.tsx new file mode 100644 index 000000000..5b21f67b1 --- /dev/null +++ b/surfsense_web/components/tool-ui/generate-report.tsx @@ -0,0 +1,390 @@ +"use client"; + +import { makeAssistantToolUI } from "@assistant-ui/react"; +import { + CheckIcon, + ClipboardIcon, + DownloadIcon, + FileTextIcon, + Loader2Icon, +} from "lucide-react"; +import { useCallback, useEffect, useState } from "react"; +import { z } from "zod"; +import { Button } from "@/components/ui/button"; +import { Spinner } from "@/components/ui/spinner"; +import { MarkdownViewer } from "@/components/markdown-viewer"; +import { baseApiService } from "@/lib/apis/base-api.service"; +import { authenticatedFetch } from "@/lib/auth-utils"; + +/** + * Zod schemas for runtime validation + */ +const GenerateReportArgsSchema = z.object({ + topic: z.string(), + source_content: z.string(), + report_style: z.string().nullish(), + user_instructions: z.string().nullish(), +}); + +const GenerateReportResultSchema = z.object({ + status: z.enum(["ready", "failed"]), + report_id: z.number().nullish(), + title: z.string().nullish(), + word_count: z.number().nullish(), + message: z.string().nullish(), + error: z.string().nullish(), +}); + +const ReportContentResponseSchema = z.object({ + id: z.number(), + title: z.string(), + content: z.string().nullish(), + report_metadata: z + .object({ + sections: z + .array( + z.object({ + level: z.number(), + title: z.string(), + }) + ) + .nullish(), + word_count: z.number().nullish(), + char_count: z.number().nullish(), + section_count: z.number().nullish(), + }) + .nullish(), +}); + +/** + * Types derived from Zod schemas + */ +type GenerateReportArgs = z.infer; +type GenerateReportResult = z.infer; +type ReportContentResponse = z.infer; + +/** + * Loading state component shown while report 
is being generated + */ +function ReportGeneratingState({ topic }: { topic: string }) { + return ( +
+
+
+
+ +
+
+
+
+

+ {topic} +

+
+ + + Generating report. This may take a moment... + +
+
+
+
+
+
+
+
+
+ ); +} + +/** + * Error state component shown when report generation fails + */ +function ReportErrorState({ title, error }: { title: string; error: string }) { + return ( +
+
+
+ +
+
+

+ {title} +

+

+ {error} +

+
+
+
+ ); +} + +/** + * Report viewer component that fetches and renders the full Markdown report + */ +function ReportViewer({ + reportId, + title, + wordCount, +}: { + reportId: number; + title: string; + wordCount?: number; +}) { + const [reportContent, setReportContent] = useState(null); + const [isLoading, setIsLoading] = useState(true); + const [error, setError] = useState(null); + const [copied, setCopied] = useState(false); + const [exporting, setExporting] = useState<"pdf" | "docx" | null>(null); + + // Fetch report content + useEffect(() => { + const fetchContent = async () => { + setIsLoading(true); + setError(null); + try { + const rawData = await baseApiService.get( + `/api/v1/reports/${reportId}/content` + ); + const parsed = ReportContentResponseSchema.safeParse(rawData); + if (parsed.success) { + setReportContent(parsed.data); + } else { + console.warn("Invalid report content response:", parsed.error.issues); + setError("Invalid response format"); + } + } catch (err) { + console.error("Error fetching report content:", err); + setError(err instanceof Error ? 
err.message : "Failed to load report"); + } finally { + setIsLoading(false); + } + }; + + fetchContent(); + }, [reportId]); + + // Copy markdown content + const handleCopy = useCallback(async () => { + if (!reportContent?.content) return; + try { + await navigator.clipboard.writeText(reportContent.content); + setCopied(true); + setTimeout(() => setCopied(false), 2000); + } catch (err) { + console.error("Failed to copy:", err); + } + }, [reportContent?.content]); + + // Export report + const handleExport = useCallback( + async (format: "pdf" | "docx") => { + setExporting(format); + try { + const response = await authenticatedFetch( + `${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/reports/${reportId}/export?format=${format}`, + { method: "GET" } + ); + + if (!response.ok) { + throw new Error(`Export failed: ${response.status}`); + } + + const blob = await response.blob(); + const url = URL.createObjectURL(blob); + const a = document.createElement("a"); + a.href = url; + a.download = `${title.replace(/[^a-zA-Z0-9 _-]/g, "_").trim().slice(0, 80) || "report"}.${format}`; + document.body.appendChild(a); + a.click(); + document.body.removeChild(a); + URL.revokeObjectURL(url); + } catch (err) { + console.error(`Export ${format} failed:`, err); + } finally { + setExporting(null); + } + }, + [reportId, title] + ); + + if (isLoading) { + return ( +
+
+
+ +
+
+

+ {title} +

+
+ + Loading report... +
+
+
+
+ ); + } + + if (error || !reportContent) { + return ; + } + + const displayWordCount = + wordCount ?? reportContent.report_metadata?.word_count ?? null; + + return ( +
+ {/* Header */} +
+
+
+ +
+
+

+ {reportContent.title || title} +

+ {displayWordCount != null && ( +

+ {displayWordCount.toLocaleString()} words + {reportContent.report_metadata?.section_count + ? ` · ${reportContent.report_metadata.section_count} sections` + : ""} +

+ )} +
+
+ + {/* Action buttons */} +
+ + + +
+
+ + {/* Markdown content */} +
+ {reportContent.content ? ( + + ) : ( +

No content available.

+ )} +
+
+ ); +} + +/** + * Generate Report Tool UI Component + * + * This component is registered with assistant-ui to render custom UI + * when the generate_report tool is called by the agent. + * + * Unlike podcast (which uses polling), the report is generated inline + * and the result contains status: "ready" immediately. + */ +export const GenerateReportToolUI = makeAssistantToolUI< + GenerateReportArgs, + GenerateReportResult +>({ + toolName: "generate_report", + render: function GenerateReportUI({ args, result, status }) { + const topic = args.topic || "Report"; + + // Loading state - tool is still running (LLM generating report) + if (status.type === "running" || status.type === "requires-action") { + return ; + } + + // Incomplete/cancelled state + if (status.type === "incomplete") { + if (status.reason === "cancelled") { + return ( +
+

+ + Report generation cancelled +

+
+ ); + } + if (status.reason === "error") { + return ( + + ); + } + } + + // No result yet + if (!result) { + return ; + } + + // Failed result + if (result.status === "failed") { + return ; + } + + // Ready with report_id + if (result.status === "ready" && result.report_id) { + return ( + + ); + } + + // Fallback - missing required data + return ; + }, +}); + diff --git a/surfsense_web/components/tool-ui/index.ts b/surfsense_web/components/tool-ui/index.ts index 5b4ea0a34..8b7bc466e 100644 --- a/surfsense_web/components/tool-ui/index.ts +++ b/surfsense_web/components/tool-ui/index.ts @@ -31,6 +31,7 @@ export { DisplayImageToolUI, } from "./display-image"; export { GeneratePodcastToolUI } from "./generate-podcast"; +export { GenerateReportToolUI } from "./generate-report"; export { Image, ImageErrorBoundary,