feat: implement report generation tool and associated routes for CRUD operations

- Added a new tool for generating structured Markdown reports based on user input.
- Implemented routes for creating, reading, exporting, and deleting reports.
- Integrated report generation into the chat flow, allowing users to generate reports inline.
- Updated schemas to support report data structures and responses.
- Enhanced frontend components to handle report generation and display results.
This commit is contained in:
Anish Sarkar 2026-02-11 17:55:52 +05:30
parent 6fc5dc224b
commit acad8c6d2b
12 changed files with 1054 additions and 10 deletions

View file

@ -51,6 +51,7 @@ from .knowledge_base import create_search_knowledge_base_tool
from .link_preview import create_link_preview_tool
from .mcp_tool import load_mcp_tools
from .podcast import create_generate_podcast_tool
from .report import create_generate_report_tool
from .scrape_webpage import create_scrape_webpage_tool
from .search_surfsense_docs import create_search_surfsense_docs_tool
from .shared_memory import (
@ -118,6 +119,17 @@ BUILTIN_TOOLS: list[ToolDefinition] = [
),
requires=["search_space_id", "db_session", "thread_id"],
),
# Report generation tool (inline, no Celery)
ToolDefinition(
name="generate_report",
description="Generate a structured Markdown report from provided content",
factory=lambda deps: create_generate_report_tool(
search_space_id=deps["search_space_id"],
db_session=deps["db_session"],
thread_id=deps["thread_id"],
),
requires=["search_space_id", "db_session", "thread_id"],
),
# Link preview tool - fetches Open Graph metadata for URLs
ToolDefinition(
name="link_preview",

View file

@ -0,0 +1,211 @@
"""
Report generation tool for the SurfSense agent.
This module provides a factory function for creating the generate_report tool
that generates a structured Markdown report inline (no Celery). The LLM is
called within the tool, the result is saved to the database, and the tool
returns immediately with a ready status.
This follows the same inline pattern as generate_image and display_image,
NOT the Celery-based podcast pattern.
"""
import logging
import re
from typing import Any
from langchain_core.tools import tool
from sqlalchemy.ext.asyncio import AsyncSession
from app.db import Report
from app.services.llm_service import get_document_summary_llm
logger = logging.getLogger(__name__)
# Prompt template for report generation.
#
# Rendered with str.format() inside the generate_report tool, which supplies
# exactly four fields: {topic}, {report_style}, {user_instructions_section}
# (either an empty string or an "**Additional Instructions:** ..." line) and
# {source_content}.
# Because the template goes through str.format(), any literal curly brace
# added to the text below must be doubled ("{{" / "}}").
# NOTE(review): instruction 5 reads "comprehensive include all ..." - it looks
# like a dash or other punctuation was dropped; confirm the intended wording
# before editing the prompt text.
_REPORT_PROMPT = """You are an expert report writer. Generate a well-structured, comprehensive Markdown report based on the provided information.
**Topic:** {topic}
**Report Style:** {report_style}
{user_instructions_section}
**Source Content:**
{source_content}
---
**Instructions:**
1. Write the report in well-formatted Markdown.
2. Include a clear title (as a level-1 heading), an executive summary, and logically organized sections.
3. Use headings (##, ###), bullet points, numbered lists, bold/italic text, and tables where appropriate.
4. Cite specific facts, figures, and findings from the source content.
5. Be thorough and comprehensive include all relevant information from the source content.
6. End with a conclusion or key takeaways section.
7. The report should be professional and ready to export.
Write the report now:
"""
def _extract_metadata(content: str) -> dict[str, Any]:
"""Extract metadata from generated Markdown content."""
# Extract section headings
headings = re.findall(r"^(#{1,6})\s+(.+)$", content, re.MULTILINE)
sections = [
{"level": len(h[0]), "title": h[1].strip()} for h in headings
]
# Word count
word_count = len(content.split())
# Character count
char_count = len(content)
return {
"sections": sections,
"word_count": word_count,
"char_count": char_count,
"section_count": len(sections),
}
# Upper bound on how many characters of source_content are placed in the prompt.
_MAX_SOURCE_CHARS = 100000


def _message_text(content: Any) -> str:
    """Flatten a chat-model message ``content`` value into plain text.

    LangChain message content is either a string or a list whose items are
    strings or content-block dictionaries. Only string items and
    ``{"type": "text", "text": ...}`` blocks contribute to the result; any
    other shape yields an empty string.
    """
    if isinstance(content, str):
        return content
    if not isinstance(content, list):
        return ""

    parts: list[str] = []
    for block in content:
        if isinstance(block, str):
            parts.append(block)
        elif isinstance(block, dict) and block.get("type") == "text":
            text = block.get("text")
            if isinstance(text, str):
                parts.append(text)
    return "".join(parts)


def create_generate_report_tool(
    search_space_id: int,
    db_session: AsyncSession,
    thread_id: int | None = None,
):
    """
    Factory function to create the generate_report tool with injected dependencies.

    The returned tool builds a prompt from its arguments, calls the search
    space's document summary LLM inline, stores the resulting Markdown as a
    ``Report`` row and returns a small status dictionary. Exceptions raised
    while generating or saving the report are caught, logged and reported as
    ``{"status": "failed", "error": ...}`` instead of propagating.

    Args:
        search_space_id: The user's search space ID
        db_session: Database session for creating the report record
        thread_id: The chat thread ID for associating the report

    Returns:
        A configured tool function for generating reports
    """

    def _failed(title: str, error: str) -> dict[str, Any]:
        """Build the failure payload shared by every error path."""
        return {
            "status": "failed",
            "error": error,
            "report_id": None,
            "title": title,
        }

    # NOTE: @tool exposes the docstring below to the agent as the tool
    # description, so it is written for the LLM and should be edited with care.
    @tool
    async def generate_report(
        topic: str,
        source_content: str,
        report_style: str = "detailed",
        user_instructions: str | None = None,
    ) -> dict[str, Any]:
        """
        Generate a structured Markdown report from provided content.

        Use this tool when the user asks to create, generate, or write a report.
        Common triggers include phrases like:
        - "Generate a report about this"
        - "Write a report from this conversation"
        - "Create a detailed report about..."
        - "Make a research report on..."
        - "Summarize this into a report"

        Args:
            topic: The main topic or title of the report.
            source_content: The text content to base the report on. This MUST be comprehensive and include:
                * If discussing the current conversation: a detailed summary of the FULL chat history
                * If based on knowledge base search: the key findings and insights from search results
                * You can combine both: conversation context + search results for richer reports
                * The more detailed the source_content, the better the report quality
            report_style: Style of the report. Options: "detailed", "executive_summary", "deep_research", "brief". Default: "detailed"
            user_instructions: Optional specific instructions for the report (e.g., "focus on financial impacts", "include recommendations")

        Returns:
            A dictionary containing:
            - status: "ready" or "failed"
            - report_id: The report ID
            - title: The report title
            - word_count: Number of words in the report
            - message: Status message (or "error" field if failed)
        """
        try:
            # Get the LLM instance for this search space
            llm = await get_document_summary_llm(db_session, search_space_id)
            if not llm:
                return _failed(
                    topic,
                    "No LLM configured. Please configure a language model in Settings.",
                )

            # Build the prompt
            user_instructions_section = (
                f"**Additional Instructions:** {user_instructions}"
                if user_instructions
                else ""
            )
            prompt = _REPORT_PROMPT.format(
                topic=topic,
                report_style=report_style,
                user_instructions_section=user_instructions_section,
                source_content=source_content[:_MAX_SOURCE_CHARS],
            )

            # Call the LLM inline
            from langchain_core.messages import HumanMessage

            response = await llm.ainvoke([HumanMessage(content=prompt)])

            # Content may arrive as a list of blocks rather than a plain
            # string; flatten it, and treat whitespace-only output as empty.
            report_content = _message_text(response.content)
            if not report_content.strip():
                return _failed(topic, "LLM returned empty or invalid content")

            # Extract metadata
            metadata = _extract_metadata(report_content)

            # Save to database
            report = Report(
                title=topic,
                content=report_content,
                report_metadata=metadata,
                report_style=report_style,
                search_space_id=search_space_id,
                thread_id=thread_id,
            )
            try:
                db_session.add(report)
                await db_session.commit()
                await db_session.refresh(report)
            except Exception:
                # The session is injected and shared with the rest of the
                # agent run. After a failed flush/commit it stays unusable
                # until rolled back, so reset it before reporting the error.
                await db_session.rollback()
                raise

            logger.info(
                "[generate_report] Created report %s: %s words, %s sections",
                report.id,
                metadata.get("word_count", 0),
                metadata.get("section_count", 0),
            )
            return {
                "status": "ready",
                "report_id": report.id,
                "title": topic,
                "word_count": metadata.get("word_count", 0),
                "message": f"Report generated successfully: {topic}",
            }
        except Exception as e:
            # Tool boundary: surface the failure to the agent as data rather
            # than raising, while keeping the full traceback in the logs.
            error_message = str(e)
            logger.exception("[generate_report] Error: %s", error_message)
            return _failed(topic, error_message)

    return generate_report