feat: add podcast generation capabilities to SurfSense deep agent and UI integration

2026-05-21 18:55:16 +02:00 · 2025-12-21 19:07:46 +05:30 · 2025-12-21 19:07:46 +05:30 · 4c4e4b3c4c
commit 4c4e4b3c4c
parent 3906ba52e0
9 changed files with 985 additions and 22 deletions
--- a/surfsense_backend/app/agents/new_chat/chat_deepagent.py
+++ b/surfsense_backend/app/agents/new_chat/chat_deepagent.py
@ -2,7 +2,7 @@
 SurfSense deep agent implementation.

 This module provides the factory function for creating SurfSense deep agents
-with knowledge base search capability.
+with knowledge base search and podcast generation capabilities.
 """

 from collections.abc import Sequence
@ -14,6 +14,7 @@ from sqlalchemy.ext.asyncio import AsyncSession

 from app.agents.new_chat.context import SurfSenseContextSchema
 from app.agents.new_chat.knowledge_base import create_search_knowledge_base_tool
+from app.agents.new_chat.podcast import create_generate_podcast_tool
 from app.agents.new_chat.system_prompt import build_surfsense_system_prompt
 from app.services.connector_service import ConnectorService

@ -27,22 +28,27 @@ def create_surfsense_deep_agent(
    search_space_id: int,
    db_session: AsyncSession,
    connector_service: ConnectorService,
+    user_id: str | None = None,
    user_instructions: str | None = None,
    enable_citations: bool = True,
+    enable_podcast: bool = True,
    additional_tools: Sequence[BaseTool] | None = None,
 ):
    """
-    Create a SurfSense deep agent with knowledge base search capability.
+    Create a SurfSense deep agent with knowledge base search and podcast generation capabilities.

    Args:
        llm: ChatLiteLLM instance
        search_space_id: The user's search space ID
        db_session: Database session
        connector_service: Initialized connector service
+        user_id: The user's ID (required for podcast generation)
        user_instructions: Optional user instructions to inject into the system prompt.
                          These will be added to the system prompt to customize agent behavior.
        enable_citations: Whether to include citation instructions in the system prompt (default: True).
                         When False, the agent will not be instructed to add citations to responses.
+        enable_podcast: Whether to include the podcast generation tool (default: True).
+                       When True and user_id is provided, the agent can generate podcasts.
        additional_tools: Optional sequence of additional tools to inject into the agent.
                         The search_knowledge_base tool will always be included.

@ -58,6 +64,16 @@ def create_surfsense_deep_agent(

    # Combine search tool with any additional tools
    tools = [search_tool]
+
+    # Add podcast tool if enabled and user_id is provided
+    if enable_podcast and user_id:
+        podcast_tool = create_generate_podcast_tool(
+            search_space_id=search_space_id,
+            db_session=db_session,
+            user_id=str(user_id),
+        )
+        tools.append(podcast_tool)
+
    if additional_tools:
        tools.extend(additional_tools)

--- a/surfsense_backend/app/agents/new_chat/podcast.py
+++ b/surfsense_backend/app/agents/new_chat/podcast.py
@ -0,0 +1,170 @@
+"""
+Podcast generation tool for the new chat agent.
+
+This module provides a factory function for creating the generate_podcast tool
+that integrates with the existing podcaster agent. Generated podcasts are saved
+to the database, as in the old system, so that they persist and can be fetched
+by ID through the existing (authenticated) podcast API.
+"""
+
+from typing import Any
+
+from langchain_core.tools import tool
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.agents.podcaster.graph import graph as podcaster_graph
+from app.agents.podcaster.state import State as PodcasterState
+from app.db import Podcast
+
+
+def _entry_fields(entry: Any) -> tuple[int, str]:
+    """
+    Extract ``(speaker_id, dialog)`` from a single podcast transcript entry.
+
+    Accepts either an object exposing ``speaker_id`` / ``dialog`` attributes or
+    a mapping with those keys, so callers only have to branch on the entry
+    shape in one place. Missing mapping keys fall back to speaker ``0`` and an
+    empty dialog.
+    """
+    if hasattr(entry, "speaker_id"):
+        return entry.speaker_id, entry.dialog
+    return entry.get("speaker_id", 0), entry.get("dialog", "")
+
+
+def create_generate_podcast_tool(
+    search_space_id: int,
+    db_session: AsyncSession,
+    user_id: str,
+):
+    """
+    Factory function to create the generate_podcast tool with injected dependencies.
+
+    The returned tool closes over the arguments below, so the model only has to
+    supply the podcast content, title and optional style prompt.
+
+    Args:
+        search_space_id: The user's search space ID
+        db_session: Database session used to persist the generated podcast.
+                    The tool commits on success and rolls back on failure.
+        user_id: The user's ID (as string)
+
+    Returns:
+        A configured tool function for generating podcasts
+    """
+
+    # NOTE: the docstring of generate_podcast is consumed by the @tool decorator
+    # as the tool description shown to the model, so it is effectively runtime
+    # text - edit it with the same care as a prompt.
+    @tool
+    async def generate_podcast(
+        source_content: str,
+        podcast_title: str = "SurfSense Podcast",
+        user_prompt: str | None = None,
+    ) -> dict[str, Any]:
+        """
+        Generate a podcast from the provided content.
+
+        Use this tool when the user asks to create, generate, or make a podcast.
+        Common triggers include phrases like:
+        - "Give me a podcast about this"
+        - "Create a podcast from this conversation"
+        - "Generate a podcast summary"
+        - "Make a podcast about..."
+        - "Turn this into a podcast"
+
+        The tool will generate a complete audio podcast with two speakers
+        discussing the provided content in an engaging conversational format.
+
+        Args:
+            source_content: The text content to convert into a podcast.
+                           This can be a summary, research findings, or any text
+                           the user wants transformed into an audio podcast.
+            podcast_title: Title for the podcast (default: "SurfSense Podcast")
+            user_prompt: Optional instructions for podcast style, tone, or format.
+                        For example: "Make it casual and fun" or "Focus on the key insights"
+
+        Returns:
+            A dictionary containing:
+            - status: "success" or "error"
+            - podcast_id: The database ID of the saved podcast (for API access)
+            - title: The podcast title
+            - transcript: Full podcast transcript with all dialogue entries
+            - duration_ms: Estimated podcast duration in milliseconds
+            - transcript_entries: Number of dialogue entries
+        """
+        try:
+            # Configure the podcaster graph
+            config = {
+                "configurable": {
+                    "podcast_title": podcast_title,
+                    "user_id": str(user_id),
+                    "search_space_id": search_space_id,
+                    "user_prompt": user_prompt,
+                }
+            }
+
+            # Initialize the podcaster state with the source content
+            initial_state = PodcasterState(
+                source_content=source_content,
+                db_session=db_session,
+            )
+
+            # Run the podcaster graph
+            result = await podcaster_graph.ainvoke(initial_state, config=config)
+
+            # Extract results; "or []" also covers a key that is present but None
+            podcast_transcript = result.get("podcast_transcript") or []
+            file_path = result.get("final_podcast_file_path", "")
+
+            # Normalize every entry once instead of re-branching on its shape
+            # for each derived value below.
+            entries = [_entry_fields(entry) for entry in podcast_transcript]
+
+            # Calculate estimated duration (rough estimate: ~150 words per minute)
+            total_words = sum(len(dialog.split()) for _, dialog in entries)
+            estimated_duration_ms = int((total_words / 150) * 60 * 1000)
+
+            # Full transcript for display; speaker ids are shown 1-based
+            full_transcript = [
+                f"Speaker {speaker_id + 1}: {dialog}" for speaker_id, dialog in entries
+            ]
+
+            # Serializable form of the transcript stored on the Podcast row
+            # (like old system)
+            serializable_transcript = [
+                {"speaker_id": speaker_id, "dialog": dialog}
+                for speaker_id, dialog in entries
+            ]
+
+            # Save podcast to database (like old system) so it persists and can
+            # be fetched by ID afterwards
+            podcast = Podcast(
+                title=podcast_title,
+                podcast_transcript=serializable_transcript,
+                file_location=file_path,
+                search_space_id=search_space_id,
+                # chat_id is None since new-chat uses LangGraph threads, not DB chats
+                chat_id=None,
+                chat_state_version=None,
+            )
+            db_session.add(podcast)
+            await db_session.commit()
+            # Refresh so the database-generated id is loaded on the instance
+            await db_session.refresh(podcast)
+
+            # Return podcast_id - frontend will use it to call the API endpoint
+            # GET /api/v1/podcasts/{podcast_id}/stream (like the old system)
+            return {
+                "status": "success",
+                "podcast_id": podcast.id,
+                "title": podcast_title,
+                "transcript": "\n\n".join(full_transcript),
+                "duration_ms": estimated_duration_ms,
+                "transcript_entries": len(entries),
+            }
+
+        except Exception as e:
+            # Tool boundary: report the failure to the agent as a structured
+            # result instead of raising.
+            error_message = str(e)
+            print(f"[generate_podcast] Error: {error_message}")
+            # Rollback on error. A failing rollback must not replace the
+            # structured error result (and the original error) with a raise.
+            try:
+                await db_session.rollback()
+            except Exception as rollback_error:
+                print(f"[generate_podcast] Rollback failed: {rollback_error}")
+            return {
+                "status": "error",
+                "error": error_message,
+                "title": podcast_title,
+                "podcast_id": None,
+                # Same keys as the success payload so consumers see one shape
+                "transcript": "",
+                "duration_ms": 0,
+                "transcript_entries": 0,
+            }
+
+    return generate_podcast
+
--- a/surfsense_backend/app/agents/new_chat/system_prompt.py
+++ b/surfsense_backend/app/agents/new_chat/system_prompt.py
@ -121,7 +121,8 @@ Today's date (UTC): {resolved_today}
 </system_instruction>{user_section}
 <tools>
 You have access to the following tools:
- search_knowledge_base: Search the user's personal knowledge base for relevant information.
+
+1. search_knowledge_base: Search the user's personal knowledge base for relevant information.
  - Args:
    - query: The search query - be specific and include key terms
    - top_k: Number of results to retrieve (default: 10)
@ -129,6 +130,15 @@ You have access to the following tools:
    - end_date: Optional ISO date/datetime (e.g. "2025-12-19" or "2025-12-19T23:59:59+00:00")
    - connectors_to_search: Optional list of connector enums to search. If omitted, searches all.
  - Returns: Formatted string with relevant documents and their content
+
+2. generate_podcast: Generate an audio podcast from provided content.
+  - Use this when the user asks to create, generate, or make a podcast.
+  - Trigger phrases: "give me a podcast about", "create a podcast", "generate a podcast", "make a podcast", "turn this into a podcast"
+  - Args:
+    - source_content: The text content to convert into a podcast (e.g., a summary, research findings, or conversation)
+    - podcast_title: Optional title for the podcast (default: "SurfSense Podcast")
+    - user_prompt: Optional instructions for podcast style/format (e.g., "Make it casual and fun")
+  - Returns: A podcast with audio that the user can listen to and download
 </tools>
 <tool_call_examples>
 - User: "Fetch all my notes and what's in them?"
@ -136,6 +146,12 @@ You have access to the following tools:

 - User: "What did I discuss on Slack last week about the React migration?"
  - Call: `search_knowledge_base(query="React migration", connectors_to_search=["SLACK_CONNECTOR"], start_date="YYYY-MM-DD", end_date="YYYY-MM-DD")`
+
+- User: "Give me a podcast about AI trends based on what we discussed"
+  - First search for relevant content, then call: `generate_podcast(source_content="[summarized content from search]", podcast_title="AI Trends Podcast")`
+
+- User: "Create a podcast summary of this conversation"
+  - Call: `generate_podcast(source_content="[summary of the conversation so far]", podcast_title="Conversation Summary")`
 </tool_call_examples>{citation_section}
 """