feat: add podcast generation capabilities to SurfSense deep agent and UI integration

This commit is contained in:
Anish Sarkar 2025-12-21 19:07:46 +05:30
parent 3906ba52e0
commit 4c4e4b3c4c
9 changed files with 985 additions and 22 deletions

View file

@ -2,7 +2,7 @@
SurfSense deep agent implementation.
This module provides the factory function for creating SurfSense deep agents
with knowledge base search capability.
with knowledge base search and podcast generation capabilities.
"""
from collections.abc import Sequence
@ -14,6 +14,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
from app.agents.new_chat.context import SurfSenseContextSchema
from app.agents.new_chat.knowledge_base import create_search_knowledge_base_tool
from app.agents.new_chat.podcast import create_generate_podcast_tool
from app.agents.new_chat.system_prompt import build_surfsense_system_prompt
from app.services.connector_service import ConnectorService
@ -27,22 +28,27 @@ def create_surfsense_deep_agent(
search_space_id: int,
db_session: AsyncSession,
connector_service: ConnectorService,
user_id: str | None = None,
user_instructions: str | None = None,
enable_citations: bool = True,
enable_podcast: bool = True,
additional_tools: Sequence[BaseTool] | None = None,
):
"""
Create a SurfSense deep agent with knowledge base search capability.
Create a SurfSense deep agent with knowledge base search and podcast generation capabilities.
Args:
llm: ChatLiteLLM instance
search_space_id: The user's search space ID
db_session: Database session
connector_service: Initialized connector service
user_id: The user's ID (required for podcast generation)
user_instructions: Optional user instructions to inject into the system prompt.
These will be added to the system prompt to customize agent behavior.
enable_citations: Whether to include citation instructions in the system prompt (default: True).
When False, the agent will not be instructed to add citations to responses.
enable_podcast: Whether to include the podcast generation tool (default: True).
When True and user_id is provided, the agent can generate podcasts.
additional_tools: Optional sequence of additional tools to inject into the agent.
The search_knowledge_base tool will always be included.
@ -58,6 +64,16 @@ def create_surfsense_deep_agent(
# Combine search tool with any additional tools
tools = [search_tool]
# Add podcast tool if enabled and user_id is provided
if enable_podcast and user_id:
podcast_tool = create_generate_podcast_tool(
search_space_id=search_space_id,
db_session=db_session,
user_id=str(user_id),
)
tools.append(podcast_tool)
if additional_tools:
tools.extend(additional_tools)

View file

@ -0,0 +1,170 @@
"""
Podcast generation tool for the new chat agent.
This module provides a factory function for creating the generate_podcast tool
that integrates with the existing podcaster agent. Podcasts are saved to the
database, as in the old system, which provides authentication and persistence.
"""
import logging
from typing import Any

from langchain_core.tools import tool
from sqlalchemy.ext.asyncio import AsyncSession

from app.agents.podcaster.graph import graph as podcaster_graph
from app.agents.podcaster.state import State as PodcasterState
from app.db import Podcast
logger = logging.getLogger(__name__)

# Speaking rate assumed when estimating audio length from the transcript text.
_WORDS_PER_MINUTE = 150


def _transcript_entry_to_dict(entry: Any) -> dict[str, Any]:
    """Normalize one transcript entry to a plain ``speaker_id``/``dialog`` dict.

    The podcaster graph result is handled both as attribute-style objects and
    as mappings, so both shapes are accepted here. Missing mapping keys fall
    back to speaker ``0`` and an empty dialog.
    """
    if hasattr(entry, "speaker_id"):
        return {"speaker_id": entry.speaker_id, "dialog": entry.dialog}
    return {
        "speaker_id": entry.get("speaker_id", 0),
        "dialog": entry.get("dialog", ""),
    }


def create_generate_podcast_tool(
    search_space_id: int,
    db_session: AsyncSession,
    user_id: str,
):
    """
    Factory function to create the generate_podcast tool with injected dependencies.

    The returned tool closes over the arguments below, so the agent only has
    to supply the podcast content, title and optional style prompt.

    Args:
        search_space_id: The user's search space ID
        db_session: Database session used to persist the generated podcast
        user_id: The user's ID (as string)

    Returns:
        A configured tool function for generating podcasts
    """

    # NOTE: the docstring of the decorated function is what `@tool` exposes to
    # the model as the tool description, so its wording is part of runtime
    # behavior and is kept as-is.
    @tool
    async def generate_podcast(
        source_content: str,
        podcast_title: str = "SurfSense Podcast",
        user_prompt: str | None = None,
    ) -> dict[str, Any]:
        """
        Generate a podcast from the provided content.

        Use this tool when the user asks to create, generate, or make a podcast.
        Common triggers include phrases like:
        - "Give me a podcast about this"
        - "Create a podcast from this conversation"
        - "Generate a podcast summary"
        - "Make a podcast about..."
        - "Turn this into a podcast"

        The tool will generate a complete audio podcast with two speakers
        discussing the provided content in an engaging conversational format.

        Args:
            source_content: The text content to convert into a podcast.
                This can be a summary, research findings, or any text
                the user wants transformed into an audio podcast.
            podcast_title: Title for the podcast (default: "SurfSense Podcast")
            user_prompt: Optional instructions for podcast style, tone, or format.
                For example: "Make it casual and fun" or "Focus on the key insights"

        Returns:
            A dictionary containing:
            - status: "success" or "error"
            - podcast_id: The database ID of the saved podcast (for API access)
            - title: The podcast title
            - transcript: Full podcast transcript with all dialogue entries
            - duration_ms: Estimated podcast duration in milliseconds
            - transcript_entries: Number of dialogue entries
        """
        try:
            # Configure the podcaster graph
            config = {
                "configurable": {
                    "podcast_title": podcast_title,
                    "user_id": str(user_id),
                    "search_space_id": search_space_id,
                    "user_prompt": user_prompt,
                }
            }

            # Initialize the podcaster state with the source content
            initial_state = PodcasterState(
                source_content=source_content,
                db_session=db_session,
            )

            # Run the podcaster graph
            result = await podcaster_graph.ainvoke(initial_state, config=config)

            # Extract results. `or []` also covers a key that is present but None.
            raw_transcript = result.get("podcast_transcript") or []
            file_path = result.get("final_podcast_file_path", "")

            # Normalize once; this serializable form is also what gets stored
            # in the database (like the old system).
            entries = [_transcript_entry_to_dict(entry) for entry in raw_transcript]

            # Rough duration estimate derived from the transcript word count.
            total_words = sum(len(entry["dialog"].split()) for entry in entries)
            estimated_duration_ms = int((total_words / _WORDS_PER_MINUTE) * 60 * 1000)

            # Full transcript for display; speaker ids are shown 1-based.
            full_transcript = [
                f"Speaker {entry['speaker_id'] + 1}: {entry['dialog']}"
                for entry in entries
            ]

            # Save podcast to database (like old system)
            # This provides authentication and persistence
            podcast = Podcast(
                title=podcast_title,
                podcast_transcript=entries,
                file_location=file_path,
                search_space_id=search_space_id,
                # chat_id is None since new-chat uses LangGraph threads, not DB chats
                chat_id=None,
                chat_state_version=None,
            )
            db_session.add(podcast)
            await db_session.commit()
            # Reload the row so that `podcast.id` can be read below.
            await db_session.refresh(podcast)

            # Return podcast_id - frontend will use it to call the API endpoint
            # GET /api/v1/podcasts/{podcast_id}/stream (like the old system)
            return {
                "status": "success",
                "podcast_id": podcast.id,
                "title": podcast_title,
                "transcript": "\n\n".join(full_transcript),
                "duration_ms": estimated_duration_ms,
                "transcript_entries": len(entries),
            }
        except Exception as e:
            # Tool boundary: report the failure to the agent as a result dict
            # instead of letting the exception propagate.
            error_message = str(e)
            logger.exception("[generate_podcast] Error: %s", error_message)

            # Rollback on error. A failing rollback must not mask the original
            # error or escape the tool, so it is logged and swallowed.
            try:
                await db_session.rollback()
            except Exception:
                logger.exception("[generate_podcast] Rollback failed")

            return {
                "status": "error",
                "error": error_message,
                "title": podcast_title,
                "podcast_id": None,
                "duration_ms": 0,
                "transcript_entries": 0,
            }

    return generate_podcast

View file

@ -121,7 +121,8 @@ Today's date (UTC): {resolved_today}
</system_instruction>{user_section}
<tools>
You have access to the following tools:
- search_knowledge_base: Search the user's personal knowledge base for relevant information.
1. search_knowledge_base: Search the user's personal knowledge base for relevant information.
- Args:
- query: The search query - be specific and include key terms
- top_k: Number of results to retrieve (default: 10)
@ -129,6 +130,15 @@ You have access to the following tools:
- end_date: Optional ISO date/datetime (e.g. "2025-12-19" or "2025-12-19T23:59:59+00:00")
- connectors_to_search: Optional list of connector enums to search. If omitted, searches all.
- Returns: Formatted string with relevant documents and their content
2. generate_podcast: Generate an audio podcast from provided content.
- Use this when the user asks to create, generate, or make a podcast.
- Trigger phrases: "give me a podcast about", "create a podcast", "generate a podcast", "make a podcast", "turn this into a podcast"
- Args:
- source_content: The text content to convert into a podcast (e.g., a summary, research findings, or conversation)
- podcast_title: Optional title for the podcast (default: "SurfSense Podcast")
- user_prompt: Optional instructions for podcast style/format (e.g., "Make it casual and fun")
- Returns: A podcast with audio that the user can listen to and download
</tools>
<tool_call_examples>
- User: "Fetch all my notes and what's in them?"
@ -136,6 +146,12 @@ You have access to the following tools:
- User: "What did I discuss on Slack last week about the React migration?"
- Call: `search_knowledge_base(query="React migration", connectors_to_search=["SLACK_CONNECTOR"], start_date="YYYY-MM-DD", end_date="YYYY-MM-DD")`
- User: "Give me a podcast about AI trends based on what we discussed"
- First search for relevant content, then call: `generate_podcast(source_content="[summarized content from search]", podcast_title="AI Trends Podcast")`
- User: "Create a podcast summary of this conversation"
- Call: `generate_podcast(source_content="[summary of the conversation so far]", podcast_title="Conversation Summary")`
</tool_call_examples>{citation_section}
"""