mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-21 18:55:16 +02:00
feat: add podcast generation capabilities to SurfSense deep agent and UI integration
This commit is contained in:
parent
3906ba52e0
commit
4c4e4b3c4c
9 changed files with 985 additions and 22 deletions
|
|
@ -2,7 +2,7 @@
|
|||
SurfSense deep agent implementation.
|
||||
|
||||
This module provides the factory function for creating SurfSense deep agents
|
||||
with knowledge base search capability.
|
||||
with knowledge base search and podcast generation capabilities.
|
||||
"""
|
||||
|
||||
from collections.abc import Sequence
|
||||
|
|
@ -14,6 +14,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
|
|||
|
||||
from app.agents.new_chat.context import SurfSenseContextSchema
|
||||
from app.agents.new_chat.knowledge_base import create_search_knowledge_base_tool
|
||||
from app.agents.new_chat.podcast import create_generate_podcast_tool
|
||||
from app.agents.new_chat.system_prompt import build_surfsense_system_prompt
|
||||
from app.services.connector_service import ConnectorService
|
||||
|
||||
|
|
@ -27,22 +28,27 @@ def create_surfsense_deep_agent(
|
|||
search_space_id: int,
|
||||
db_session: AsyncSession,
|
||||
connector_service: ConnectorService,
|
||||
user_id: str | None = None,
|
||||
user_instructions: str | None = None,
|
||||
enable_citations: bool = True,
|
||||
enable_podcast: bool = True,
|
||||
additional_tools: Sequence[BaseTool] | None = None,
|
||||
):
|
||||
"""
|
||||
Create a SurfSense deep agent with knowledge base search capability.
|
||||
Create a SurfSense deep agent with knowledge base search and podcast generation capabilities.
|
||||
|
||||
Args:
|
||||
llm: ChatLiteLLM instance
|
||||
search_space_id: The user's search space ID
|
||||
db_session: Database session
|
||||
connector_service: Initialized connector service
|
||||
user_id: The user's ID (required for podcast generation)
|
||||
user_instructions: Optional user instructions to inject into the system prompt.
|
||||
These will be added to the system prompt to customize agent behavior.
|
||||
enable_citations: Whether to include citation instructions in the system prompt (default: True).
|
||||
When False, the agent will not be instructed to add citations to responses.
|
||||
enable_podcast: Whether to include the podcast generation tool (default: True).
|
||||
When True and user_id is provided, the agent can generate podcasts.
|
||||
additional_tools: Optional sequence of additional tools to inject into the agent.
|
||||
The search_knowledge_base tool will always be included.
|
||||
|
||||
|
|
@ -58,6 +64,16 @@ def create_surfsense_deep_agent(
|
|||
|
||||
# Combine search tool with any additional tools
|
||||
tools = [search_tool]
|
||||
|
||||
# Add podcast tool if enabled and user_id is provided
|
||||
if enable_podcast and user_id:
|
||||
podcast_tool = create_generate_podcast_tool(
|
||||
search_space_id=search_space_id,
|
||||
db_session=db_session,
|
||||
user_id=str(user_id),
|
||||
)
|
||||
tools.append(podcast_tool)
|
||||
|
||||
if additional_tools:
|
||||
tools.extend(additional_tools)
|
||||
|
||||
|
|
|
|||
170
surfsense_backend/app/agents/new_chat/podcast.py
Normal file
170
surfsense_backend/app/agents/new_chat/podcast.py
Normal file
|
|
@ -0,0 +1,170 @@
|
|||
"""
|
||||
Podcast generation tool for the new chat agent.
|
||||
|
||||
This module provides a factory function for creating the generate_podcast tool
|
||||
that integrates with the existing podcaster agent. Podcasts are saved to the
|
||||
database like the old system, providing authentication and persistence.
|
||||
"""
|
||||
|
||||
from typing import Any
|
||||
|
||||
from langchain_core.tools import tool
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.agents.podcaster.graph import graph as podcaster_graph
|
||||
from app.agents.podcaster.state import State as PodcasterState
|
||||
from app.db import Podcast
|
||||
|
||||
|
||||
# Speaking rate used to estimate audio length from the transcript word count.
_WORDS_PER_MINUTE = 150


def _normalize_transcript_entry(entry: Any) -> dict[str, Any]:
    """Convert one transcript entry into a plain ``{"speaker_id", "dialog"}`` dict.

    Entries are accepted either as dicts or as objects exposing ``speaker_id``
    and ``dialog`` attributes. Missing or ``None`` values fall back to speaker
    ``0`` and an empty dialog so that later arithmetic and string handling
    cannot fail on them.
    """
    if isinstance(entry, dict):
        speaker_id = entry.get("speaker_id", 0)
        dialog = entry.get("dialog", "")
    else:
        speaker_id = getattr(entry, "speaker_id", 0)
        dialog = getattr(entry, "dialog", "")
    return {"speaker_id": speaker_id or 0, "dialog": dialog or ""}


def _estimate_duration_ms(entries: list[dict[str, Any]]) -> int:
    """Estimate playback time in milliseconds from the total dialog word count."""
    total_words = sum(len(entry["dialog"].split()) for entry in entries)
    # Integer arithmetic avoids float truncation (e.g. 399 instead of 400 ms).
    return total_words * 60_000 // _WORDS_PER_MINUTE


def create_generate_podcast_tool(
    search_space_id: int,
    db_session: AsyncSession,
    user_id: str,
):
    """
    Factory function to create the generate_podcast tool with injected dependencies.

    The returned tool closes over the arguments below, so the agent only has to
    supply the podcast content, title and optional style prompt.

    Args:
        search_space_id: The user's search space ID
        db_session: Database session used to persist the generated podcast
        user_id: The user's ID (as string)

    Returns:
        A configured tool function for generating podcasts
    """
    # Local import: keeps this block self-contained without touching the
    # module's import section.
    import logging

    logger = logging.getLogger(__name__)

    @tool
    async def generate_podcast(
        source_content: str,
        podcast_title: str = "SurfSense Podcast",
        user_prompt: str | None = None,
    ) -> dict[str, Any]:
        """
        Generate a podcast from the provided content.

        Use this tool when the user asks to create, generate, or make a podcast.
        Common triggers include phrases like:
        - "Give me a podcast about this"
        - "Create a podcast from this conversation"
        - "Generate a podcast summary"
        - "Make a podcast about..."
        - "Turn this into a podcast"

        The tool will generate a complete audio podcast with two speakers
        discussing the provided content in an engaging conversational format.

        Args:
            source_content: The text content to convert into a podcast.
                This can be a summary, research findings, or any text
                the user wants transformed into an audio podcast.
            podcast_title: Title for the podcast (default: "SurfSense Podcast")
            user_prompt: Optional instructions for podcast style, tone, or format.
                For example: "Make it casual and fun" or "Focus on the key insights"

        Returns:
            A dictionary containing:
            - status: "success" or "error"
            - podcast_id: The database ID of the saved podcast (for API access)
            - title: The podcast title
            - transcript: Full podcast transcript with all dialogue entries
            - duration_ms: Estimated podcast duration in milliseconds
            - transcript_entries: Number of dialogue entries
        """
        # NOTE: the docstring above is what the @tool decorator exposes to the
        # model as the tool description, so it is kept verbatim.
        try:
            # Configure the podcaster graph
            config = {
                "configurable": {
                    "podcast_title": podcast_title,
                    "user_id": str(user_id),
                    "search_space_id": search_space_id,
                    "user_prompt": user_prompt,
                }
            }

            # Initialize the podcaster state with the source content
            initial_state = PodcasterState(
                source_content=source_content,
                db_session=db_session,
            )

            # Run the podcaster graph
            result = await podcaster_graph.ainvoke(initial_state, config=config)

            # `or` also covers keys that are present but set to None.
            # Entries are normalized once so the duration estimate, display
            # text and persisted JSON all agree on the same data.
            transcript = [
                _normalize_transcript_entry(entry)
                for entry in result.get("podcast_transcript") or []
            ]
            file_path = result.get("final_podcast_file_path") or ""

            # Human-readable transcript; speakers are shown 1-based.
            transcript_text = "\n\n".join(
                f"Speaker {entry['speaker_id'] + 1}: {entry['dialog']}"
                for entry in transcript
            )

            # Save podcast to database (like old system)
            # This provides authentication and persistence
            podcast = Podcast(
                title=podcast_title,
                podcast_transcript=transcript,
                file_location=file_path,
                search_space_id=search_space_id,
                # chat_id is None since new-chat uses LangGraph threads, not DB chats
                chat_id=None,
                chat_state_version=None,
            )
            db_session.add(podcast)
            await db_session.commit()
            # Refresh so the database-generated id is loaded on the instance.
            await db_session.refresh(podcast)

            # Return podcast_id - frontend will use it to call the API endpoint
            # GET /api/v1/podcasts/{podcast_id}/stream (like the old system)
            return {
                "status": "success",
                "podcast_id": podcast.id,
                "title": podcast_title,
                "transcript": transcript_text,
                "duration_ms": _estimate_duration_ms(transcript),
                "transcript_entries": len(transcript),
            }

        except Exception as e:
            # Tool boundary: failures are reported back to the agent as an
            # error payload instead of being raised.
            error_message = str(e)
            logger.exception("[generate_podcast] Error: %s", error_message)
            try:
                # Rollback on error
                await db_session.rollback()
            except Exception:
                # A failed rollback must not hide the original error from the agent.
                logger.exception("[generate_podcast] Rollback failed")
            return {
                "status": "error",
                "error": error_message,
                "title": podcast_title,
                "podcast_id": None,
                "duration_ms": 0,
                "transcript_entries": 0,
            }

    return generate_podcast
|
||||
|
||||
|
|
@ -121,7 +121,8 @@ Today's date (UTC): {resolved_today}
|
|||
</system_instruction>{user_section}
|
||||
<tools>
|
||||
You have access to the following tools:
|
||||
- search_knowledge_base: Search the user's personal knowledge base for relevant information.
|
||||
|
||||
1. search_knowledge_base: Search the user's personal knowledge base for relevant information.
|
||||
- Args:
|
||||
- query: The search query - be specific and include key terms
|
||||
- top_k: Number of results to retrieve (default: 10)
|
||||
|
|
@ -129,6 +130,15 @@ You have access to the following tools:
|
|||
- end_date: Optional ISO date/datetime (e.g. "2025-12-19" or "2025-12-19T23:59:59+00:00")
|
||||
- connectors_to_search: Optional list of connector enums to search. If omitted, searches all.
|
||||
- Returns: Formatted string with relevant documents and their content
|
||||
|
||||
2. generate_podcast: Generate an audio podcast from provided content.
|
||||
- Use this when the user asks to create, generate, or make a podcast.
|
||||
- Trigger phrases: "give me a podcast about", "create a podcast", "generate a podcast", "make a podcast", "turn this into a podcast"
|
||||
- Args:
|
||||
- source_content: The text content to convert into a podcast (e.g., a summary, research findings, or conversation)
|
||||
- podcast_title: Optional title for the podcast (default: "SurfSense Podcast")
|
||||
- user_prompt: Optional instructions for podcast style/format (e.g., "Make it casual and fun")
|
||||
- Returns: A podcast with audio that the user can listen to and download
|
||||
</tools>
|
||||
<tool_call_examples>
|
||||
- User: "Fetch all my notes and what's in them?"
|
||||
|
|
@ -136,6 +146,12 @@ You have access to the following tools:
|
|||
|
||||
- User: "What did I discuss on Slack last week about the React migration?"
|
||||
- Call: `search_knowledge_base(query="React migration", connectors_to_search=["SLACK_CONNECTOR"], start_date="YYYY-MM-DD", end_date="YYYY-MM-DD")`
|
||||
|
||||
- User: "Give me a podcast about AI trends based on what we discussed"
|
||||
- First search for relevant content, then call: `generate_podcast(source_content="[summarized content from search]", podcast_title="AI Trends Podcast")`
|
||||
|
||||
- User: "Create a podcast summary of this conversation"
|
||||
- Call: `generate_podcast(source_content="[summary of the conversation so far]", podcast_title="Conversation Summary")`
|
||||
</tool_call_examples>{citation_section}
|
||||
"""
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue