feat: add podcast generation capabilities to SurfSense deep agent and UI integration

Anish Sarkar 2025-12-21 19:07:46 +05:30
parent 3906ba52e0
commit 4c4e4b3c4c
9 changed files with 985 additions and 22 deletions

@@ -2,7 +2,7 @@
SurfSense deep agent implementation.
This module provides the factory function for creating SurfSense deep agents
with knowledge base search capability.
with knowledge base search and podcast generation capabilities.
"""
from collections.abc import Sequence
@@ -14,6 +14,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
from app.agents.new_chat.context import SurfSenseContextSchema
from app.agents.new_chat.knowledge_base import create_search_knowledge_base_tool
from app.agents.new_chat.podcast import create_generate_podcast_tool
from app.agents.new_chat.system_prompt import build_surfsense_system_prompt
from app.services.connector_service import ConnectorService
@@ -27,22 +28,27 @@ def create_surfsense_deep_agent(
search_space_id: int,
db_session: AsyncSession,
connector_service: ConnectorService,
user_id: str | None = None,
user_instructions: str | None = None,
enable_citations: bool = True,
enable_podcast: bool = True,
additional_tools: Sequence[BaseTool] | None = None,
):
"""
Create a SurfSense deep agent with knowledge base search capability.
Create a SurfSense deep agent with knowledge base search and podcast generation capabilities.
Args:
llm: ChatLiteLLM instance
search_space_id: The user's search space ID
db_session: Database session
connector_service: Initialized connector service
user_id: The user's ID (required for podcast generation)
user_instructions: Optional user instructions to inject into the system prompt.
These will be added to the system prompt to customize agent behavior.
enable_citations: Whether to include citation instructions in the system prompt (default: True).
When False, the agent will not be instructed to add citations to responses.
enable_podcast: Whether to include the podcast generation tool (default: True).
When True and user_id is provided, the agent can generate podcasts.
additional_tools: Optional sequence of additional tools to inject into the agent.
The search_knowledge_base tool will always be included.
@@ -58,6 +64,16 @@ def create_surfsense_deep_agent(
# Combine search tool with any additional tools
tools = [search_tool]
# Add podcast tool if enabled and user_id is provided
if enable_podcast and user_id:
podcast_tool = create_generate_podcast_tool(
search_space_id=search_space_id,
db_session=db_session,
user_id=str(user_id),
)
tools.append(podcast_tool)
if additional_tools:
tools.extend(additional_tools)
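For context, a minimal sketch of calling this factory with hypothetical values (the real call site appears in the streaming route later in this commit). Note that `user_id` gates the podcast tool even when `enable_podcast` stays True:

    # Sketch only: `llm`, `session`, and `connector_service` are assumed to exist.
    agent = create_surfsense_deep_agent(
        llm=llm,                              # ChatLiteLLM instance
        search_space_id=42,                   # assumed search space ID
        db_session=session,                   # active AsyncSession
        connector_service=connector_service,
        user_id=None,                         # no user ID -> podcast tool is skipped
        enable_podcast=True,                  # default; gated by user_id above
    )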

@@ -0,0 +1,170 @@
"""
Podcast generation tool for the new chat agent.
This module provides a factory function for creating the generate_podcast tool
that integrates with the existing podcaster agent. Podcasts are saved to the
database, as in the old system, which provides authentication and persistence.
"""
from typing import Any
from langchain_core.tools import tool
from sqlalchemy.ext.asyncio import AsyncSession
from app.agents.podcaster.graph import graph as podcaster_graph
from app.agents.podcaster.state import State as PodcasterState
from app.db import Podcast
def create_generate_podcast_tool(
search_space_id: int,
db_session: AsyncSession,
user_id: str,
):
"""
Factory function to create the generate_podcast tool with injected dependencies.
Args:
search_space_id: The user's search space ID
db_session: Database session
user_id: The user's ID (as string)
Returns:
A configured tool function for generating podcasts
"""
@tool
async def generate_podcast(
source_content: str,
podcast_title: str = "SurfSense Podcast",
user_prompt: str | None = None,
) -> dict[str, Any]:
"""
Generate a podcast from the provided content.
Use this tool when the user asks to create, generate, or make a podcast.
Common triggers include phrases like:
- "Give me a podcast about this"
- "Create a podcast from this conversation"
- "Generate a podcast summary"
- "Make a podcast about..."
- "Turn this into a podcast"
The tool will generate a complete audio podcast with two speakers
discussing the provided content in an engaging conversational format.
Args:
source_content: The text content to convert into a podcast.
This can be a summary, research findings, or any text
the user wants transformed into an audio podcast.
podcast_title: Title for the podcast (default: "SurfSense Podcast")
user_prompt: Optional instructions for podcast style, tone, or format.
For example: "Make it casual and fun" or "Focus on the key insights"
Returns:
A dictionary containing:
- status: "success" or "error"
- podcast_id: The database ID of the saved podcast (for API access)
- title: The podcast title
- transcript: Full podcast transcript with all dialogue entries
- duration_ms: Estimated podcast duration in milliseconds
- transcript_entries: Number of dialogue entries
"""
try:
# Configure the podcaster graph
config = {
"configurable": {
"podcast_title": podcast_title,
"user_id": str(user_id),
"search_space_id": search_space_id,
"user_prompt": user_prompt,
}
}
# Initialize the podcaster state with the source content
initial_state = PodcasterState(
source_content=source_content,
db_session=db_session,
)
# Run the podcaster graph
result = await podcaster_graph.ainvoke(initial_state, config=config)
# Extract results
podcast_transcript = result.get("podcast_transcript", [])
file_path = result.get("final_podcast_file_path", "")
# Calculate estimated duration (rough estimate: ~150 words per minute)
total_words = sum(
len(entry.dialog.split()) if hasattr(entry, "dialog") else len(entry.get("dialog", "").split())
for entry in podcast_transcript
)
estimated_duration_ms = int((total_words / 150) * 60 * 1000)
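# Worked example (illustrative): a transcript totalling 450 words gives
# int((450 / 150) * 60 * 1000) = 180000 ms, i.e. roughly a 3-minute podcast.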
# Create full transcript for display (all entries, complete dialog)
full_transcript = []
for entry in podcast_transcript:
if hasattr(entry, "speaker_id"):
speaker = f"Speaker {entry.speaker_id + 1}"
dialog = entry.dialog
else:
speaker = f"Speaker {entry.get('speaker_id', 0) + 1}"
dialog = entry.get("dialog", "")
full_transcript.append(f"{speaker}: {dialog}")
# Convert podcast transcript entries to serializable format (like old system)
serializable_transcript = []
for entry in podcast_transcript:
if hasattr(entry, "speaker_id"):
serializable_transcript.append({
"speaker_id": entry.speaker_id,
"dialog": entry.dialog
})
else:
serializable_transcript.append({
"speaker_id": entry.get("speaker_id", 0),
"dialog": entry.get("dialog", "")
})
# Save podcast to database (like old system)
# This provides authentication and persistence
podcast = Podcast(
title=podcast_title,
podcast_transcript=serializable_transcript,
file_location=file_path,
search_space_id=search_space_id,
# chat_id is None since new-chat uses LangGraph threads, not DB chats
chat_id=None,
chat_state_version=None,
)
db_session.add(podcast)
await db_session.commit()
await db_session.refresh(podcast)
# Return podcast_id - frontend will use it to call the API endpoint
# GET /api/v1/podcasts/{podcast_id}/stream (like the old system)
return {
"status": "success",
"podcast_id": podcast.id,
"title": podcast_title,
"transcript": "\n\n".join(full_transcript),
"duration_ms": estimated_duration_ms,
"transcript_entries": len(podcast_transcript),
}
except Exception as e:
error_message = str(e)
print(f"[generate_podcast] Error: {error_message}")
# Rollback on error
await db_session.rollback()
return {
"status": "error",
"error": error_message,
"title": podcast_title,
"podcast_id": None,
"duration_ms": 0,
"transcript_entries": 0,
}
return generate_podcast
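A minimal sketch of exercising the tool directly, outside the agent loop (assuming an open AsyncSession named `session`; tools built with LangChain's `@tool` decorator expose `ainvoke` taking a dict of arguments):

    # Hypothetical direct invocation; the deep agent normally calls this tool.
    tool = create_generate_podcast_tool(
        search_space_id=1,         # assumed
        db_session=session,        # assumed active AsyncSession
        user_id="user-123",        # assumed
    )
    result = await tool.ainvoke({
        "source_content": "Key findings from today's research...",
        "podcast_title": "Research Recap",
    })
    print(result["status"], result.get("podcast_id"))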

@@ -121,7 +121,8 @@ Today's date (UTC): {resolved_today}
</system_instruction>{user_section}
<tools>
You have access to the following tools:
- search_knowledge_base: Search the user's personal knowledge base for relevant information.
1. search_knowledge_base: Search the user's personal knowledge base for relevant information.
- Args:
- query: The search query - be specific and include key terms
- top_k: Number of results to retrieve (default: 10)
@@ -129,6 +130,15 @@ You have access to the following tools:
- end_date: Optional ISO date/datetime (e.g. "2025-12-19" or "2025-12-19T23:59:59+00:00")
- connectors_to_search: Optional list of connector enums to search. If omitted, searches all.
- Returns: Formatted string with relevant documents and their content
2. generate_podcast: Generate an audio podcast from provided content.
- Use this when the user asks to create, generate, or make a podcast.
- Trigger phrases: "give me a podcast about", "create a podcast", "generate a podcast", "make a podcast", "turn this into a podcast"
- Args:
- source_content: The text content to convert into a podcast (e.g., a summary, research findings, or conversation)
- podcast_title: Optional title for the podcast (default: "SurfSense Podcast")
- user_prompt: Optional instructions for podcast style/format (e.g., "Make it casual and fun")
- Returns: A podcast with audio that the user can listen to and download
</tools>
<tool_call_examples>
- User: "Fetch all my notes and what's in them?"
@@ -136,6 +146,12 @@ You have access to the following tools:
- User: "What did I discuss on Slack last week about the React migration?"
- Call: `search_knowledge_base(query="React migration", connectors_to_search=["SLACK_CONNECTOR"], start_date="YYYY-MM-DD", end_date="YYYY-MM-DD")`
- User: "Give me a podcast about AI trends based on what we discussed"
- First search for relevant content, then call: `generate_podcast(source_content="[summarized content from search]", podcast_title="AI Trends Podcast")`
- User: "Create a podcast summary of this conversation"
- Call: `generate_podcast(source_content="[summary of the conversation so far]", podcast_title="Conversation Summary")`
</tool_call_examples>{citation_section}
"""

@@ -5,6 +5,7 @@ This module streams responses from the deep agent using the Vercel AI SDK
Data Stream Protocol (SSE format).
"""
import json
from collections.abc import AsyncGenerator
from uuid import UUID
@@ -73,12 +74,14 @@ async def stream_new_chat(
# Create connector service
connector_service = ConnectorService(session, search_space_id=search_space_id)
# Create the deep agent
# Create the deep agent with podcast capability
agent = create_surfsense_deep_agent(
llm=llm,
search_space_id=search_space_id,
db_session=session,
connector_service=connector_service,
user_id=str(user_id),
enable_podcast=True,
)
# Build input with just the current user query
@@ -162,22 +165,72 @@ async def stream_new_chat(
f"Searching knowledge base: {query[:100]}{'...' if len(query) > 100 else ''}",
"info",
)
elif tool_name == "generate_podcast":
title = (
tool_input.get("podcast_title", "SurfSense Podcast")
if isinstance(tool_input, dict)
else "SurfSense Podcast"
)
yield streaming_service.format_terminal_info(
f"Generating podcast: {title}",
"info",
)
elif event_type == "on_tool_end":
run_id = event.get("run_id", "")
tool_output = event.get("data", {}).get("output", "")
tool_name = event.get("name", "unknown_tool")
raw_output = event.get("data", {}).get("output", "")
# Extract content from ToolMessage if needed
# LangGraph may return a ToolMessage object instead of raw dict
if hasattr(raw_output, "content"):
# It's a ToolMessage object - extract the content
content = raw_output.content
# If content is a string that looks like JSON, try to parse it
if isinstance(content, str):
try:
tool_output = json.loads(content)
except (json.JSONDecodeError, TypeError):
tool_output = {"result": content}
elif isinstance(content, dict):
tool_output = content
else:
tool_output = {"result": str(content)}
elif isinstance(raw_output, dict):
tool_output = raw_output
else:
tool_output = {"result": str(raw_output) if raw_output else "completed"}
tool_call_id = f"call_{run_id[:32]}" if run_id else "call_unknown"
# Don't stream the full output (can be very large), just acknowledge
yield streaming_service.format_tool_output_available(
tool_call_id,
{"status": "completed", "result_length": len(str(tool_output))},
)
yield streaming_service.format_terminal_info(
"Knowledge base search completed", "success"
)
# Handle different tool outputs
if tool_name == "generate_podcast":
# Stream the full podcast result so frontend can render the audio player
yield streaming_service.format_tool_output_available(
tool_call_id,
tool_output if isinstance(tool_output, dict) else {"result": tool_output},
)
# Send appropriate terminal message based on status
if isinstance(tool_output, dict) and tool_output.get("status") == "success":
yield streaming_service.format_terminal_info(
f"Podcast generated successfully: {tool_output.get('title', 'Podcast')}",
"success",
)
else:
error_msg = tool_output.get("error", "Unknown error") if isinstance(tool_output, dict) else "Unknown error"
yield streaming_service.format_terminal_info(
f"Podcast generation failed: {error_msg}",
"error",
)
else:
# Don't stream the full output for other tools (can be very large), just acknowledge
yield streaming_service.format_tool_output_available(
tool_call_id,
{"status": "completed", "result_length": len(str(tool_output))},
)
yield streaming_service.format_terminal_info(
"Knowledge base search completed", "success"
)
# Handle chain/agent end to close any open text blocks
elif event_type in ("on_chain_end", "on_agent_end"):

@@ -4,6 +4,7 @@ import { AssistantRuntimeProvider, useLocalRuntime } from "@assistant-ui/react";
import { useParams } from "next/navigation";
import { useMemo } from "react";
import { Thread } from "@/components/assistant-ui/thread";
import { GeneratePodcastToolUI } from "@/components/tool-ui/generate-podcast";
import { createNewChatAdapter } from "@/lib/chat/new-chat-transport";
export default function NewChatPage() {
@@ -38,6 +39,8 @@
return (
<AssistantRuntimeProvider runtime={runtime}>
{/* Register tool UI components */}
<GeneratePodcastToolUI />
<div className="h-[calc(100vh-64px)] max-h-[calc(100vh-64px)] overflow-hidden">
<Thread />
</div>

@@ -0,0 +1,310 @@
"use client";
import { DownloadIcon, PauseIcon, PlayIcon, Volume2Icon, VolumeXIcon } from "lucide-react";
import Image from "next/image";
import { useCallback, useEffect, useRef, useState } from "react";
import { Button } from "@/components/ui/button";
import { Slider } from "@/components/ui/slider";
import { cn } from "@/lib/utils";
interface AudioProps {
id: string;
assetId?: string;
src: string;
title: string;
description?: string;
artwork?: string;
durationMs?: number;
className?: string;
}
function formatTime(seconds: number): string {
if (!Number.isFinite(seconds) || seconds < 0) return "0:00";
const mins = Math.floor(seconds / 60);
const secs = Math.floor(seconds % 60);
return `${mins}:${secs.toString().padStart(2, "0")}`;
}
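// e.g. formatTime(125) === "2:05"; non-finite or negative input yields "0:00".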
export function Audio({
id,
src,
title,
description,
artwork,
durationMs,
className,
}: AudioProps) {
const audioRef = useRef<HTMLAudioElement>(null);
const [isPlaying, setIsPlaying] = useState(false);
const [currentTime, setCurrentTime] = useState(0);
const [duration, setDuration] = useState(durationMs ? durationMs / 1000 : 0);
const [volume, setVolume] = useState(1);
const [isMuted, setIsMuted] = useState(false);
const [isLoading, setIsLoading] = useState(true);
const [error, setError] = useState<string | null>(null);
// Handle play/pause
const togglePlayPause = useCallback(() => {
const audio = audioRef.current;
if (!audio) return;
if (isPlaying) {
audio.pause();
} else {
audio.play().catch((err) => {
console.error("Error playing audio:", err);
setError("Failed to play audio");
});
}
}, [isPlaying]);
// Handle seek
const handleSeek = useCallback((value: number[]) => {
const audio = audioRef.current;
if (!audio || !Number.isFinite(value[0])) return;
audio.currentTime = value[0];
setCurrentTime(value[0]);
}, []);
// Handle volume change
const handleVolumeChange = useCallback((value: number[]) => {
const audio = audioRef.current;
if (!audio || !Number.isFinite(value[0])) return;
const newVolume = value[0];
audio.volume = newVolume;
setVolume(newVolume);
setIsMuted(newVolume === 0);
}, []);
// Toggle mute
const toggleMute = useCallback(() => {
const audio = audioRef.current;
if (!audio) return;
if (isMuted) {
audio.volume = volume || 1;
setIsMuted(false);
} else {
audio.volume = 0;
setIsMuted(true);
}
}, [isMuted, volume]);
// Handle download
const handleDownload = useCallback(async () => {
try {
const response = await fetch(src);
const blob = await response.blob();
const url = window.URL.createObjectURL(blob);
const a = document.createElement("a");
a.href = url;
a.download = `${title.replace(/[^a-zA-Z0-9]/g, "_")}.mp3`;
document.body.appendChild(a);
a.click();
document.body.removeChild(a);
window.URL.revokeObjectURL(url);
} catch (err) {
console.error("Error downloading audio:", err);
}
}, [src, title]);
// Set up audio event listeners
useEffect(() => {
const audio = audioRef.current;
if (!audio) return;
const handleLoadedMetadata = () => {
setDuration(audio.duration);
setIsLoading(false);
};
const handleTimeUpdate = () => {
setCurrentTime(audio.currentTime);
};
const handlePlay = () => setIsPlaying(true);
const handlePause = () => setIsPlaying(false);
const handleEnded = () => {
setIsPlaying(false);
setCurrentTime(0);
};
const handleError = () => {
setError("Failed to load audio");
setIsLoading(false);
};
const handleCanPlay = () => setIsLoading(false);
audio.addEventListener("loadedmetadata", handleLoadedMetadata);
audio.addEventListener("timeupdate", handleTimeUpdate);
audio.addEventListener("play", handlePlay);
audio.addEventListener("pause", handlePause);
audio.addEventListener("ended", handleEnded);
audio.addEventListener("error", handleError);
audio.addEventListener("canplay", handleCanPlay);
return () => {
audio.removeEventListener("loadedmetadata", handleLoadedMetadata);
audio.removeEventListener("timeupdate", handleTimeUpdate);
audio.removeEventListener("play", handlePlay);
audio.removeEventListener("pause", handlePause);
audio.removeEventListener("ended", handleEnded);
audio.removeEventListener("error", handleError);
audio.removeEventListener("canplay", handleCanPlay);
};
}, []);
if (error) {
return (
<div
className={cn(
"flex items-center gap-4 rounded-xl border border-destructive/20 bg-destructive/5 p-4",
className,
)}
>
<div className="flex size-16 items-center justify-center rounded-lg bg-destructive/10">
<Volume2Icon className="size-8 text-destructive" />
</div>
<div className="flex-1">
<p className="font-medium text-destructive">{title}</p>
<p className="text-destructive/70 text-sm">{error}</p>
</div>
</div>
);
}
return (
<div
id={id}
className={cn(
"group relative overflow-hidden rounded-xl border bg-gradient-to-br from-background to-muted/30 p-4 shadow-sm transition-all hover:shadow-md",
className,
)}
>
{/* Hidden audio element */}
<audio ref={audioRef} src={src} preload="metadata">
<track kind="captions" srcLang="en" label="English captions" default />
</audio>
<div className="flex gap-4">
{/* Artwork */}
<div className="relative shrink-0">
<div className="relative size-20 overflow-hidden rounded-lg bg-gradient-to-br from-primary/20 to-primary/5 shadow-inner">
{artwork ? (
<Image
src={artwork}
alt={title}
fill
className="object-cover"
unoptimized
/>
) : (
<div className="flex size-full items-center justify-center">
<Volume2Icon className="size-8 text-primary/50" />
</div>
)}
{/* Play overlay on artwork */}
<button
type="button"
onClick={togglePlayPause}
className="absolute inset-0 flex items-center justify-center bg-black/0 opacity-0 transition-all group-hover:bg-black/30 group-hover:opacity-100"
aria-label={isPlaying ? "Pause" : "Play"}
>
{isPlaying ? (
<PauseIcon className="size-8 text-white drop-shadow-lg" />
) : (
<PlayIcon className="size-8 text-white drop-shadow-lg" />
)}
</button>
</div>
</div>
{/* Content */}
<div className="flex min-w-0 flex-1 flex-col justify-between">
{/* Title and description */}
<div className="min-w-0">
<h3 className="truncate font-semibold text-foreground">{title}</h3>
{description && (
<p className="mt-0.5 line-clamp-1 text-muted-foreground text-sm">
{description}
</p>
)}
</div>
{/* Progress bar */}
<div className="mt-2 space-y-1">
<Slider
value={[currentTime]}
max={duration || 100}
step={0.1}
onValueChange={handleSeek}
className="cursor-pointer"
disabled={isLoading}
/>
<div className="flex justify-between text-muted-foreground text-xs">
<span>{formatTime(currentTime)}</span>
<span>{formatTime(duration)}</span>
</div>
</div>
</div>
</div>
{/* Controls */}
<div className="mt-3 flex items-center justify-between border-t pt-3">
<div className="flex items-center gap-2">
{/* Play/Pause button */}
<Button
variant="default"
size="sm"
onClick={togglePlayPause}
disabled={isLoading}
className="gap-2"
>
{isLoading ? (
<div className="size-4 animate-spin rounded-full border-2 border-current border-t-transparent" />
) : isPlaying ? (
<PauseIcon className="size-4" />
) : (
<PlayIcon className="size-4" />
)}
{isPlaying ? "Pause" : "Play"}
</Button>
{/* Volume control */}
<div className="flex items-center gap-2">
<Button
variant="ghost"
size="icon"
onClick={toggleMute}
className="size-8"
>
{isMuted ? (
<VolumeXIcon className="size-4" />
) : (
<Volume2Icon className="size-4" />
)}
</Button>
<Slider
value={[isMuted ? 0 : volume]}
max={1}
step={0.01}
onValueChange={handleVolumeChange}
className="w-20"
/>
</div>
</div>
{/* Download button */}
<Button
variant="outline"
size="sm"
onClick={handleDownload}
className="gap-2"
>
<DownloadIcon className="size-4" />
Download
</Button>
</div>
</div>
);
}

@@ -0,0 +1,288 @@
"use client";
import { makeAssistantToolUI } from "@assistant-ui/react";
import { AlertCircleIcon, Loader2Icon, MicIcon } from "lucide-react";
import { useCallback, useEffect, useRef, useState } from "react";
import { Audio } from "@/components/tool-ui/audio";
import { podcastsApiService } from "@/lib/apis/podcasts-api.service";
/**
* Type definitions for the generate_podcast tool
*/
interface GeneratePodcastArgs {
source_content: string;
podcast_title?: string;
user_prompt?: string;
}
interface GeneratePodcastResult {
status: "success" | "error";
podcast_id?: number;
title?: string;
transcript?: string;
duration_ms?: number;
transcript_entries?: number;
error?: string;
}
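// Illustrative success payload (shape assumed from the Python tool above):
// { status: "success", podcast_id: 42, title: "AI Trends Podcast",
//   transcript: "Speaker 1: ...", duration_ms: 180000, transcript_entries: 24 }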
/**
* Loading state component shown while podcast is being generated
*/
function PodcastGeneratingState({ title }: { title: string }) {
return (
<div className="my-4 overflow-hidden rounded-xl border border-primary/20 bg-gradient-to-br from-primary/5 to-primary/10 p-6">
<div className="flex items-center gap-4">
<div className="relative">
<div className="flex size-16 items-center justify-center rounded-full bg-primary/20">
<MicIcon className="size-8 text-primary" />
</div>
{/* Animated rings */}
<div className="absolute inset-0 animate-ping rounded-full bg-primary/20" />
</div>
<div className="flex-1">
<h3 className="font-semibold text-foreground text-lg">{title}</h3>
<div className="mt-2 flex items-center gap-2 text-muted-foreground">
<Loader2Icon className="size-4 animate-spin" />
<span className="text-sm">Generating podcast... This may take a few minutes</span>
</div>
<div className="mt-3">
<div className="h-1.5 w-full overflow-hidden rounded-full bg-primary/10">
<div className="h-full w-1/3 animate-pulse rounded-full bg-primary" />
</div>
</div>
</div>
</div>
</div>
);
}
/**
* Error state component shown when podcast generation fails
*/
function PodcastErrorState({ title, error }: { title: string; error: string }) {
return (
<div className="my-4 overflow-hidden rounded-xl border border-destructive/20 bg-destructive/5 p-6">
<div className="flex items-center gap-4">
<div className="flex size-16 shrink-0 items-center justify-center rounded-full bg-destructive/10">
<AlertCircleIcon className="size-8 text-destructive" />
</div>
<div className="flex-1">
<h3 className="font-semibold text-foreground">{title}</h3>
<p className="mt-1 text-destructive text-sm">Failed to generate podcast</p>
<p className="mt-2 text-muted-foreground text-sm">{error}</p>
</div>
</div>
</div>
);
}
/**
* Audio loading state component
*/
function AudioLoadingState({ title }: { title: string }) {
return (
<div className="my-4 overflow-hidden rounded-xl border bg-muted/30 p-6">
<div className="flex items-center gap-4">
<div className="flex size-16 items-center justify-center rounded-full bg-primary/10">
<MicIcon className="size-8 text-primary/50" />
</div>
<div className="flex-1">
<h3 className="font-semibold text-foreground">{title}</h3>
<div className="mt-2 flex items-center gap-2 text-muted-foreground">
<Loader2Icon className="size-4 animate-spin" />
<span className="text-sm">Loading audio...</span>
</div>
</div>
</div>
</div>
);
}
/**
* Podcast Player Component - Fetches audio with authentication
*/
function PodcastPlayer({
podcastId,
title,
description,
durationMs,
transcript,
transcriptEntries,
}: {
podcastId: number;
title: string;
description: string;
durationMs?: number;
transcript?: string;
transcriptEntries?: number;
}) {
const [audioSrc, setAudioSrc] = useState<string | null>(null);
const [isLoading, setIsLoading] = useState(true);
const [error, setError] = useState<string | null>(null);
const objectUrlRef = useRef<string | null>(null);
// Cleanup object URL on unmount
useEffect(() => {
return () => {
if (objectUrlRef.current) {
URL.revokeObjectURL(objectUrlRef.current);
}
};
}, []);
// Fetch audio with authentication
const loadAudio = useCallback(async () => {
setIsLoading(true);
setError(null);
try {
// Revoke previous object URL if exists
if (objectUrlRef.current) {
URL.revokeObjectURL(objectUrlRef.current);
objectUrlRef.current = null;
}
const controller = new AbortController();
const timeoutId = setTimeout(() => controller.abort(), 60000); // 60s timeout
try {
// Fetch audio blob with authentication
const response = await podcastsApiService.loadPodcast({
request: { id: podcastId },
controller,
});
// Create object URL from blob
const objectUrl = URL.createObjectURL(response);
objectUrlRef.current = objectUrl;
setAudioSrc(objectUrl);
} finally {
clearTimeout(timeoutId);
}
} catch (err) {
console.error("Error loading podcast audio:", err);
if (err instanceof DOMException && err.name === "AbortError") {
setError("Request timed out. Please try again.");
} else {
setError(err instanceof Error ? err.message : "Failed to load audio");
}
} finally {
setIsLoading(false);
}
}, [podcastId]);
// Load audio when component mounts
useEffect(() => {
loadAudio();
}, [loadAudio]);
if (isLoading) {
return <AudioLoadingState title={title} />;
}
if (error || !audioSrc) {
return <PodcastErrorState title={title} error={error || "Failed to load audio"} />;
}
return (
<div className="my-4">
<Audio
id={`podcast-${podcastId}`}
src={audioSrc}
title={title}
description={description}
durationMs={durationMs}
className="w-full"
/>
{/* Full transcript */}
{transcript && (
<details className="mt-3 rounded-lg border bg-muted/30 p-3">
<summary className="cursor-pointer font-medium text-muted-foreground text-sm hover:text-foreground">
View full transcript{transcriptEntries ? ` (${transcriptEntries} entries)` : ""}
</summary>
<pre className="mt-2 max-h-96 overflow-y-auto whitespace-pre-wrap text-muted-foreground text-xs">
{transcript}
</pre>
</details>
)}
</div>
);
}
/**
* Generate Podcast Tool UI Component
*
* This component is registered with assistant-ui to render custom UI
* when the generate_podcast tool is called by the agent.
*
* It fetches the podcast audio with authentication (like the old system)
* and displays it using the Audio component.
*/
export const GeneratePodcastToolUI = makeAssistantToolUI<
GeneratePodcastArgs,
GeneratePodcastResult
>({
toolName: "generate_podcast",
render: function GeneratePodcastUI({ args, result, status }) {
const title = args.podcast_title || "SurfSense Podcast";
// Loading state - podcast is being generated
if (status.type === "running" || status.type === "requires-action") {
return <PodcastGeneratingState title={title} />;
}
// Incomplete/cancelled state
if (status.type === "incomplete") {
if (status.reason === "cancelled") {
return (
<div className="my-4 rounded-xl border border-muted p-4 text-muted-foreground">
<p className="flex items-center gap-2">
<MicIcon className="size-4" />
<span className="line-through">Podcast generation cancelled</span>
</p>
</div>
);
}
if (status.reason === "error") {
return (
<PodcastErrorState
title={title}
error={typeof status.error === "string" ? status.error : "An error occurred"}
/>
);
}
}
// No result yet
if (!result) {
return <PodcastGeneratingState title={title} />;
}
// Error result
if (result.status === "error") {
return <PodcastErrorState title={title} error={result.error || "Unknown error"} />;
}
// Success - need podcast_id to fetch with auth
if (!result.podcast_id) {
return <PodcastErrorState title={title} error="Missing podcast ID" />;
}
// Render the podcast player (handles auth fetch internally)
return (
<PodcastPlayer
podcastId={result.podcast_id}
title={result.title || title}
description={
result.transcript_entries
? `${result.transcript_entries} dialogue entries`
: "SurfSense AI-generated podcast"
}
durationMs={result.duration_ms}
transcript={result.transcript}
transcriptEntries={result.transcript_entries}
/>
);
},
});

@@ -0,0 +1,11 @@
/**
* Tool UI Components
*
* This module exports custom UI components for assistant tools.
* These components are registered with assistant-ui to render
* rich UI when specific tools are called by the agent.
*/
export { Audio } from "./audio";
export { GeneratePodcastToolUI } from "./generate-podcast";
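// Consumers can then import from the barrel, e.g. (hypothetical usage):
// import { Audio, GeneratePodcastToolUI } from "@/components/tool-ui";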

@@ -11,6 +11,22 @@ interface NewChatAdapterConfig {
chatId: number;
}
/**
* Represents an in-progress or completed tool call
*/
interface ToolCallState {
toolCallId: string;
toolName: string;
args: Record<string, unknown>;
result?: unknown;
}
/**
* Tools that should render custom UI in the chat.
* Other tools (like search_knowledge_base) will be hidden from the UI.
*/
const TOOLS_WITH_UI = new Set(["generate_podcast"]);
/**
* Creates a ChatModelAdapter that connects to the FastAPI new_chat endpoint.
*
@@ -77,6 +93,41 @@ export function createNewChatAdapter(config: NewChatAdapterConfig): ChatModelAda
let buffer = "";
let accumulatedText = "";
// Track tool calls by their ID
const toolCalls = new Map<string, ToolCallState>();
/**
* Build the content array with text and tool calls.
* Only includes tools that have custom UI (defined in TOOLS_WITH_UI).
*/
function buildContent() {
const content: Array<
| { type: "text"; text: string }
| { type: "tool-call"; toolCallId: string; toolName: string; args: Record<string, unknown>; result?: unknown }
> = [];
// Add text content if any
if (accumulatedText) {
content.push({ type: "text" as const, text: accumulatedText });
}
// Only add tool calls that have custom UI registered
// Other tools (like search_knowledge_base) are hidden from the UI
for (const toolCall of toolCalls.values()) {
if (TOOLS_WITH_UI.has(toolCall.toolName)) {
content.push({
type: "tool-call" as const,
toolCallId: toolCall.toolCallId,
toolName: toolCall.toolName,
args: toolCall.args,
result: toolCall.result,
});
}
}
return content;
}
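// Illustrative flow (payload shapes assumed from the backend route above):
//   {"type":"tool-input-start","toolCallId":"call_ab12...","toolName":"generate_podcast"}
//   {"type":"tool-input-available","toolCallId":"call_ab12...","toolName":"generate_podcast",
//    "input":{"source_content":"...","podcast_title":"AI Trends Podcast"}}
//   {"type":"tool-output-available","toolCallId":"call_ab12...",
//    "output":{"status":"success","podcast_id":42,"duration_ms":180000}}
// After the final event, buildContent() returns the accumulated text plus one
// tool-call entry carrying that output as its result.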
try {
while (true) {
const { done, value } = await reader.read();
@@ -113,16 +164,56 @@ export function createNewChatAdapter(config: NewChatAdapterConfig): ChatModelAda
switch (parsed.type) {
case "text-delta":
accumulatedText += parsed.delta;
yield {
content: [{ type: "text" as const, text: accumulatedText }],
};
yield { content: buildContent() };
break;
case "tool-input-start": {
// Tool call is starting - create a new tool call entry
const { toolCallId, toolName } = parsed;
toolCalls.set(toolCallId, {
toolCallId,
toolName,
args: {},
});
// Yield to show tool is starting (running state)
yield { content: buildContent() };
break;
}
case "tool-input-available": {
// Tool input is complete - update the args
const { toolCallId, toolName, input } = parsed;
const existing = toolCalls.get(toolCallId);
if (existing) {
existing.args = input || {};
} else {
// Create new entry if we missed tool-input-start
toolCalls.set(toolCallId, {
toolCallId,
toolName,
args: input || {},
});
}
yield { content: buildContent() };
break;
}
case "tool-output-available": {
// Tool execution is complete - add the result
const { toolCallId, output } = parsed;
const existing = toolCalls.get(toolCallId);
if (existing) {
existing.result = output;
}
yield { content: buildContent() };
break;
}
case "error":
throw new Error(parsed.errorText || "Unknown error from server");
// Other types like text-start, text-end, tool-*, etc.
// are handled implicitly - we just accumulate text deltas
// Other types like text-start, text-end, start-step, finish-step, etc.
// are handled implicitly
default:
break;
}
@@ -148,9 +239,14 @@ export function createNewChatAdapter(config: NewChatAdapterConfig): ChatModelAda
const parsed = JSON.parse(data);
if (parsed.type === "text-delta") {
accumulatedText += parsed.delta;
yield {
content: [{ type: "text" as const, text: accumulatedText }],
};
yield { content: buildContent() };
} else if (parsed.type === "tool-output-available") {
const { toolCallId, output } = parsed;
const existing = toolCalls.get(toolCallId);
if (existing) {
existing.result = output;
}
yield { content: buildContent() };
}
} catch {
// Ignore parse errors