diff --git a/.gitignore b/.gitignore
index a44664ad5..a5c44ce73 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,5 +5,4 @@ node_modules/
.ruff_cache/
.venv
.pnpm-store
-.DS_Store
-RemotionTets/
\ No newline at end of file
+.DS_Store
\ No newline at end of file
diff --git a/.vscode/launch.json b/.vscode/launch.json
index 2c4784c0e..029e7c647 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -22,7 +22,11 @@
"console": "integratedTerminal",
"justMyCode": false,
"cwd": "${workspaceFolder}/surfsense_backend",
- "python": "${command:python.interpreterPath}"
+ "python": "uv",
+ "pythonArgs": [
+ "run",
+ "python"
+ ]
},
{
"name": "Backend: FastAPI (No Reload)",
@@ -32,7 +36,11 @@
"console": "integratedTerminal",
"justMyCode": false,
"cwd": "${workspaceFolder}/surfsense_backend",
- "python": "${command:python.interpreterPath}"
+ "python": "uv",
+ "pythonArgs": [
+ "run",
+ "python"
+ ]
},
{
"name": "Backend: FastAPI (main.py)",
@@ -41,14 +49,19 @@
"program": "${workspaceFolder}/surfsense_backend/main.py",
"console": "integratedTerminal",
"justMyCode": false,
- "cwd": "${workspaceFolder}/surfsense_backend"
+ "cwd": "${workspaceFolder}/surfsense_backend",
+ "python": "uv",
+ "pythonArgs": [
+ "run",
+ "python"
+ ]
},
{
"name": "Frontend: Next.js",
"type": "node",
"request": "launch",
"cwd": "${workspaceFolder}/surfsense_web",
- "runtimeExecutable": "npm",
+ "runtimeExecutable": "pnpm",
"runtimeArgs": ["run", "dev"],
"console": "integratedTerminal",
"serverReadyAction": {
@@ -62,7 +75,7 @@
"type": "node",
"request": "launch",
"cwd": "${workspaceFolder}/surfsense_web",
- "runtimeExecutable": "npm",
+ "runtimeExecutable": "pnpm",
"runtimeArgs": ["run", "debug:server"],
"console": "integratedTerminal",
"serverReadyAction": {
@@ -87,7 +100,11 @@
"console": "integratedTerminal",
"justMyCode": false,
"cwd": "${workspaceFolder}/surfsense_backend",
- "python": "${command:python.interpreterPath}"
+ "python": "uv",
+ "pythonArgs": [
+ "run",
+ "python"
+ ]
},
{
"name": "Celery: Beat Scheduler",
@@ -103,7 +120,11 @@
"console": "integratedTerminal",
"justMyCode": false,
"cwd": "${workspaceFolder}/surfsense_backend",
- "python": "${command:python.interpreterPath}"
+ "python": "uv",
+ "pythonArgs": [
+ "run",
+ "python"
+ ]
}
],
"compounds": [
diff --git a/surfsense_backend/.gitignore b/surfsense_backend/.gitignore
index 443c85e9c..1cd7fd32c 100644
--- a/surfsense_backend/.gitignore
+++ b/surfsense_backend/.gitignore
@@ -6,6 +6,7 @@ __pycache__/
.flashrank_cache
surf_new_backend.egg-info/
podcasts/
+video_presentation_audio/
sandbox_files/
temp_audio/
celerybeat-schedule*
diff --git a/surfsense_backend/alembic/versions/107_add_video_presentations_table.py b/surfsense_backend/alembic/versions/107_add_video_presentations_table.py
new file mode 100644
index 000000000..e6f928b50
--- /dev/null
+++ b/surfsense_backend/alembic/versions/107_add_video_presentations_table.py
@@ -0,0 +1,85 @@
+"""Add video_presentations table and video_presentation_status enum
+
+Revision ID: 107
+Revises: 106
+"""
+
+from collections.abc import Sequence
+
+import sqlalchemy as sa
+from sqlalchemy.dialects.postgresql import JSONB
+
+from alembic import op
+
+revision: str = "107"
+down_revision: str | None = "106"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+video_presentation_status_enum = sa.Enum(
+ "pending",
+ "generating",
+ "ready",
+ "failed",
+ name="video_presentation_status",
+)
+
+
+def upgrade() -> None:
+ video_presentation_status_enum.create(op.get_bind(), checkfirst=True)
+
+ op.create_table(
+ "video_presentations",
+ sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
+ sa.Column("title", sa.String(length=500), nullable=False),
+ sa.Column("slides", JSONB(), nullable=True),
+ sa.Column("scene_codes", JSONB(), nullable=True),
+ sa.Column(
+ "status",
+ video_presentation_status_enum,
+ server_default="ready",
+ nullable=False,
+ ),
+ sa.Column("search_space_id", sa.Integer(), nullable=False),
+ sa.Column("thread_id", sa.Integer(), nullable=True),
+ sa.Column(
+ "created_at",
+ sa.TIMESTAMP(timezone=True),
+ server_default=sa.text("now()"),
+ nullable=False,
+ ),
+ sa.ForeignKeyConstraint(
+ ["search_space_id"],
+ ["searchspaces.id"],
+ ondelete="CASCADE",
+ ),
+ sa.ForeignKeyConstraint(
+ ["thread_id"],
+ ["new_chat_threads.id"],
+ ondelete="SET NULL",
+ ),
+ sa.PrimaryKeyConstraint("id"),
+ )
+ op.create_index(
+ "ix_video_presentations_status",
+ "video_presentations",
+ ["status"],
+ )
+ op.create_index(
+ "ix_video_presentations_thread_id",
+ "video_presentations",
+ ["thread_id"],
+ )
+ op.create_index(
+ "ix_video_presentations_created_at",
+ "video_presentations",
+ ["created_at"],
+ )
+
+
+def downgrade() -> None:
+ op.drop_index("ix_video_presentations_created_at", table_name="video_presentations")
+ op.drop_index("ix_video_presentations_thread_id", table_name="video_presentations")
+ op.drop_index("ix_video_presentations_status", table_name="video_presentations")
+ op.drop_table("video_presentations")
+ video_presentation_status_enum.drop(op.get_bind(), checkfirst=True)
diff --git a/surfsense_backend/app/agents/new_chat/system_prompt.py b/surfsense_backend/app/agents/new_chat/system_prompt.py
index cff13e8c6..f8ac62787 100644
--- a/surfsense_backend/app/agents/new_chat/system_prompt.py
+++ b/surfsense_backend/app/agents/new_chat/system_prompt.py
@@ -132,6 +132,17 @@ _TOOL_INSTRUCTIONS["generate_podcast"] = """
- After calling this tool, inform the user that podcast generation has started and they will see the player when it's ready (takes 3-5 minutes).
"""
+_TOOL_INSTRUCTIONS["generate_video_presentation"] = """
+- generate_video_presentation: Generate a video presentation from provided content.
+ - Use this when the user asks to create a video, presentation, slides, or slide deck.
+ - Trigger phrases: "give me a presentation", "create slides", "generate a video", "make a slide deck", "turn this into a presentation"
+ - Args:
+ - source_content: The text content to turn into a presentation. The more detailed, the better.
+ - video_title: Optional title (default: "SurfSense Presentation")
+ - user_prompt: Optional style instructions (e.g., "Make it technical and detailed")
+ - After calling this tool, inform the user that generation has started and they will see the presentation when it's ready.
+"""
+
_TOOL_INSTRUCTIONS["generate_report"] = """
- generate_report: Generate or revise a structured Markdown report artifact.
- WHEN TO CALL THIS TOOL — the message must contain a creation or modification VERB directed at producing a deliverable:
@@ -438,6 +449,16 @@ _TOOL_EXAMPLES["generate_podcast"] = """
- Then: `generate_podcast(source_content="Key insights about quantum computing from the knowledge base:\\n\\n[Comprehensive summary of all relevant search results with key facts, concepts, and findings]", podcast_title="Quantum Computing Explained")`
"""
+_TOOL_EXAMPLES["generate_video_presentation"] = """
+- User: "Give me a presentation about AI trends based on what we discussed"
+ - First search for relevant content, then call: `generate_video_presentation(source_content="Based on our conversation and search results: [detailed summary of chat + search findings]", video_title="AI Trends Presentation")`
+- User: "Create slides summarizing this conversation"
+ - Call: `generate_video_presentation(source_content="Complete conversation summary:\\n\\nUser asked about [topic 1]:\\n[Your detailed response]\\n\\nUser then asked about [topic 2]:\\n[Your detailed response]\\n\\n[Continue for all exchanges in the conversation]", video_title="Conversation Summary")`
+- User: "Make a video presentation about quantum computing"
+ - First search: `search_knowledge_base(query="quantum computing")`
+ - Then: `generate_video_presentation(source_content="Key insights about quantum computing from the knowledge base:\\n\\n[Comprehensive summary of all relevant search results with key facts, concepts, and findings]", video_title="Quantum Computing Explained")`
+"""
+
_TOOL_EXAMPLES["generate_report"] = """
- User: "Generate a report about AI trends"
- Call: `generate_report(topic="AI Trends Report", source_strategy="kb_search", search_queries=["AI trends recent developments", "artificial intelligence industry trends", "AI market growth and predictions"], report_style="detailed")`
@@ -499,6 +520,7 @@ _ALL_TOOL_NAMES_ORDERED = [
"search_knowledge_base",
"web_search",
"generate_podcast",
+ "generate_video_presentation",
"generate_report",
"link_preview",
"display_image",
diff --git a/surfsense_backend/app/agents/new_chat/tools/__init__.py b/surfsense_backend/app/agents/new_chat/tools/__init__.py
index 0a11951f0..5002e69bb 100644
--- a/surfsense_backend/app/agents/new_chat/tools/__init__.py
+++ b/surfsense_backend/app/agents/new_chat/tools/__init__.py
@@ -8,6 +8,7 @@ Available tools:
- search_knowledge_base: Search the user's personal knowledge base
- search_surfsense_docs: Search Surfsense documentation for usage help
- generate_podcast: Generate audio podcasts from content
+- generate_video_presentation: Generate video presentations with slides and narration
- generate_image: Generate images from text descriptions using AI models
- link_preview: Fetch rich previews for URLs
- display_image: Display images in chat
@@ -39,6 +40,7 @@ from .registry import (
from .scrape_webpage import create_scrape_webpage_tool
from .search_surfsense_docs import create_search_surfsense_docs_tool
from .user_memory import create_recall_memory_tool, create_save_memory_tool
+from .video_presentation import create_generate_video_presentation_tool
__all__ = [
# Registry
@@ -51,6 +53,7 @@ __all__ = [
"create_display_image_tool",
"create_generate_image_tool",
"create_generate_podcast_tool",
+ "create_generate_video_presentation_tool",
"create_link_preview_tool",
"create_recall_memory_tool",
"create_save_memory_tool",
diff --git a/surfsense_backend/app/agents/new_chat/tools/registry.py b/surfsense_backend/app/agents/new_chat/tools/registry.py
index 6f2e36b08..4feff7d90 100644
--- a/surfsense_backend/app/agents/new_chat/tools/registry.py
+++ b/surfsense_backend/app/agents/new_chat/tools/registry.py
@@ -73,6 +73,7 @@ from .shared_memory import (
create_save_shared_memory_tool,
)
from .user_memory import create_recall_memory_tool, create_save_memory_tool
+from .video_presentation import create_generate_video_presentation_tool
from .web_search import create_web_search_tool
# =============================================================================
@@ -136,6 +137,17 @@ BUILTIN_TOOLS: list[ToolDefinition] = [
),
requires=["search_space_id", "db_session", "thread_id"],
),
+ # Video presentation generation tool
+ ToolDefinition(
+ name="generate_video_presentation",
+ description="Generate a video presentation with slides and narration from provided content",
+ factory=lambda deps: create_generate_video_presentation_tool(
+ search_space_id=deps["search_space_id"],
+ db_session=deps["db_session"],
+ thread_id=deps["thread_id"],
+ ),
+ requires=["search_space_id", "db_session", "thread_id"],
+ ),
# Report generation tool (inline, short-lived sessions for DB ops)
# Supports internal KB search via source_strategy so the agent doesn't
# need to call search_knowledge_base separately before generating.
diff --git a/surfsense_backend/app/agents/new_chat/tools/video_presentation.py b/surfsense_backend/app/agents/new_chat/tools/video_presentation.py
new file mode 100644
index 000000000..685399103
--- /dev/null
+++ b/surfsense_backend/app/agents/new_chat/tools/video_presentation.py
@@ -0,0 +1,171 @@
+"""
+Video presentation generation tool for the SurfSense agent.
+
+This module provides a factory function for creating the generate_video_presentation
+tool that submits a Celery task for background video presentation generation.
+The frontend polls for completion and auto-updates when the presentation is ready.
+
+Duplicate request prevention:
+- Only one video presentation can be generated at a time per search space
+- Uses Redis to track active video presentation tasks
+- Validates the Redis marker against actual DB status to avoid stale locks
+"""
+
+from typing import Any
+
+import redis
+from langchain_core.tools import tool
+from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.config import config
+from app.db import VideoPresentation, VideoPresentationStatus
+
+REDIS_URL = config.REDIS_APP_URL
+_redis_client: redis.Redis | None = None
+
+
+def get_redis_client() -> redis.Redis:
+ """Get or create Redis client for video presentation task tracking."""
+ global _redis_client
+ if _redis_client is None:
+ _redis_client = redis.from_url(REDIS_URL, decode_responses=True)
+ return _redis_client
+
+
+def _redis_key(search_space_id: int) -> str:
+ return f"video_presentation:generating:{search_space_id}"
+
+
+def get_generating_video_presentation_id(search_space_id: int) -> int | None:
+ """Get the video presentation ID currently being generated for this search space."""
+ try:
+ client = get_redis_client()
+ value = client.get(_redis_key(search_space_id))
+ return int(value) if value else None
+ except Exception:
+ return None
+
+
+def clear_generating_video_presentation(search_space_id: int) -> None:
+ """Clear the generating marker (used when we detect a stale lock)."""
+ try:
+ client = get_redis_client()
+ client.delete(_redis_key(search_space_id))
+ except Exception:
+ pass
+
+
+def set_generating_video_presentation(
+ search_space_id: int, video_presentation_id: int
+) -> None:
+ """Mark a video presentation as currently generating for this search space."""
+ try:
+ client = get_redis_client()
+ client.setex(_redis_key(search_space_id), 1800, str(video_presentation_id))
+ except Exception as e:
+ print(
+ f"[generate_video_presentation] Warning: Could not set generating video presentation in Redis: {e}"
+ )
+
+
+def create_generate_video_presentation_tool(
+ search_space_id: int,
+ db_session: AsyncSession,
+ thread_id: int | None = None,
+):
+ """
+ Factory function to create the generate_video_presentation tool with injected dependencies.
+
+ Pre-creates video presentation record with pending status so the ID is available
+ immediately for frontend polling.
+ """
+
+ @tool
+ async def generate_video_presentation(
+ source_content: str,
+ video_title: str = "SurfSense Presentation",
+ user_prompt: str | None = None,
+ ) -> dict[str, Any]:
+ """Generate a video presentation from the provided content.
+
+ Use this tool when the user asks to create a video, presentation, slides, or slide deck.
+
+ Args:
+ source_content: The text content to turn into a presentation.
+ video_title: Title for the presentation (default: "SurfSense Presentation")
+ user_prompt: Optional style/tone instructions.
+ """
+ try:
+ generating_id = get_generating_video_presentation_id(search_space_id)
+ if generating_id:
+ result = await db_session.execute(
+ select(VideoPresentation).filter(
+ VideoPresentation.id == generating_id
+ )
+ )
+ existing = result.scalars().first()
+
+ if existing and existing.status == VideoPresentationStatus.GENERATING:
+ print(
+ f"[generate_video_presentation] Blocked duplicate — "
+ f"presentation {generating_id} is actively generating"
+ )
+ return {
+ "status": VideoPresentationStatus.GENERATING.value,
+ "video_presentation_id": generating_id,
+ "title": video_title,
+ "message": "A video presentation is already being generated. Please wait for it to complete.",
+ }
+
+ print(
+ f"[generate_video_presentation] Stale Redis lock for presentation {generating_id} "
+ f"(status={existing.status if existing else 'not found'}). Clearing and proceeding."
+ )
+ clear_generating_video_presentation(search_space_id)
+
+ video_pres = VideoPresentation(
+ title=video_title,
+ status=VideoPresentationStatus.PENDING,
+ search_space_id=search_space_id,
+ thread_id=thread_id,
+ )
+ db_session.add(video_pres)
+ await db_session.commit()
+ await db_session.refresh(video_pres)
+
+ from app.tasks.celery_tasks.video_presentation_tasks import (
+ generate_video_presentation_task,
+ )
+
+ task = generate_video_presentation_task.delay(
+ video_presentation_id=video_pres.id,
+ source_content=source_content,
+ search_space_id=search_space_id,
+ user_prompt=user_prompt,
+ )
+
+ set_generating_video_presentation(search_space_id, video_pres.id)
+
+ print(
+ f"[generate_video_presentation] Created video presentation {video_pres.id}, task: {task.id}"
+ )
+
+ return {
+ "status": VideoPresentationStatus.PENDING.value,
+ "video_presentation_id": video_pres.id,
+ "title": video_title,
+ "message": "Video presentation generation started. This may take a few minutes.",
+ }
+
+ except Exception as e:
+ error_message = str(e)
+ print(f"[generate_video_presentation] Error: {error_message}")
+ return {
+ "status": VideoPresentationStatus.FAILED.value,
+ "error": error_message,
+ "title": video_title,
+ "video_presentation_id": None,
+ }
+
+ return generate_video_presentation
diff --git a/surfsense_backend/app/agents/video_presentation/__init__.py b/surfsense_backend/app/agents/video_presentation/__init__.py
new file mode 100644
index 000000000..caf885218
--- /dev/null
+++ b/surfsense_backend/app/agents/video_presentation/__init__.py
@@ -0,0 +1,10 @@
+"""Video Presentation LangGraph Agent.
+
+This module defines a graph for generating video presentations
+from source content, similar to the podcaster agent but producing
+slide-based video presentations with TTS narration.
+"""
+
+from .graph import graph
+
+__all__ = ["graph"]
diff --git a/surfsense_backend/app/agents/video_presentation/configuration.py b/surfsense_backend/app/agents/video_presentation/configuration.py
new file mode 100644
index 000000000..18724a2ab
--- /dev/null
+++ b/surfsense_backend/app/agents/video_presentation/configuration.py
@@ -0,0 +1,25 @@
+"""Define the configurable parameters for the video presentation agent."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, fields
+
+from langchain_core.runnables import RunnableConfig
+
+
+@dataclass(kw_only=True)
+class Configuration:
+ """The configuration for the video presentation agent."""
+
+ video_title: str
+ search_space_id: int
+ user_prompt: str | None = None
+
+ @classmethod
+ def from_runnable_config(
+ cls, config: RunnableConfig | None = None
+ ) -> Configuration:
+ """Create a Configuration instance from a RunnableConfig object."""
+ configurable = (config.get("configurable") or {}) if config else {}
+ _fields = {f.name for f in fields(cls) if f.init}
+ return cls(**{k: v for k, v in configurable.items() if k in _fields})
diff --git a/surfsense_backend/app/agents/video_presentation/graph.py b/surfsense_backend/app/agents/video_presentation/graph.py
new file mode 100644
index 000000000..2fe548028
--- /dev/null
+++ b/surfsense_backend/app/agents/video_presentation/graph.py
@@ -0,0 +1,30 @@
+from langgraph.graph import StateGraph
+
+from .configuration import Configuration
+from .nodes import (
+ create_presentation_slides,
+ create_slide_audio,
+ generate_slide_scene_codes,
+)
+from .state import State
+
+
+def build_graph():
+ workflow = StateGraph(State, config_schema=Configuration)
+
+ workflow.add_node("create_presentation_slides", create_presentation_slides)
+ workflow.add_node("create_slide_audio", create_slide_audio)
+ workflow.add_node("generate_slide_scene_codes", generate_slide_scene_codes)
+
+ workflow.add_edge("__start__", "create_presentation_slides")
+ workflow.add_edge("create_presentation_slides", "create_slide_audio")
+ workflow.add_edge("create_slide_audio", "generate_slide_scene_codes")
+ workflow.add_edge("generate_slide_scene_codes", "__end__")
+
+ graph = workflow.compile()
+ graph.name = "Surfsense Video Presentation"
+
+ return graph
+
+
+graph = build_graph()
diff --git a/surfsense_backend/app/agents/video_presentation/nodes.py b/surfsense_backend/app/agents/video_presentation/nodes.py
new file mode 100644
index 000000000..c11174c6f
--- /dev/null
+++ b/surfsense_backend/app/agents/video_presentation/nodes.py
@@ -0,0 +1,552 @@
+import asyncio
+import contextlib
+import json
+import math
+import os
+import shutil
+import uuid
+from pathlib import Path
+from typing import Any
+
+from ffmpeg.asyncio import FFmpeg
+from langchain_core.messages import HumanMessage, SystemMessage
+from langchain_core.runnables import RunnableConfig
+from litellm import aspeech
+
+from app.config import config as app_config
+from app.services.kokoro_tts_service import get_kokoro_tts_service
+from app.services.llm_service import get_agent_llm
+
+from .configuration import Configuration
+from .prompts import (
+ DEFAULT_DURATION_IN_FRAMES,
+ FPS,
+ REFINE_SCENE_SYSTEM_PROMPT,
+ REMOTION_SCENE_SYSTEM_PROMPT,
+ THEME_PRESETS,
+ build_scene_generation_user_prompt,
+ build_theme_assignment_user_prompt,
+ get_slide_generation_prompt,
+ get_theme_assignment_system_prompt,
+ pick_theme_and_mode_fallback,
+)
+from .state import (
+ PresentationSlides,
+ SlideAudioResult,
+ SlideContent,
+ SlideSceneCode,
+ State,
+)
+from .utils import get_voice_for_provider
+
+MAX_REFINE_ATTEMPTS = 3
+
+
+async def create_presentation_slides(
+ state: State, config: RunnableConfig
+) -> dict[str, Any]:
+ """Parse source content into structured presentation slides using LLM."""
+
+ configuration = Configuration.from_runnable_config(config)
+ search_space_id = configuration.search_space_id
+ user_prompt = configuration.user_prompt
+
+ llm = await get_agent_llm(state.db_session, search_space_id)
+ if not llm:
+ error_message = f"No LLM configured for search space {search_space_id}"
+ print(error_message)
+ raise RuntimeError(error_message)
+
+ prompt = get_slide_generation_prompt(user_prompt)
+
+ messages = [
+ SystemMessage(content=prompt),
+ HumanMessage(
+ content=f"{state.source_content}"
+ ),
+ ]
+
+ llm_response = await llm.ainvoke(messages)
+
+ try:
+ presentation = PresentationSlides.model_validate(
+ json.loads(llm_response.content)
+ )
+ except (json.JSONDecodeError, ValueError) as e:
+ print(f"Direct JSON parsing failed, trying fallback approach: {e!s}")
+
+ try:
+ content = llm_response.content
+ json_start = content.find("{")
+ json_end = content.rfind("}") + 1
+ if json_start >= 0 and json_end > json_start:
+ json_str = content[json_start:json_end]
+ parsed_data = json.loads(json_str)
+ presentation = PresentationSlides.model_validate(parsed_data)
+ print("Successfully parsed presentation slides using fallback approach")
+ else:
+ error_message = f"Could not find valid JSON in LLM response. Raw response: {content}"
+ print(error_message)
+ raise ValueError(error_message)
+
+ except (json.JSONDecodeError, ValueError) as e2:
+ error_message = f"Error parsing LLM response (fallback also failed): {e2!s}"
+ print(f"Error parsing LLM response: {e2!s}")
+ print(f"Raw response: {llm_response.content}")
+ raise
+
+ return {"slides": presentation.slides}
+
+
+async def create_slide_audio(state: State, config: RunnableConfig) -> dict[str, Any]:
+ """Generate TTS audio for each slide.
+
+ Each slide's speaker_transcripts are generated as individual TTS chunks,
+ then concatenated with ffmpeg (matching the POC in RemotionTets/api/tts).
+ """
+
+ session_id = str(uuid.uuid4())
+ temp_dir = Path("temp_audio")
+ temp_dir.mkdir(exist_ok=True)
+ output_dir = Path("video_presentation_audio")
+ output_dir.mkdir(exist_ok=True)
+
+ slides = state.slides or []
+ voice = get_voice_for_provider(app_config.TTS_SERVICE, speaker_id=0)
+ ext = "wav" if app_config.TTS_SERVICE == "local/kokoro" else "mp3"
+
+ async def _generate_tts_chunk(text: str, chunk_path: str) -> str:
+ """Generate a single TTS chunk and write it to *chunk_path*."""
+ if app_config.TTS_SERVICE == "local/kokoro":
+ kokoro_service = await get_kokoro_tts_service(lang_code="a")
+ await kokoro_service.generate_speech(
+ text=text,
+ voice=voice,
+ speed=1.0,
+ output_path=chunk_path,
+ )
+ else:
+ kwargs: dict[str, Any] = {
+ "model": app_config.TTS_SERVICE,
+ "api_key": app_config.TTS_SERVICE_API_KEY,
+ "voice": voice,
+ "input": text,
+ "max_retries": 2,
+ "timeout": 600,
+ }
+ if app_config.TTS_SERVICE_API_BASE:
+ kwargs["api_base"] = app_config.TTS_SERVICE_API_BASE
+
+ response = await aspeech(**kwargs)
+ with open(chunk_path, "wb") as f:
+ f.write(response.content)
+
+ return chunk_path
+
+ async def _concat_with_ffmpeg(chunk_paths: list[str], output_file: str) -> None:
+ """Concatenate multiple audio chunks into one file using async ffmpeg."""
+ ffmpeg = FFmpeg().option("y")
+ for chunk in chunk_paths:
+ ffmpeg = ffmpeg.input(chunk)
+
+ filter_parts = [f"[{i}:0]" for i in range(len(chunk_paths))]
+ filter_str = (
+ "".join(filter_parts) + f"concat=n={len(chunk_paths)}:v=0:a=1[outa]"
+ )
+ ffmpeg = ffmpeg.option("filter_complex", filter_str)
+ ffmpeg = ffmpeg.output(output_file, map="[outa]")
+ await ffmpeg.execute()
+
+ async def generate_audio_for_slide(slide: SlideContent) -> SlideAudioResult:
+ has_transcripts = (
+ slide.speaker_transcripts and len(slide.speaker_transcripts) > 0
+ )
+
+ if not has_transcripts:
+ print(
+ f"Slide {slide.slide_number}: no speaker_transcripts, "
+ f"using default duration ({DEFAULT_DURATION_IN_FRAMES} frames)"
+ )
+ return SlideAudioResult(
+ slide_number=slide.slide_number,
+ audio_file="",
+ duration_seconds=DEFAULT_DURATION_IN_FRAMES / FPS,
+ duration_in_frames=DEFAULT_DURATION_IN_FRAMES,
+ )
+
+ output_file = str(output_dir / f"{session_id}_slide_{slide.slide_number}.{ext}")
+
+ chunk_paths: list[str] = []
+ try:
+ for i, text in enumerate(slide.speaker_transcripts):
+ chunk_path = str(
+ temp_dir
+ / f"{session_id}_slide_{slide.slide_number}_chunk_{i}.{ext}"
+ )
+ print(
+ f" Slide {slide.slide_number} chunk {i + 1}/"
+ f"{len(slide.speaker_transcripts)}: "
+ f'"{text[:60]}..."'
+ )
+ await _generate_tts_chunk(text, chunk_path)
+ chunk_paths.append(chunk_path)
+
+ if len(chunk_paths) == 1:
+ shutil.move(chunk_paths[0], output_file)
+ else:
+ print(
+ f" Concatenating {len(chunk_paths)} chunks for slide "
+ f"{slide.slide_number} with ffmpeg"
+ )
+ await _concat_with_ffmpeg(chunk_paths, output_file)
+
+ duration_seconds = await _get_audio_duration(output_file)
+ duration_in_frames = math.ceil(duration_seconds * FPS)
+
+ return SlideAudioResult(
+ slide_number=slide.slide_number,
+ audio_file=output_file,
+ duration_seconds=duration_seconds,
+ duration_in_frames=max(duration_in_frames, DEFAULT_DURATION_IN_FRAMES),
+ )
+
+ except Exception as e:
+ print(f"Error generating audio for slide {slide.slide_number}: {e!s}")
+ raise
+ finally:
+ for p in chunk_paths:
+ with contextlib.suppress(OSError):
+ os.remove(p)
+
+ tasks = [generate_audio_for_slide(slide) for slide in slides]
+ audio_results = await asyncio.gather(*tasks)
+
+ audio_results_sorted = sorted(audio_results, key=lambda r: r.slide_number)
+
+ print(
+ f"Generated audio for {len(audio_results_sorted)} slides "
+ f"(total duration: {sum(r.duration_seconds for r in audio_results_sorted):.1f}s)"
+ )
+
+ return {"slide_audio_results": audio_results_sorted}
+
+
+async def _get_audio_duration(file_path: str) -> float:
+ """Get audio duration in seconds using ffprobe (via python-ffmpeg).
+
+ Falls back to file-size estimation if ffprobe fails.
+ """
+ try:
+ import subprocess
+
+ proc = await asyncio.create_subprocess_exec(
+ "ffprobe",
+ "-v",
+ "error",
+ "-show_entries",
+ "format=duration",
+ "-of",
+ "default=noprint_wrappers=1:nokey=1",
+ file_path,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ )
+ stdout, _ = await asyncio.wait_for(proc.communicate(), timeout=10)
+ if proc.returncode == 0 and stdout.strip():
+ return float(stdout.strip())
+ except Exception as e:
+ print(f"ffprobe failed for {file_path}: {e!s}, using file-size estimation")
+
+ try:
+ file_size = os.path.getsize(file_path)
+ if file_path.endswith(".wav"):
+ return file_size / (16000 * 2)
+ else:
+ return file_size / 16000
+ except Exception:
+ return DEFAULT_DURATION_IN_FRAMES / FPS
+
+
+async def _assign_themes_with_llm(
+ llm, slides: list[SlideContent]
+) -> dict[int, tuple[str, str]]:
+ """Ask the LLM to assign a theme+mode to each slide in one call.
+
+ Returns a dict mapping slide_number → (theme, mode).
+ Falls back to round-robin if the LLM response can't be parsed.
+ """
+ total = len(slides)
+ slide_summaries = [
+ {
+ "slide_number": s.slide_number,
+ "title": s.title,
+ "subtitle": s.subtitle or "",
+ "background_explanation": s.background_explanation or "",
+ }
+ for s in slides
+ ]
+
+ system = get_theme_assignment_system_prompt()
+ user = build_theme_assignment_user_prompt(slide_summaries)
+
+ try:
+ response = await llm.ainvoke(
+ [
+ SystemMessage(content=system),
+ HumanMessage(content=user),
+ ]
+ )
+
+ text = response.content.strip()
+ if text.startswith("```"):
+ lines = text.split("\n")
+ text = "\n".join(
+ line for line in lines if not line.strip().startswith("```")
+ ).strip()
+
+ assignments = json.loads(text)
+ valid_themes = set(THEME_PRESETS)
+ result: dict[int, tuple[str, str]] = {}
+ for entry in assignments:
+ sn = entry.get("slide_number")
+ theme = entry.get("theme", "").upper()
+ mode = entry.get("mode", "dark").lower()
+ if sn and theme in valid_themes and mode in ("dark", "light"):
+ result[sn] = (theme, mode)
+
+ if len(result) == total:
+ print(
+ "LLM theme assignment: "
+ + ", ".join(f"S{sn}={t}/{m}" for sn, (t, m) in sorted(result.items()))
+ )
+ return result
+
+ print(
+ f"LLM returned {len(result)}/{total} valid assignments, "
+ "filling gaps with fallback"
+ )
+ for s in slides:
+ if s.slide_number not in result:
+ result[s.slide_number] = pick_theme_and_mode_fallback(
+ s.slide_number - 1, total
+ )
+ return result
+
+ except Exception as e:
+ print(f"LLM theme assignment failed ({e!s}), using fallback")
+ return {
+ s.slide_number: pick_theme_and_mode_fallback(s.slide_number - 1, total)
+ for s in slides
+ }
+
+
+async def generate_slide_scene_codes(
+ state: State, config: RunnableConfig
+) -> dict[str, Any]:
+ """Generate Remotion component code for each slide using LLM.
+
+ First assigns a theme+mode to every slide via a single LLM call,
+ then generates scene code per slide with the assigned theme.
+ """
+
+ configuration = Configuration.from_runnable_config(config)
+ search_space_id = configuration.search_space_id
+
+ llm = await get_agent_llm(state.db_session, search_space_id)
+ if not llm:
+ raise RuntimeError(f"No LLM configured for search space {search_space_id}")
+
+ slides = state.slides or []
+ audio_results = state.slide_audio_results or []
+
+ audio_map: dict[int, SlideAudioResult] = {r.slide_number: r for r in audio_results}
+ total_slides = len(slides)
+
+ theme_assignments = await _assign_themes_with_llm(llm, slides)
+
+ scene_codes: list[SlideSceneCode] = []
+
+ for slide in slides:
+ audio = audio_map.get(slide.slide_number)
+ duration = audio.duration_in_frames if audio else DEFAULT_DURATION_IN_FRAMES
+
+ theme, mode = theme_assignments.get(
+ slide.slide_number,
+ pick_theme_and_mode_fallback(slide.slide_number - 1, total_slides),
+ )
+
+ user_prompt = build_scene_generation_user_prompt(
+ slide_number=slide.slide_number,
+ total_slides=total_slides,
+ title=slide.title,
+ subtitle=slide.subtitle,
+ content_in_markdown=slide.content_in_markdown,
+ background_explanation=slide.background_explanation,
+ duration_in_frames=duration,
+ theme=theme,
+ mode=mode,
+ )
+
+ messages = [
+ SystemMessage(content=REMOTION_SCENE_SYSTEM_PROMPT),
+ HumanMessage(content=user_prompt),
+ ]
+
+ print(
+ f"Generating scene code for slide {slide.slide_number}/{total_slides}: "
+ f'"{slide.title}" ({duration} frames)'
+ )
+
+ llm_response = await llm.ainvoke(messages)
+ code, scene_title = _extract_code_and_title(llm_response.content)
+
+ code = await _refine_if_needed(llm, code, slide.slide_number)
+
+ scene_codes.append(
+ SlideSceneCode(
+ slide_number=slide.slide_number,
+ code=code,
+ title=scene_title or slide.title,
+ )
+ )
+
+ print(f"Scene code ready for slide {slide.slide_number} ({len(code)} chars)")
+
+ return {"slide_scene_codes": scene_codes}
+
+
+def _extract_code_and_title(content: str) -> tuple[str, str | None]:
+ """Extract code and optional title from LLM response.
+
+ The LLM may return a JSON object like the POC's structured output:
+ { "code": "...", "title": "..." }
+ Or it may return raw code (with optional markdown fences).
+
+ Returns (code, title) where title may be None.
+ """
+ text = content.strip()
+
+ if text.startswith("{"):
+ try:
+ parsed = json.loads(text)
+ if isinstance(parsed, dict) and "code" in parsed:
+ return parsed["code"], parsed.get("title")
+ except (json.JSONDecodeError, ValueError):
+ pass
+
+ json_start = text.find("{")
+ json_end = text.rfind("}") + 1
+ if json_start >= 0 and json_end > json_start:
+ try:
+ parsed = json.loads(text[json_start:json_end])
+ if isinstance(parsed, dict) and "code" in parsed:
+ return parsed["code"], parsed.get("title")
+ except (json.JSONDecodeError, ValueError):
+ pass
+
+ code = text
+ if code.startswith("```"):
+ lines = code.split("\n")
+ start = 1
+ end = len(lines)
+ for i in range(len(lines) - 1, 0, -1):
+ if lines[i].strip().startswith("```"):
+ end = i
+ break
+ code = "\n".join(lines[start:end]).strip()
+
+ return code, None
+
+
async def _refine_if_needed(llm, code: str, slide_number: int) -> str:
    """Validate generated scene code, asking the LLM to repair it when broken.

    Runs the lightweight backend syntax check first; when it flags a problem,
    the code is sent back to the LLM together with the error description, up
    to MAX_REFINE_ATTEMPTS times. Mirrors the POC's behavior: a slide that
    never becomes valid aborts the pipeline via RuntimeError.
    """
    error = _basic_syntax_check(code)
    if error is None:
        # Fast path: most generations pass the check on the first try.
        return code

    attempt = 0
    while attempt < MAX_REFINE_ATTEMPTS:
        attempt += 1
        print(
            f"Slide {slide_number}: syntax issue (attempt {attempt}/{MAX_REFINE_ATTEMPTS}): {error}"
        )

        repair_request = (
            f"Here is the broken Remotion component code:\n\n{code}\n\n"
            f"Compilation error:\n{error}\n\nFix the code."
        )
        response = await llm.ainvoke(
            [
                SystemMessage(content=REFINE_SCENE_SYSTEM_PROMPT),
                HumanMessage(content=repair_request),
            ]
        )
        code, _ = _extract_code_and_title(response.content)

        # Re-check the repaired code; success ends the loop early.
        error = _basic_syntax_check(code)
        if error is None:
            print(f"Slide {slide_number}: fixed on attempt {attempt}")
            return code

    raise RuntimeError(
        f"Slide {slide_number} failed to compile after {MAX_REFINE_ATTEMPTS} "
        f"refine attempts. Last error: {error}"
    )
+
+
+def _basic_syntax_check(code: str) -> str | None:
+ """Run a lightweight syntax check on the generated code.
+
+ Full Babel-based compilation happens on the frontend. This backend check
+ catches the most common LLM code-generation mistakes so the refine loop
+ can fix them before persisting.
+
+ Returns an error description or None if the code looks valid.
+ """
+ if not code or not code.strip():
+ return "Empty code"
+
+ if "export" not in code and "MyComposition" not in code:
+ return "Missing exported component (expected 'export const MyComposition')"
+
+ brace_count = 0
+ paren_count = 0
+ bracket_count = 0
+ for ch in code:
+ if ch == "{":
+ brace_count += 1
+ elif ch == "}":
+ brace_count -= 1
+ elif ch == "(":
+ paren_count += 1
+ elif ch == ")":
+ paren_count -= 1
+ elif ch == "[":
+ bracket_count += 1
+ elif ch == "]":
+ bracket_count -= 1
+
+ if brace_count < 0:
+ return "Unmatched closing brace '}'"
+ if paren_count < 0:
+ return "Unmatched closing parenthesis ')'"
+ if bracket_count < 0:
+ return "Unmatched closing bracket ']'"
+
+ if brace_count != 0:
+ return f"Unbalanced braces: {brace_count} unclosed"
+ if paren_count != 0:
+ return f"Unbalanced parentheses: {paren_count} unclosed"
+ if bracket_count != 0:
+ return f"Unbalanced brackets: {bracket_count} unclosed"
+
+ if "useCurrentFrame" not in code:
+ return "Missing useCurrentFrame() — required for Remotion animations"
+
+ if "AbsoluteFill" not in code:
+ return "Missing AbsoluteFill — required as the root layout component"
+
+ return None
diff --git a/surfsense_backend/app/agents/video_presentation/prompts.py b/surfsense_backend/app/agents/video_presentation/prompts.py
new file mode 100644
index 000000000..5533bb01c
--- /dev/null
+++ b/surfsense_backend/app/agents/video_presentation/prompts.py
@@ -0,0 +1,509 @@
+import datetime
+
# TODO: move these to config file
MAX_SLIDES = 5  # hard ceiling on slides per presentation (enforced via the generation prompt)
FPS = 30  # frame rate used for all frames<->seconds conversions
DEFAULT_DURATION_IN_FRAMES = 300  # fallback slide length (10s @ 30fps) when no audio duration exists
+
# Preset names; must stay in sync with the THEME_DESCRIPTIONS keys below and
# with the color tables in REMOTION_SCENE_SYSTEM_PROMPT. Order matters: the
# round-robin fallback (pick_theme_and_mode_fallback) indexes by slide position.
THEME_PRESETS = [
    "TERRA",
    "OCEAN",
    "SUNSET",
    "EMERALD",
    "ECLIPSE",
    "ROSE",
    "FROST",
    "NEBULA",
    "AURORA",
    "CORAL",
    "MIDNIGHT",
    "AMBER",
    "LAVENDER",
    "STEEL",
    "CITRUS",
    "CHERRY",
]

# One-line design brief per preset; injected into the theme assignment system
# prompt so the LLM can match a slide's mood to a palette.
THEME_DESCRIPTIONS: dict[str, str] = {
    "TERRA": "Warm earthy tones — terracotta, olive. Heritage, tradition, organic warmth.",
    "OCEAN": "Cool oceanic depth — teal, coral accents. Calm, marine, fluid elegance.",
    "SUNSET": "Vibrant warm energy — orange, purple. Passion, creativity, bold expression.",
    "EMERALD": "Fresh natural life — green, mint. Growth, health, sustainability.",
    "ECLIPSE": "Dramatic luxury — black, gold. Premium, power, prestige.",
    "ROSE": "Soft elegance — dusty pink, mauve. Beauty, care, refined femininity.",
    "FROST": "Crisp clarity — ice blue, silver. Tech, data, precision analytics.",
    "NEBULA": "Cosmic mystery — magenta, deep purple. AI, innovation, cutting-edge future.",
    "AURORA": "Ethereal northern lights — green-teal, violet. Mystical, transformative, wonder.",
    "CORAL": "Tropical warmth — coral, turquoise. Inviting, lively, community.",
    "MIDNIGHT": "Deep sophistication — navy, silver. Contemplative, trust, authority.",
    "AMBER": "Rich honey warmth — amber, brown. Comfort, wisdom, organic richness.",
    "LAVENDER": "Gentle dreaminess — purple, lilac. Calm, imaginative, serene.",
    "STEEL": "Industrial strength — gray, steel blue. Modern professional, reliability.",
    "CITRUS": "Bright optimism — yellow, lime. Energy, joy, fresh starts.",
    "CHERRY": "Bold impact — deep red, dark. Power, urgency, passionate conviction.",
}
+
+
+# ---------------------------------------------------------------------------
+# LLM-based theme assignment (replaces keyword-based pick_theme_and_mode)
+# ---------------------------------------------------------------------------
+
# System prompt template for the single LLM call that assigns a theme preset
# and dark/light mode to every slide. {theme_list} is substituted by
# get_theme_assignment_system_prompt(); the doubled braces keep the JSON
# example literal through str.format().
THEME_ASSIGNMENT_SYSTEM_PROMPT = """You are a visual design director assigning color themes to presentation slides.
Given a list of slides, assign each slide a theme preset and color mode (dark or light).

Available themes (name — description):
{theme_list}

Rules:
1. Pick the theme that best matches each slide's mood, content, and visual direction.
2. Maximize visual variety — avoid repeating the same theme on consecutive slides.
3. Mix dark and light modes across the presentation for contrast and rhythm.
4. Opening slides often benefit from a bold dark theme; closing/summary slides can go either way.
5. The "background_explanation" field is the primary signal — it describes the intended mood and color direction.

Return ONLY a JSON array (no markdown fences, no explanation):
[
  {{"slide_number": 1, "theme": "THEME_NAME", "mode": "dark"}},
  {{"slide_number": 2, "theme": "THEME_NAME", "mode": "light"}}
]
""".strip()
+
+
def build_theme_assignment_user_prompt(
    slides: list[dict[str, str]],
) -> str:
    """Build the user prompt for LLM theme assignment.

    *slides* is a list of dicts with keys: slide_number, title, subtitle,
    background_explanation (mood). Missing subtitle/mood fall back to
    "" / "neutral".
    """
    slide_rows = [
        f'Slide {entry["slide_number"]}: "{entry["title"]}" '
        f'(subtitle: "{entry.get("subtitle", "")}") — '
        f'Mood: "{entry.get("background_explanation", "neutral")}"'
        for entry in slides
    ]
    return "\n".join(["Assign a theme and mode to each of these slides:", "", *slide_rows])
+
+
def get_theme_assignment_system_prompt() -> str:
    """Return the theme assignment system prompt with the full theme list injected."""
    # One "- NAME: description" entry per preset, in declaration order.
    catalog = [f"- {name}: {desc}" for name, desc in THEME_DESCRIPTIONS.items()]
    return THEME_ASSIGNMENT_SYSTEM_PROMPT.format(theme_list="\n".join(catalog))
+
+
def pick_theme_and_mode_fallback(
    slide_index: int, total_slides: int
) -> tuple[str, str]:
    """Simple round-robin fallback when LLM theme assignment fails.

    Cycles through THEME_PRESETS by 0-based slide position and alternates
    dark/light modes; a single-slide presentation is always dark.
    """
    theme = THEME_PRESETS[slide_index % len(THEME_PRESETS)]
    if total_slides == 1:
        mode = "dark"
    else:
        mode = "light" if slide_index % 2 else "dark"
    return theme, mode
+
+
+def get_slide_generation_prompt(user_prompt: str | None = None) -> str:
+ return f"""
+Today's date: {datetime.datetime.now().strftime("%Y-%m-%d")}
+
+You are a content-to-slides converter. You receive raw source content (articles, notes, transcripts,
+product descriptions, chat conversations, etc.) and break it into a sequence of presentation slides
+for a video presentation with voiceover narration.
+
+{
+ f'''
+You **MUST** strictly adhere to the following user instruction while generating the slides:
+
+{user_prompt}
+
+'''
+ if user_prompt
+ else ""
+ }
+
+
+- 'source_content': A block of text containing the information to be presented. This could be
+ research findings, an article summary, a detailed outline, user chat history, or any relevant
+ raw information. The content serves as the factual basis for the video presentation.
+
+
+
+A JSON object containing the presentation slides:
+{{
+ "slides": [
+ {{
+ "slide_number": 1,
+ "title": "Concise slide title",
+ "subtitle": "One-line subtitle or tagline",
+ "content_in_markdown": "## Heading\\n- Bullet point 1\\n- **Bold text**\\n- Bullet point 3",
+ "speaker_transcripts": [
+ "First narration sentence for this slide.",
+ "Second narration sentence expanding on the point.",
+ "Third sentence wrapping up this slide."
+ ],
+ "background_explanation": "Emotional mood and color direction for this slide"
+ }}
+ ]
+}}
+
+
+
+=== SLIDE COUNT ===
+
+Dynamically decide the number of slides between 1 and {MAX_SLIDES} (inclusive).
+Base your decision entirely on the content's depth, richness, and how many distinct ideas it contains.
+Thin or simple content should produce fewer slides; dense or multi-faceted content may use more.
+Do NOT inflate or pad slides to reach {
+ MAX_SLIDES
+ } — only use what the content genuinely warrants.
+Do NOT treat {MAX_SLIDES} as a target; it is a hard ceiling, not a goal.
+
+=== SLIDE STRUCTURE ===
+
+- Each slide should cover ONE distinct key idea or section.
+- Keep slides focused: 2-5 bullet points of content per slide max.
+- The first slide should be a title/intro slide.
+- The last slide should be a summary or closing slide ONLY if there are 3+ slides.
+ For 1-2 slides, skip the closing slide — just cover the content.
+- Do NOT create a separate closing slide if its content would just repeat earlier slides.
+
+=== CONTENT FIELDS ===
+
+- Write speaker_transcripts as if a human presenter is narrating — natural, conversational, 2-4 sentences per slide.
+ These will be converted to TTS audio, so write in a way that sounds great when spoken aloud.
+- background_explanation should describe a visual style matching the slide's mood:
+ - Describe the emotional feel: "warm and organic", "dramatic and urgent", "clean and optimistic",
+ "technical and precise", "celebratory", "earthy and grounded", "cosmic and futuristic"
+ - Mention color direction: warm tones, cool tones, earth tones, neon accents, gold/black, etc.
+ - Vary the mood across slides — do NOT always say "dark blue gradient".
+- content_in_markdown should use proper markdown: ## headings, **bold**, - bullets, etc.
+
+=== NARRATION QUALITY ===
+
+- Speaker transcripts should explain the slide content in an engaging, presenter-like voice.
+- Keep narration concise: 2-4 sentences per slide (targeting ~10-15 seconds of audio per slide).
+- The narration should add context beyond what's on the slide — don't just read the bullets.
+- Use natural language: contractions, conversational tone, occasional enthusiasm.
+
+
+
+Input: "Quantum computing uses quantum bits or qubits which can exist in multiple states simultaneously due to superposition."
+
+Output:
+{{
+ "slides": [
+ {{
+ "slide_number": 1,
+ "title": "Quantum Computing",
+ "subtitle": "Beyond Classical Bits",
+ "content_in_markdown": "## The Quantum Leap\\n- Classical computers use **bits** (0 or 1)\\n- Quantum computers use **qubits**\\n- Qubits leverage **superposition**",
+ "speaker_transcripts": [
+ "Let's explore quantum computing, a technology that's fundamentally different from the computers we use every day.",
+ "While traditional computers work with bits that are either zero or one, quantum computers use something called qubits.",
+ "The magic of qubits is superposition — they can exist in multiple states at the same time."
+ ],
+ "background_explanation": "Cosmic and futuristic with deep purple and magenta tones, evoking the mystery of quantum mechanics"
+ }}
+ ]
+}}
+
+
+Transform the source material into well-structured presentation slides with engaging narration.
+Ensure each slide has a clear visual mood and natural-sounding speaker transcripts.
+
+"""
+
+
+# ---------------------------------------------------------------------------
+# Remotion scene code generation prompt
+# Ported from RemotionTets POC /api/generate system prompt
+# ---------------------------------------------------------------------------
+
+REMOTION_SCENE_SYSTEM_PROMPT = """
+You are a Remotion component generator that creates cinematic, modern motion graphics.
+Generate a single self-contained React component that uses Remotion.
+
+=== THEME PRESETS (pick ONE per slide — see user prompt for which to use) ===
+
+Each slide MUST use a DIFFERENT preset. The user prompt will tell you which preset to use.
+Use ALL colors from that preset — background, surface, text, accent, glow. Do NOT mix presets.
+
+TERRA (warm earth — terracotta + olive):
+ dark: bg #1C1510 surface #261E16 border #3D3024 text #E8DDD0 muted #9A8A78 accent #C2623D secondary #7D8C52 glow rgba(194,98,61,0.12)
+ light: bg #F7F0E8 surface #FFF8F0 border #DDD0BF text #2C1D0E muted #8A7A68 accent #B85430 secondary #6B7A42 glow rgba(184,84,48,0.08)
+ gradient-dark: radial-gradient(ellipse at 30% 80%, rgba(194,98,61,0.18), transparent 60%), linear-gradient(180deg, #1C1510, #261E16)
+ gradient-light: radial-gradient(ellipse at 70% 20%, rgba(107,122,66,0.12), transparent 55%), linear-gradient(180deg, #F7F0E8, #FFF8F0)
+
+OCEAN (cool depth — teal + coral):
+ dark: bg #0B1A1E surface #122428 border #1E3740 text #D5EAF0 muted #6A9AA8 accent #1DB6A8 secondary #E87461 glow rgba(29,182,168,0.12)
+ light: bg #F0F8FA surface #FFFFFF border #C8E0E8 text #0E2830 muted #5A8A98 accent #0EA69A secondary #D05F4E glow rgba(14,166,154,0.08)
+ gradient-dark: radial-gradient(ellipse at 80% 30%, rgba(29,182,168,0.20), transparent 55%), radial-gradient(circle at 20% 80%, rgba(232,116,97,0.10), transparent 50%), #0B1A1E
+ gradient-light: radial-gradient(ellipse at 20% 40%, rgba(14,166,154,0.10), transparent 55%), linear-gradient(180deg, #F0F8FA, #FFFFFF)
+
+SUNSET (warm energy — orange + purple):
+ dark: bg #1E130F surface #2A1B14 border #42291C text #F0DDD0 muted #A08878 accent #E86A20 secondary #A855C0 glow rgba(232,106,32,0.12)
+ light: bg #FFF5ED surface #FFFFFF border #EADAC8 text #2E1508 muted #907860 accent #D05A18 secondary #9045A8 glow rgba(208,90,24,0.08)
+ gradient-dark: linear-gradient(135deg, rgba(232,106,32,0.15) 0%, transparent 40%), radial-gradient(circle at 80% 70%, rgba(168,85,192,0.15), transparent 50%), #1E130F
+ gradient-light: linear-gradient(135deg, rgba(208,90,24,0.08) 0%, rgba(144,69,168,0.06) 100%), #FFF5ED
+
+EMERALD (fresh life — green + mint):
+ dark: bg #0B1E14 surface #12281A border #1E3C28 text #D0F0E0 muted #5EA880 accent #10B981 secondary #84CC16 glow rgba(16,185,129,0.12)
+ light: bg #F0FAF5 surface #FFFFFF border #C0E8D0 text #0E2C18 muted #489068 accent #059669 secondary #65A30D glow rgba(5,150,105,0.08)
+ gradient-dark: radial-gradient(ellipse at 50% 50%, rgba(16,185,129,0.18), transparent 60%), linear-gradient(180deg, #0B1E14, #12281A)
+ gradient-light: radial-gradient(ellipse at 60% 30%, rgba(101,163,13,0.10), transparent 55%), linear-gradient(180deg, #F0FAF5, #FFFFFF)
+
+ECLIPSE (dramatic — black + gold):
+ dark: bg #100C05 surface #1A1508 border #2E2510 text #D4B96A muted #8A7840 accent #E8B830 secondary #C09020 glow rgba(232,184,48,0.14)
+ light: bg #FAF6ED surface #FFFFFF border #E0D8C0 text #1A1408 muted #7A6818 accent #C09820 secondary #A08018 glow rgba(192,152,32,0.08)
+ gradient-dark: radial-gradient(circle at 50% 40%, rgba(232,184,48,0.20), transparent 50%), radial-gradient(ellipse at 50% 90%, rgba(192,144,32,0.08), transparent 50%), #100C05
+ gradient-light: radial-gradient(circle at 50% 40%, rgba(192,152,32,0.10), transparent 55%), linear-gradient(180deg, #FAF6ED, #FFFFFF)
+
+ROSE (soft elegance — dusty pink + mauve):
+ dark: bg #1E1018 surface #281820 border #3D2830 text #F0D8E0 muted #A08090 accent #E4508C secondary #B06498 glow rgba(228,80,140,0.12)
+ light: bg #FDF2F5 surface #FFFFFF border #F0D0D8 text #2C1018 muted #906878 accent #D43D78 secondary #9A5080 glow rgba(212,61,120,0.08)
+ gradient-dark: radial-gradient(ellipse at 70% 30%, rgba(228,80,140,0.18), transparent 55%), radial-gradient(circle at 20% 80%, rgba(176,100,152,0.10), transparent 50%), #1E1018
+ gradient-light: radial-gradient(ellipse at 30% 60%, rgba(212,61,120,0.08), transparent 55%), linear-gradient(180deg, #FDF2F5, #FFFFFF)
+
+FROST (crisp clarity — ice blue + silver):
+ dark: bg #0A1520 surface #101D2A border #1A3040 text #D0E5F5 muted #6090B0 accent #5AB4E8 secondary #8BA8C0 glow rgba(90,180,232,0.12)
+ light: bg #F0F6FC surface #FFFFFF border #C8D8E8 text #0C1820 muted #5080A0 accent #3A96D0 secondary #7090A8 glow rgba(58,150,208,0.08)
+ gradient-dark: radial-gradient(ellipse at 40% 20%, rgba(90,180,232,0.16), transparent 55%), linear-gradient(180deg, #0A1520, #101D2A)
+ gradient-light: radial-gradient(ellipse at 50% 50%, rgba(58,150,208,0.08), transparent 55%), linear-gradient(180deg, #F0F6FC, #FFFFFF)
+
+NEBULA (cosmic — magenta + deep purple):
+ dark: bg #150A1E surface #1E1028 border #351A48 text #E0D0F0 muted #8060A0 accent #C850E0 secondary #8030C0 glow rgba(200,80,224,0.14)
+ light: bg #F8F0FF surface #FFFFFF border #E0C8F0 text #1A0A24 muted #7050A0 accent #A840C0 secondary #6820A0 glow rgba(168,64,192,0.08)
+ gradient-dark: radial-gradient(circle at 60% 40%, rgba(200,80,224,0.18), transparent 50%), radial-gradient(ellipse at 30% 80%, rgba(128,48,192,0.12), transparent 50%), #150A1E
+ gradient-light: radial-gradient(circle at 40% 30%, rgba(168,64,192,0.10), transparent 55%), linear-gradient(180deg, #F8F0FF, #FFFFFF)
+
+AURORA (ethereal lights — green-teal + violet):
+ dark: bg #0A1A1A surface #102020 border #1A3838 text #D0F0F0 muted #60A0A0 accent #30D0B0 secondary #8040D0 glow rgba(48,208,176,0.12)
+ light: bg #F0FAF8 surface #FFFFFF border #C0E8E0 text #0A2020 muted #508080 accent #20B090 secondary #6830B0 glow rgba(32,176,144,0.08)
+ gradient-dark: radial-gradient(ellipse at 30% 70%, rgba(48,208,176,0.18), transparent 55%), radial-gradient(circle at 70% 30%, rgba(128,64,208,0.12), transparent 50%), #0A1A1A
+ gradient-light: radial-gradient(ellipse at 50% 40%, rgba(32,176,144,0.10), transparent 55%), linear-gradient(180deg, #F0FAF8, #FFFFFF)
+
+CORAL (tropical warmth — coral + turquoise):
+ dark: bg #1E0F0F surface #281818 border #402828 text #F0D8D8 muted #A07070 accent #F06050 secondary #30B8B0 glow rgba(240,96,80,0.12)
+ light: bg #FFF5F3 surface #FFFFFF border #F0D0C8 text #2E1010 muted #906060 accent #E04838 secondary #20A098 glow rgba(224,72,56,0.08)
+ gradient-dark: radial-gradient(ellipse at 60% 60%, rgba(240,96,80,0.18), transparent 55%), radial-gradient(circle at 30% 30%, rgba(48,184,176,0.10), transparent 50%), #1E0F0F
+ gradient-light: radial-gradient(ellipse at 40% 50%, rgba(224,72,56,0.08), transparent 55%), linear-gradient(180deg, #FFF5F3, #FFFFFF)
+
+MIDNIGHT (deep sophistication — navy + silver):
+ dark: bg #080C18 surface #0E1420 border #1A2438 text #C8D8F0 muted #5070A0 accent #4080E0 secondary #A0B0D0 glow rgba(64,128,224,0.12)
+ light: bg #F0F2F8 surface #FFFFFF border #C8D0E0 text #101828 muted #506080 accent #3060C0 secondary #8090B0 glow rgba(48,96,192,0.08)
+ gradient-dark: radial-gradient(ellipse at 50% 30%, rgba(64,128,224,0.16), transparent 55%), linear-gradient(180deg, #080C18, #0E1420)
+ gradient-light: radial-gradient(ellipse at 50% 50%, rgba(48,96,192,0.08), transparent 55%), linear-gradient(180deg, #F0F2F8, #FFFFFF)
+
+AMBER (rich honey warmth — amber + brown):
+ dark: bg #1A1208 surface #221A0E border #3A2C18 text #F0E0C0 muted #A09060 accent #E0A020 secondary #C08030 glow rgba(224,160,32,0.12)
+ light: bg #FFF8E8 surface #FFFFFF border #E8D8B8 text #2A1C08 muted #907840 accent #C88810 secondary #A86820 glow rgba(200,136,16,0.08)
+ gradient-dark: radial-gradient(ellipse at 40% 60%, rgba(224,160,32,0.18), transparent 55%), linear-gradient(180deg, #1A1208, #221A0E)
+ gradient-light: radial-gradient(ellipse at 60% 40%, rgba(200,136,16,0.10), transparent 55%), linear-gradient(180deg, #FFF8E8, #FFFFFF)
+
+LAVENDER (gentle dreaminess — purple + lilac):
+ dark: bg #14101E surface #1C1628 border #302840 text #E0D8F0 muted #8070A0 accent #A060E0 secondary #C090D0 glow rgba(160,96,224,0.12)
+ light: bg #F8F0FF surface #FFFFFF border #E0D0F0 text #1C1028 muted #706090 accent #8848C0 secondary #A878B8 glow rgba(136,72,192,0.08)
+ gradient-dark: radial-gradient(ellipse at 60% 40%, rgba(160,96,224,0.18), transparent 55%), radial-gradient(circle at 30% 70%, rgba(192,144,208,0.10), transparent 50%), #14101E
+ gradient-light: radial-gradient(ellipse at 40% 30%, rgba(136,72,192,0.10), transparent 55%), linear-gradient(180deg, #F8F0FF, #FFFFFF)
+
+STEEL (industrial strength — gray + steel blue):
+ dark: bg #101214 surface #181C20 border #282E38 text #D0D8E0 muted #708090 accent #5088B0 secondary #90A0B0 glow rgba(80,136,176,0.12)
+ light: bg #F2F4F6 surface #FFFFFF border #D0D8E0 text #181C24 muted #607080 accent #3870A0 secondary #708898 glow rgba(56,112,160,0.08)
+ gradient-dark: radial-gradient(ellipse at 50% 50%, rgba(80,136,176,0.14), transparent 55%), linear-gradient(180deg, #101214, #181C20)
+ gradient-light: radial-gradient(ellipse at 50% 40%, rgba(56,112,160,0.08), transparent 55%), linear-gradient(180deg, #F2F4F6, #FFFFFF)
+
+CITRUS (bright optimism — yellow + lime):
+ dark: bg #181808 surface #202010 border #383818 text #F0F0C0 muted #A0A060 accent #E8D020 secondary #90D030 glow rgba(232,208,32,0.12)
+ light: bg #FFFFF0 surface #FFFFFF border #E8E8C0 text #282808 muted #808040 accent #C8B010 secondary #70B020 glow rgba(200,176,16,0.08)
+ gradient-dark: radial-gradient(ellipse at 40% 40%, rgba(232,208,32,0.18), transparent 55%), radial-gradient(circle at 70% 70%, rgba(144,208,48,0.10), transparent 50%), #181808
+ gradient-light: radial-gradient(ellipse at 50% 30%, rgba(200,176,16,0.10), transparent 55%), linear-gradient(180deg, #FFFFF0, #FFFFFF)
+
+CHERRY (bold impact — deep red + dark):
+ dark: bg #1A0808 surface #241010 border #401818 text #F0D0D0 muted #A06060 accent #D02030 secondary #E05060 glow rgba(208,32,48,0.14)
+ light: bg #FFF0F0 surface #FFFFFF border #F0C8C8 text #280808 muted #904848 accent #B01828 secondary #C83848 glow rgba(176,24,40,0.08)
+ gradient-dark: radial-gradient(ellipse at 50% 40%, rgba(208,32,48,0.20), transparent 50%), linear-gradient(180deg, #1A0808, #241010)
+ gradient-light: radial-gradient(ellipse at 50% 50%, rgba(176,24,40,0.10), transparent 55%), linear-gradient(180deg, #FFF0F0, #FFFFFF)
+
+=== SHARED TOKENS (use with any theme above) ===
+
+SPACING: xs 8px, sm 16px, md 24px, lg 32px, xl 48px, 2xl 64px, 3xl 96px, 4xl 128px
+TYPOGRAPHY: fontFamily "Inter, system-ui, -apple-system, sans-serif"
+ caption 14px/1.4, body 18px/1.6, subhead 24px/1.4, title 40px/1.2 w600, headline 64px/1.1 w700, display 96px/1.0 w800
+ letterSpacing: tight "-0.02em", normal "0", wide "0.05em"
+BORDER RADIUS: 12px (cards), 8px (buttons), 9999px (pills)
+
+=== VISUAL VARIETY (CRITICAL) ===
+
+The user prompt assigns each slide a specific theme preset AND mode (dark/light).
+You MUST use EXACTLY the assigned preset and mode. Additionally:
+
+1. Use the preset's gradient as the AbsoluteFill background.
+2. Use the preset's accent/secondary colors for highlights, pill badges, and card accents.
+3. Use the preset's glow value for all boxShadow effects.
+4. LAYOUT VARIATION: Vary layout between slides:
+ - One slide: bold centered headline + subtle stat
+ - Another: two-column card layout
+ - Another: single large number or quote as hero
+ Do NOT use the same layout pattern for every slide.
+
+=== LAYOUT RULES (CRITICAL — elements must NEVER overlap) ===
+
+The canvas is 1920x1080. You MUST use a SINGLE-LAYER layout. NO stacking, NO multiple AbsoluteFill layers.
+
+STRUCTURE — every component must follow this exact pattern:
+
+  <AbsoluteFill style={{ background: "<preset gradient>", padding: 80, display: "flex", flexDirection: "column" }}>
+    {/* ALL content goes here as direct children in normal flow */}
+  </AbsoluteFill>
+
+ABSOLUTE RULES:
+- Use exactly ONE AbsoluteFill as the root. Set its background color/gradient via its style prop.
+- NEVER nest AbsoluteFill inside AbsoluteFill.
+- NEVER use position "absolute" or position "fixed" on ANY element.
+- NEVER use multiple layers or z-index.
+- ALL elements must be in normal document flow inside the single root AbsoluteFill.
+
+SPACING:
+- Root padding: 80px on all sides (safe area).
+- Use flexDirection "column" with gap for vertical stacking, flexDirection "row" with gap for horizontal.
+- Minimum gap between elements: 24px vertical, 32px horizontal.
+- Text hierarchy gaps: headline→subheading 16px, subheading→body 12px, body→button 32px.
+- Cards/panels: padding 32px-48px, borderRadius 12px.
+- NEVER use margin to space siblings — always use the parent's gap property.
+
+=== DESIGN STYLE ===
+
+- Premium aesthetic — use the exact colors from the assigned theme preset (do NOT invent your own)
+- Background: use the preset's gradient-dark or gradient-light value directly as the AbsoluteFill's background
+- Card/surface backgrounds: use the preset's surface color
+- Text colors: use the preset's text, muted values
+- Borders: use the preset's border color
+- Glows: use the preset's glow value for all boxShadow — do NOT substitute other colors
+- Generous whitespace — less is more, let elements breathe
+- NO decorative background shapes, blurs, or overlapping ornaments
+
+=== REMOTION RULES ===
+
+- Export the component as: export const MyComposition = () => { ... }
+- Use useCurrentFrame() and useVideoConfig() from "remotion"
+- Do NOT use Sequence
+- Do NOT manually calculate animation timings or frame offsets
+
+=== ANIMATION (use the stagger() helper for ALL element animations) ===
+
+A pre-built helper function called stagger() is available globally.
+It handles enter, hold, and exit phases automatically — you MUST use it.
+
+Signature:
+ stagger(frame, fps, index, total) → { opacity: number, transform: string }
+
+Parameters:
+ frame — from useCurrentFrame()
+ fps — from useVideoConfig()
+ index — 0-based index of this element in the entrance order
+ total — total number of animated elements in the scene
+
+It returns a style object with opacity and transform that you spread onto the element.
+Timing is handled for you: staggered spring entrances, ambient hold motion, and a graceful exit.
+
+Usage pattern:
+ const frame = useCurrentFrame();
+ const { fps } = useVideoConfig();
+
+  return (
+    <AbsoluteFill>
+      <h1 style={{ ...stagger(frame, fps, 0, 4) }}>Headline</h1>
+      <p style={{ ...stagger(frame, fps, 1, 4) }}>Subtitle</p>
+      <div style={{ ...stagger(frame, fps, 2, 4) }}>Card</div>
+      <div style={{ ...stagger(frame, fps, 3, 4) }}>Footer</div>
+    </AbsoluteFill>
+  );
+
+Rules:
+- Count ALL animated elements in your scene and pass that count as the "total" parameter.
+- Assign each element a sequential index starting from 0.
+- You can merge stagger's return with additional styles:
+    <div style={{ ...stagger(frame, fps, 1, 4), padding: 32, borderRadius: 12 }}>
+
+- For non-animated static elements (backgrounds, borders), just use normal styles without stagger.
+- You may still use spring() and interpolate() for EXTRA custom effects (e.g., a number counter,
+ color shift, or typewriter effect), but stagger() must drive all entrance/exit animations.
+
+=== AVAILABLE GLOBALS (injected at runtime, do NOT import anything else) ===
+
+- React (available globally)
+- AbsoluteFill, useCurrentFrame, useVideoConfig, spring, interpolate, Easing from "remotion"
+- stagger(frame, fps, index, total) — animation helper described above
+
+=== CODE RULES ===
+
+- Output ONLY the raw code, no markdown fences, no explanations
+- Keep it fully self-contained, no external dependencies or images
+- Use inline styles only (no CSS imports, no className)
+- Target 1920x1080 resolution
+- Every container must use display "flex" with explicit gap values
+- NEVER use marginTop/marginBottom to space siblings — use the parent's gap instead
+""".strip()
+
+
def build_scene_generation_user_prompt(
    slide_number: int,
    total_slides: int,
    title: str,
    subtitle: str,
    content_in_markdown: str,
    background_explanation: str,
    duration_in_frames: int,
    theme: str,
    mode: str,
) -> str:
    """Build the user prompt for generating a single slide's Remotion scene code.

    *theme* and *mode* are pre-assigned (by LLM or fallback) before this is called.
    *duration_in_frames* is reported to the model in both frames and seconds (at FPS).
    """
    prompt_lines: list[str] = [
        "Create a cinematic, visually striking Remotion scene.",
        f"The video is {duration_in_frames} frames at {FPS}fps ({duration_in_frames / FPS:.1f}s total).",
        "",
        f"This is slide {slide_number} of {total_slides} in the video.",
        "",
    ]
    # Theme directives: the assigned preset/mode is mandatory for the model.
    prompt_lines += [
        f"=== ASSIGNED THEME: {theme} / {mode.upper()} mode ===",
        f"You MUST use the {theme} preset in {mode} mode from the theme presets above.",
        f"Use its exact background gradient (gradient-{mode}), surface, text, accent, secondary, border, and glow colors.",
        "Do NOT substitute, invent, or default to blue/violet colors.",
        "",
    ]
    # Content directives: slide message, source markdown, and mood.
    prompt_lines += [
        f'The scene should communicate this message: "{title} — {subtitle}"',
        "",
        "Key ideas to convey (use as creative inspiration, NOT literal text to dump on screen):",
        content_in_markdown,
        "",
        "Pick only the 1-2 most impactful phrases or numbers to display as text.",
        "",
        f"Mood & tone: {background_explanation}",
    ]
    return "\n".join(prompt_lines)
+
+
# System prompt for the code-repair pass: given broken component code plus the
# Babel error text, the LLM must return a fixed, self-contained component.
# Used by the refine loop after the backend syntax check flags a slide.
REFINE_SCENE_SYSTEM_PROMPT = """
You are a code repair assistant. You will receive a Remotion React component that failed to compile,
along with the exact error message from the Babel transpiler.

Your job is to fix the code so it compiles and runs correctly.

RULES:
- Output ONLY the fixed raw code as a string — no markdown fences, no explanations.
- Preserve the original intent, design, and animations as closely as possible.
- The component must be exported as: export const MyComposition = () => { ... }
- Only these globals are available at runtime (they are injected, not actually imported):
  React, AbsoluteFill, useCurrentFrame, useVideoConfig, spring, interpolate, Easing,
  stagger (a helper: stagger(frame, fps, index, total) → { opacity, transform })
- Keep import statements at the top (they get stripped by the compiler) but do NOT import anything
  other than "react" and "remotion".
- Use inline styles only (no CSS, no className).
- Common fixes:
  - Mismatched braces/brackets in JSX style objects (e.g. }}, instead of }}>)
  - Missing closing tags
  - Trailing commas before > in JSX
  - Undefined variables or typos
  - Invalid JSX expressions
- After fixing, mentally walk through every brace pair { } and JSX tag to verify they match.
""".strip()
diff --git a/surfsense_backend/app/agents/video_presentation/state.py b/surfsense_backend/app/agents/video_presentation/state.py
new file mode 100644
index 000000000..53c989f75
--- /dev/null
+++ b/surfsense_backend/app/agents/video_presentation/state.py
@@ -0,0 +1,72 @@
+"""Define the state structures for the video presentation agent."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+from pydantic import BaseModel, Field
+from sqlalchemy.ext.asyncio import AsyncSession
+
+
class SlideContent(BaseModel):
    """Represents a single parsed slide from content analysis.

    Produced by the slide-parsing step of the video presentation graph and
    later enriched (in the Celery task) with per-slide audio metadata before
    being persisted to the ``slides`` JSONB column.
    """

    # 1-based index; used to join slides with audio results and scene codes.
    slide_number: int = Field(..., description="1-based slide number")
    title: str = Field(..., description="Concise slide title")
    subtitle: str = Field(..., description="One-line subtitle or tagline")
    content_in_markdown: str = Field(
        ..., description="Slide body content formatted as markdown"
    )
    # Narration lines for the per-slide TTS step of the pipeline.
    speaker_transcripts: list[str] = Field(
        ...,
        description="2-4 short sentences a presenter would say while this slide is shown",
    )
    background_explanation: str = Field(
        ...,
        description="Emotional mood and color direction for this slide",
    )
+
+
class PresentationSlides(BaseModel):
    """Represents the full set of parsed slides from the LLM.

    Container for the ordered slide list extracted from the source content.
    """

    slides: list[SlideContent] = Field(
        ..., description="Ordered array of presentation slides"
    )
+
+
class SlideAudioResult(BaseModel):
    """Audio generation result for a single slide."""

    # Matches SlideContent.slide_number (1-based); join key for merging.
    slide_number: int
    audio_file: str = Field(..., description="Path to the per-slide audio file")
    duration_seconds: float = Field(..., description="Audio duration in seconds")
    # NOTE(review): the frame rate is assumed fixed at 30fps here — confirm
    # the frontend renders the composition at the same rate.
    duration_in_frames: int = Field(
        ..., description="Audio duration in frames (at 30fps)"
    )
+
+
class SlideSceneCode(BaseModel):
    """Generated Remotion component code for a single slide."""

    # Matches SlideContent.slide_number (1-based).
    slide_number: int
    # The frontend compiles this source client-side (via Babel) and renders it.
    code: str = Field(
        ..., description="Raw Remotion React component source code for this slide"
    )
    title: str = Field(..., description="Short title for the composition")
+
+
@dataclass
class State:
    """State for the video presentation agent graph.

    Pipeline: parse slides → generate per-slide TTS audio → generate per-slide Remotion code
    The frontend receives the slides + code + audio and handles compilation/rendering.
    """

    # Inputs supplied when the graph is invoked.
    db_session: AsyncSession
    source_content: str

    # Outputs filled in by successive graph nodes; each stays None until the
    # corresponding step has run.
    slides: list[SlideContent] | None = None
    slide_audio_results: list[SlideAudioResult] | None = None
    slide_scene_codes: list[SlideSceneCode] | None = None
diff --git a/surfsense_backend/app/agents/video_presentation/utils.py b/surfsense_backend/app/agents/video_presentation/utils.py
new file mode 100644
index 000000000..58909e104
--- /dev/null
+++ b/surfsense_backend/app/agents/video_presentation/utils.py
@@ -0,0 +1,30 @@
+def get_voice_for_provider(provider: str, speaker_id: int = 0) -> dict | str:
+ """
+ Get the appropriate voice configuration based on the TTS provider.
+
+ Currently single-speaker only (speaker_id=0). Multi-speaker support
+ will be added in a future iteration.
+
+ Args:
+ provider: The TTS provider (e.g., "openai/tts-1", "vertex_ai/test")
+ speaker_id: The ID of the speaker (default 0, single speaker for now)
+
+ Returns:
+ Voice configuration - string for OpenAI, dict for Vertex AI
+ """
+ if provider == "local/kokoro":
+ return "af_heart"
+
+ provider_type = (
+ provider.split("/")[0].lower() if "/" in provider else provider.lower()
+ )
+
+ voices = {
+ "openai": "alloy",
+ "vertex_ai": {
+ "languageCode": "en-US",
+ "name": "en-US-Studio-O",
+ },
+ "azure": "alloy",
+ }
+ return voices.get(provider_type, {})
diff --git a/surfsense_backend/app/app.py b/surfsense_backend/app/app.py
index 6c6b12e3a..bba2f1f3a 100644
--- a/surfsense_backend/app/app.py
+++ b/surfsense_backend/app/app.py
@@ -341,7 +341,7 @@ if config.NEXT_FRONTEND_URL:
allowed_origins.append(www_url)
allowed_origins.extend(
- [ # For local development and desktop app
+ [ # For local development and desktop app
"http://localhost:3000",
"http://127.0.0.1:3000",
]
diff --git a/surfsense_backend/app/celery_app.py b/surfsense_backend/app/celery_app.py
index 62414775a..69e117747 100644
--- a/surfsense_backend/app/celery_app.py
+++ b/surfsense_backend/app/celery_app.py
@@ -77,6 +77,7 @@ celery_app = Celery(
include=[
"app.tasks.celery_tasks.document_tasks",
"app.tasks.celery_tasks.podcast_tasks",
+ "app.tasks.celery_tasks.video_presentation_tasks",
"app.tasks.celery_tasks.connector_tasks",
"app.tasks.celery_tasks.schedule_checker_task",
"app.tasks.celery_tasks.document_reindex_tasks",
diff --git a/surfsense_backend/app/db.py b/surfsense_backend/app/db.py
index 95ae8e728..2ce48c16d 100644
--- a/surfsense_backend/app/db.py
+++ b/surfsense_backend/app/db.py
@@ -103,6 +103,13 @@ class PodcastStatus(StrEnum):
FAILED = "failed"
class VideoPresentationStatus(StrEnum):
    """Lifecycle states for a video presentation record.

    Values are persisted via the ``video_presentation_status`` Postgres enum;
    keep them in sync with the API-level VideoPresentationStatusEnum schema.
    """

    PENDING = "pending"
    GENERATING = "generating"
    READY = "ready"
    FAILED = "failed"
+
+
class DocumentStatus:
"""
Helper class for document processing status (stored as JSONB).
@@ -337,6 +344,12 @@ class Permission(StrEnum):
PODCASTS_UPDATE = "podcasts:update"
PODCASTS_DELETE = "podcasts:delete"
+ # Video Presentations
+ VIDEO_PRESENTATIONS_CREATE = "video_presentations:create"
+ VIDEO_PRESENTATIONS_READ = "video_presentations:read"
+ VIDEO_PRESENTATIONS_UPDATE = "video_presentations:update"
+ VIDEO_PRESENTATIONS_DELETE = "video_presentations:delete"
+
# Image Generations
IMAGE_GENERATIONS_CREATE = "image_generations:create"
IMAGE_GENERATIONS_READ = "image_generations:read"
@@ -403,6 +416,10 @@ DEFAULT_ROLE_PERMISSIONS = {
Permission.PODCASTS_CREATE.value,
Permission.PODCASTS_READ.value,
Permission.PODCASTS_UPDATE.value,
+ # Video Presentations (no delete)
+ Permission.VIDEO_PRESENTATIONS_CREATE.value,
+ Permission.VIDEO_PRESENTATIONS_READ.value,
+ Permission.VIDEO_PRESENTATIONS_UPDATE.value,
# Image Generations (create and read, no delete)
Permission.IMAGE_GENERATIONS_CREATE.value,
Permission.IMAGE_GENERATIONS_READ.value,
@@ -435,6 +452,8 @@ DEFAULT_ROLE_PERMISSIONS = {
Permission.LLM_CONFIGS_READ.value,
# Podcasts (read only)
Permission.PODCASTS_READ.value,
+ # Video Presentations (read only)
+ Permission.VIDEO_PRESENTATIONS_READ.value,
# Image Generations (read only)
Permission.IMAGE_GENERATIONS_READ.value,
# Connectors (read only)
@@ -1044,6 +1063,46 @@ class Podcast(BaseModel, TimestampMixin):
thread = relationship("NewChatThread")
class VideoPresentation(BaseModel, TimestampMixin):
    """Video presentation model for storing AI-generated video presentations.

    The slides JSONB stores per-slide data including Remotion component code,
    audio file paths, and durations. The frontend compiles the code and renders
    the video using Remotion Player.
    """

    __tablename__ = "video_presentations"

    title = Column(String(500), nullable=False)
    # Parsed slide content merged with per-slide audio metadata
    # (audio_file path, duration_seconds, duration_in_frames).
    slides = Column(JSONB, nullable=True)
    # Raw Remotion React component source per slide.
    scene_codes = Column(JSONB, nullable=True)
    status = Column(
        SQLAlchemyEnum(
            VideoPresentationStatus,
            name="video_presentation_status",
            # create_type=False: the Postgres enum type is created elsewhere
            # (presumably a migration) — not by SQLAlchemy metadata creation.
            create_type=False,
            values_callable=lambda x: [e.value for e in x],
        ),
        nullable=False,
        default=VideoPresentationStatus.READY,
        server_default="ready",
        index=True,
    )

    # Owning search space; rows are removed when the space is deleted.
    search_space_id = Column(
        Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False
    )
    search_space = relationship("SearchSpace", back_populates="video_presentations")

    # Optional link back to the chat thread that requested the presentation;
    # the reference is nulled (row kept) if the thread is deleted.
    thread_id = Column(
        Integer,
        ForeignKey("new_chat_threads.id", ondelete="SET NULL"),
        nullable=True,
        index=True,
    )
    thread = relationship("NewChatThread")
+
+
class Report(BaseModel, TimestampMixin):
"""Report model for storing generated Markdown reports."""
@@ -1228,6 +1287,12 @@ class SearchSpace(BaseModel, TimestampMixin):
order_by="Podcast.id.desc()",
cascade="all, delete-orphan",
)
+ video_presentations = relationship(
+ "VideoPresentation",
+ back_populates="search_space",
+ order_by="VideoPresentation.id.desc()",
+ cascade="all, delete-orphan",
+ )
reports = relationship(
"Report",
back_populates="search_space",
diff --git a/surfsense_backend/app/routes/__init__.py b/surfsense_backend/app/routes/__init__.py
index d7df2182a..66471b0ed 100644
--- a/surfsense_backend/app/routes/__init__.py
+++ b/surfsense_backend/app/routes/__init__.py
@@ -42,6 +42,7 @@ from .search_spaces_routes import router as search_spaces_router
from .slack_add_connector_route import router as slack_add_connector_router
from .surfsense_docs_routes import router as surfsense_docs_router
from .teams_add_connector_route import router as teams_add_connector_router
+from .video_presentations_routes import router as video_presentations_router
from .youtube_routes import router as youtube_router
router = APIRouter()
@@ -55,6 +56,9 @@ router.include_router(new_chat_router) # Chat with assistant-ui persistence
router.include_router(sandbox_router) # Sandbox file downloads (Daytona)
router.include_router(chat_comments_router)
router.include_router(podcasts_router) # Podcast task status and audio
+router.include_router(
+ video_presentations_router
+) # Video presentation status and streaming
router.include_router(reports_router) # Report CRUD and multi-format export
router.include_router(image_generation_router) # Image generation via litellm
router.include_router(search_source_connectors_router)
diff --git a/surfsense_backend/app/routes/video_presentations_routes.py b/surfsense_backend/app/routes/video_presentations_routes.py
new file mode 100644
index 000000000..ed694b9bf
--- /dev/null
+++ b/surfsense_backend/app/routes/video_presentations_routes.py
@@ -0,0 +1,242 @@
+"""
+Video presentation routes for CRUD operations and per-slide audio streaming.
+
+These routes support the video presentation generation feature in new-chat.
+Frontend polls GET /video-presentations/{id} to check status field.
+When ready, the slides JSONB contains per-slide Remotion code and audio file paths.
+The frontend compiles the Remotion code via Babel and renders with Remotion Player.
+"""
+
+import os
+from pathlib import Path
+
+from fastapi import APIRouter, Depends, HTTPException
+from fastapi.responses import StreamingResponse
+from sqlalchemy import select
+from sqlalchemy.exc import SQLAlchemyError
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.db import (
+ Permission,
+ SearchSpace,
+ SearchSpaceMembership,
+ User,
+ VideoPresentation,
+ get_async_session,
+)
+from app.schemas import VideoPresentationRead
+from app.users import current_active_user
+from app.utils.rbac import check_permission
+
+router = APIRouter()
+
+
+@router.get("/video-presentations", response_model=list[VideoPresentationRead])
+async def read_video_presentations(
+ skip: int = 0,
+ limit: int = 100,
+ search_space_id: int | None = None,
+ session: AsyncSession = Depends(get_async_session),
+ user: User = Depends(current_active_user),
+):
+ """
+ List video presentations the user has access to.
+ Requires VIDEO_PRESENTATIONS_READ permission for the search space(s).
+ """
+ if skip < 0 or limit < 1:
+ raise HTTPException(status_code=400, detail="Invalid pagination parameters")
+ try:
+ if search_space_id is not None:
+ await check_permission(
+ session,
+ user,
+ search_space_id,
+ Permission.VIDEO_PRESENTATIONS_READ.value,
+ "You don't have permission to read video presentations in this search space",
+ )
+ result = await session.execute(
+ select(VideoPresentation)
+ .filter(VideoPresentation.search_space_id == search_space_id)
+ .offset(skip)
+ .limit(limit)
+ )
+ else:
+ result = await session.execute(
+ select(VideoPresentation)
+ .join(SearchSpace)
+ .join(SearchSpaceMembership)
+ .filter(SearchSpaceMembership.user_id == user.id)
+ .offset(skip)
+ .limit(limit)
+ )
+ return [
+ VideoPresentationRead.from_orm_with_slides(vp)
+ for vp in result.scalars().all()
+ ]
+ except HTTPException:
+ raise
+ except SQLAlchemyError:
+ raise HTTPException(
+ status_code=500,
+ detail="Database error occurred while fetching video presentations",
+ ) from None
+
+
@router.get(
    "/video-presentations/{video_presentation_id}",
    response_model=VideoPresentationRead,
)
async def read_video_presentation(
    video_presentation_id: int,
    session: AsyncSession = Depends(get_async_session),
    user: User = Depends(current_active_user),
):
    """Fetch one video presentation by ID.

    Requires VIDEO_PRESENTATIONS_READ permission on the owning search space.
    For a "ready" presentation the payload carries the parsed slides (with
    per-slide audio_url fields) plus the Remotion scene_codes the frontend
    compiles and plays.
    """
    try:
        lookup = await session.execute(
            select(VideoPresentation).filter(
                VideoPresentation.id == video_presentation_id
            )
        )
        presentation = lookup.scalars().first()
        if presentation is None:
            raise HTTPException(status_code=404, detail="Video presentation not found")

        await check_permission(
            session,
            user,
            presentation.search_space_id,
            Permission.VIDEO_PRESENTATIONS_READ.value,
            "You don't have permission to read video presentations in this search space",
        )
        return VideoPresentationRead.from_orm_with_slides(presentation)
    except HTTPException as he:
        raise he
    except SQLAlchemyError:
        raise HTTPException(
            status_code=500,
            detail="Database error occurred while fetching video presentation",
        ) from None
+
+
+@router.delete("/video-presentations/{video_presentation_id}", response_model=dict)
+async def delete_video_presentation(
+ video_presentation_id: int,
+ session: AsyncSession = Depends(get_async_session),
+ user: User = Depends(current_active_user),
+):
+ """
+ Delete a video presentation.
+ Requires VIDEO_PRESENTATIONS_DELETE permission for the search space.
+ """
+ try:
+ result = await session.execute(
+ select(VideoPresentation).filter(
+ VideoPresentation.id == video_presentation_id
+ )
+ )
+ db_video_pres = result.scalars().first()
+
+ if not db_video_pres:
+ raise HTTPException(status_code=404, detail="Video presentation not found")
+
+ await check_permission(
+ session,
+ user,
+ db_video_pres.search_space_id,
+ Permission.VIDEO_PRESENTATIONS_DELETE.value,
+ "You don't have permission to delete video presentations in this search space",
+ )
+
+ await session.delete(db_video_pres)
+ await session.commit()
+ return {"message": "Video presentation deleted successfully"}
+ except HTTPException as he:
+ raise he
+ except SQLAlchemyError:
+ await session.rollback()
+ raise HTTPException(
+ status_code=500,
+ detail="Database error occurred while deleting video presentation",
+ ) from None
+
+
+@router.get("/video-presentations/{video_presentation_id}/slides/{slide_number}/audio")
+async def stream_slide_audio(
+ video_presentation_id: int,
+ slide_number: int,
+ session: AsyncSession = Depends(get_async_session),
+ user: User = Depends(current_active_user),
+):
+ """
+ Stream the audio file for a specific slide in a video presentation.
+ The slide_number is 1-based. Audio path is read from the slides JSONB.
+ """
+ try:
+ result = await session.execute(
+ select(VideoPresentation).filter(
+ VideoPresentation.id == video_presentation_id
+ )
+ )
+ video_pres = result.scalars().first()
+
+ if not video_pres:
+ raise HTTPException(status_code=404, detail="Video presentation not found")
+
+ await check_permission(
+ session,
+ user,
+ video_pres.search_space_id,
+ Permission.VIDEO_PRESENTATIONS_READ.value,
+ "You don't have permission to access video presentations in this search space",
+ )
+
+ slides = video_pres.slides or []
+ slide_data = None
+ for s in slides:
+ if s.get("slide_number") == slide_number:
+ slide_data = s
+ break
+
+ if not slide_data:
+ raise HTTPException(
+ status_code=404,
+ detail=f"Slide {slide_number} not found",
+ )
+
+ file_path = slide_data.get("audio_file")
+ if not file_path or not os.path.isfile(file_path):
+ raise HTTPException(status_code=404, detail="Slide audio file not found")
+
+ ext = Path(file_path).suffix.lower()
+ media_type = "audio/wav" if ext == ".wav" else "audio/mpeg"
+
+ def iterfile():
+ with open(file_path, mode="rb") as file_like:
+ yield from file_like
+
+ return StreamingResponse(
+ iterfile(),
+ media_type=media_type,
+ headers={
+ "Accept-Ranges": "bytes",
+ "Content-Disposition": f"inline; filename={Path(file_path).name}",
+ },
+ )
+
+ except HTTPException as he:
+ raise he
+ except Exception as e:
+ raise HTTPException(
+ status_code=500,
+ detail=f"Error streaming slide audio: {e!s}",
+ ) from e
diff --git a/surfsense_backend/app/schemas/__init__.py b/surfsense_backend/app/schemas/__init__.py
index 7e3ba1936..11d3bfc06 100644
--- a/surfsense_backend/app/schemas/__init__.py
+++ b/surfsense_backend/app/schemas/__init__.py
@@ -101,6 +101,12 @@ from .search_space import (
SearchSpaceWithStats,
)
from .users import UserCreate, UserRead, UserUpdate
+from .video_presentations import (
+ VideoPresentationBase,
+ VideoPresentationCreate,
+ VideoPresentationRead,
+ VideoPresentationUpdate,
+)
__all__ = [
# Chat schemas (assistant-ui integration)
@@ -220,4 +226,9 @@ __all__ = [
"UserRead",
"UserSearchSpaceAccess",
"UserUpdate",
+ # Video Presentation schemas
+ "VideoPresentationBase",
+ "VideoPresentationCreate",
+ "VideoPresentationRead",
+ "VideoPresentationUpdate",
]
diff --git a/surfsense_backend/app/schemas/video_presentations.py b/surfsense_backend/app/schemas/video_presentations.py
new file mode 100644
index 000000000..ec29147ef
--- /dev/null
+++ b/surfsense_backend/app/schemas/video_presentations.py
@@ -0,0 +1,103 @@
+"""Video presentation schemas for API responses."""
+
+from datetime import datetime
+from enum import StrEnum
+from typing import Any
+
+from pydantic import BaseModel
+
+
class VideoPresentationStatusEnum(StrEnum):
    """API-facing mirror of the DB-level video presentation status enum.

    Values must match those stored by the backend model's status column.
    """

    PENDING = "pending"
    GENERATING = "generating"
    READY = "ready"
    FAILED = "failed"
+
+
class VideoPresentationBase(BaseModel):
    """Base video presentation schema.

    ``slides`` and ``scene_codes`` mirror the JSONB columns on the ORM model
    and are None until generation has produced them.
    """

    title: str
    slides: list[dict[str, Any]] | None = None
    scene_codes: list[dict[str, Any]] | None = None
    search_space_id: int
+
+
class VideoPresentationCreate(VideoPresentationBase):
    """Schema for creating a video presentation.

    Currently identical to the base schema; declared separately so create
    payloads can diverge from reads later without an API break.
    """
+
+
class VideoPresentationUpdate(BaseModel):
    """Schema for updating a video presentation.

    All fields are optional so callers can patch a subset.
    """

    title: str | None = None
    slides: list[dict[str, Any]] | None = None
    scene_codes: list[dict[str, Any]] | None = None
+
+
class VideoPresentationRead(VideoPresentationBase):
    """Schema for reading a video presentation."""

    id: int
    status: VideoPresentationStatusEnum = VideoPresentationStatusEnum.READY
    created_at: datetime
    # Convenience field derived from len(slides); None when slides is unset.
    slide_count: int | None = None

    class Config:
        from_attributes = True

    @classmethod
    def from_orm_with_slides(cls, obj):
        """Create VideoPresentationRead with slide_count computed.

        Replaces raw server file paths in `audio_file` with API streaming
        URLs so the frontend can use them directly in Remotion's <Audio />
        component.
        """
        slides = obj.slides
        if slides:
            slides = _replace_audio_paths_with_urls(obj.id, slides)

        data = {
            "id": obj.id,
            "title": obj.title,
            "slides": slides,
            "scene_codes": obj.scene_codes,
            "search_space_id": obj.search_space_id,
            "status": obj.status,
            "created_at": obj.created_at,
            # Counted from the original ORM value; the transformed list has
            # the same length.
            "slide_count": len(obj.slides) if obj.slides else None,
        }
        return cls(**data)
+
+
+def _replace_audio_paths_with_urls(
+ video_presentation_id: int,
+ slides: list[dict[str, Any]],
+) -> list[dict[str, Any]]:
+ """Replace server-local audio_file paths with streaming API URLs.
+
+ Transforms:
+ "audio_file": "video_presentation_audio/abc_slide_1.mp3"
+ Into:
+ "audio_url": "/api/v1/video-presentations/42/slides/1/audio"
+
+ The frontend passes this URL to Remotion's .
+ """
+ result = []
+ for slide in slides:
+ slide_copy = dict(slide)
+ slide_number = slide_copy.get("slide_number")
+ audio_file = slide_copy.pop("audio_file", None)
+
+ if audio_file and slide_number is not None:
+ slide_copy["audio_url"] = (
+ f"/api/v1/video-presentations/{video_presentation_id}"
+ f"/slides/{slide_number}/audio"
+ )
+ else:
+ slide_copy["audio_url"] = None
+
+ result.append(slide_copy)
+ return result
diff --git a/surfsense_backend/app/tasks/celery_tasks/video_presentation_tasks.py b/surfsense_backend/app/tasks/celery_tasks/video_presentation_tasks.py
new file mode 100644
index 000000000..d92dc29de
--- /dev/null
+++ b/surfsense_backend/app/tasks/celery_tasks/video_presentation_tasks.py
@@ -0,0 +1,178 @@
+"""Celery tasks for video presentation generation."""
+
+import asyncio
+import logging
+import sys
+
+from sqlalchemy import select
+
+from app.agents.video_presentation.graph import graph as video_presentation_graph
+from app.agents.video_presentation.state import State as VideoPresentationState
+from app.celery_app import celery_app
+from app.config import config
+from app.db import VideoPresentation, VideoPresentationStatus
+from app.tasks.celery_tasks import get_celery_session_maker
+
+logger = logging.getLogger(__name__)
+
+if sys.platform.startswith("win"):
+ try:
+ asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())
+ except AttributeError:
+ logger.warning(
+ "WindowsProactorEventLoopPolicy is unavailable; async subprocess support may fail."
+ )
+
+
def _clear_generating_video_presentation(search_space_id: int) -> None:
    """Best-effort removal of the Redis "generation in progress" marker.

    Invoked from the task's ``finally`` block; failures are logged but never
    propagated, since a stale marker must not mask the task result.
    """
    # Imported lazily so importing this module does not require redis.
    import redis

    marker_key = f"video_presentation:generating:{search_space_id}"
    try:
        redis.from_url(config.REDIS_APP_URL, decode_responses=True).delete(marker_key)
        logger.info(
            f"Cleared generating video presentation key for search_space_id={search_space_id}"
        )
    except Exception as e:
        logger.warning(f"Could not clear generating video presentation key: {e}")
+
+
+@celery_app.task(name="generate_video_presentation", bind=True)
+def generate_video_presentation_task(
+ self,
+ video_presentation_id: int,
+ source_content: str,
+ search_space_id: int,
+ user_prompt: str | None = None,
+) -> dict:
+ """
+ Celery task to generate video presentation from source content.
+ Updates existing video presentation record created by the tool.
+ """
+ loop = asyncio.new_event_loop()
+ asyncio.set_event_loop(loop)
+
+ try:
+ result = loop.run_until_complete(
+ _generate_video_presentation(
+ video_presentation_id,
+ source_content,
+ search_space_id,
+ user_prompt,
+ )
+ )
+ loop.run_until_complete(loop.shutdown_asyncgens())
+ return result
+ except Exception as e:
+ logger.error(f"Error generating video presentation: {e!s}")
+ loop.run_until_complete(_mark_video_presentation_failed(video_presentation_id))
+ return {"status": "failed", "video_presentation_id": video_presentation_id}
+ finally:
+ _clear_generating_video_presentation(search_space_id)
+ asyncio.set_event_loop(None)
+ loop.close()
+
+
async def _mark_video_presentation_failed(video_presentation_id: int) -> None:
    """Set a presentation's status to FAILED, swallowing any DB errors.

    Used as a last-resort status update from the task's error path, so it
    must never raise itself.
    """
    async with get_celery_session_maker()() as session:
        try:
            lookup = await session.execute(
                select(VideoPresentation).filter(
                    VideoPresentation.id == video_presentation_id
                )
            )
            record = lookup.scalars().first()
            if record is None:
                return
            record.status = VideoPresentationStatus.FAILED
            await session.commit()
        except Exception as e:
            logger.error(f"Failed to mark video presentation as failed: {e}")
+
+
async def _generate_video_presentation(
    video_presentation_id: int,
    source_content: str,
    search_space_id: int,
    user_prompt: str | None = None,
) -> dict:
    """Generate video presentation and update existing record.

    Runs the agent graph (parse slides → per-slide TTS audio → per-slide
    Remotion code), merges audio metadata into the slide dicts, and persists
    everything on the pre-created VideoPresentation row.

    Args:
        video_presentation_id: ID of the existing DB record to update.
        source_content: Text the presentation is generated from.
        search_space_id: Owning search space, forwarded to the graph config.
        user_prompt: Optional extra instructions forwarded to the graph.

    Returns:
        Summary dict: status, video_presentation_id, title, slide_count.

    Raises:
        ValueError: if the record does not exist.
        Exception: re-raises any graph/DB failure after marking the row FAILED.
    """
    async with get_celery_session_maker()() as session:
        result = await session.execute(
            select(VideoPresentation).filter(
                VideoPresentation.id == video_presentation_id
            )
        )
        video_pres = result.scalars().first()

        if not video_pres:
            raise ValueError(f"VideoPresentation {video_presentation_id} not found")

        try:
            # Flip to GENERATING immediately so pollers see progress.
            video_pres.status = VideoPresentationStatus.GENERATING
            await session.commit()

            graph_config = {
                "configurable": {
                    "video_title": video_pres.title,
                    "search_space_id": search_space_id,
                    "user_prompt": user_prompt,
                }
            }

            initial_state = VideoPresentationState(
                source_content=source_content,
                db_session=session,
            )

            graph_result = await video_presentation_graph.ainvoke(
                initial_state, config=graph_config
            )

            # Serialize slides (parsed content + audio info merged)
            slides_raw = graph_result.get("slides", [])
            audio_results_raw = graph_result.get("slide_audio_results", [])
            scene_codes_raw = graph_result.get("slide_scene_codes", [])

            # Index audio results by slide_number so each slide can be joined
            # with its audio metadata below.
            audio_map = {}
            for ar in audio_results_raw:
                # Tolerate both pydantic models and plain dicts from the graph.
                data = ar.model_dump() if hasattr(ar, "model_dump") else ar
                audio_map[data.get("slide_number", 0)] = data

            serializable_slides = []
            for slide in slides_raw:
                slide_data = (
                    slide.model_dump() if hasattr(slide, "model_dump") else dict(slide)
                )
                # Missing audio leaves the audio fields as None.
                audio_data = audio_map.get(slide_data.get("slide_number", 0), {})
                slide_data["audio_file"] = audio_data.get("audio_file")
                slide_data["duration_seconds"] = audio_data.get("duration_seconds")
                slide_data["duration_in_frames"] = audio_data.get("duration_in_frames")
                serializable_slides.append(slide_data)

            serializable_scene_codes = []
            for sc in scene_codes_raw:
                sc_data = sc.model_dump() if hasattr(sc, "model_dump") else dict(sc)
                serializable_scene_codes.append(sc_data)

            video_pres.slides = serializable_slides
            video_pres.scene_codes = serializable_scene_codes
            video_pres.status = VideoPresentationStatus.READY
            await session.commit()

            logger.info(f"Successfully generated video presentation: {video_pres.id}")

            return {
                "status": "ready",
                "video_presentation_id": video_pres.id,
                "title": video_pres.title,
                "slide_count": len(serializable_slides),
            }

        except Exception as e:
            logger.error(f"Error in _generate_video_presentation: {e!s}")
            # Record the failure before re-raising so the row does not stay
            # stuck in GENERATING.
            video_pres.status = VideoPresentationStatus.FAILED
            await session.commit()
            raise
diff --git a/surfsense_backend/app/tasks/chat/stream_new_chat.py b/surfsense_backend/app/tasks/chat/stream_new_chat.py
index 7abd22d4a..fbe8068f8 100644
--- a/surfsense_backend/app/tasks/chat/stream_new_chat.py
+++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py
@@ -613,6 +613,41 @@ async def _stream_agent_events(
status="completed",
items=completed_items,
)
+ elif tool_name == "generate_video_presentation":
+ vp_status = (
+ tool_output.get("status", "unknown")
+ if isinstance(tool_output, dict)
+ else "unknown"
+ )
+ vp_title = (
+ tool_output.get("title", "Presentation")
+ if isinstance(tool_output, dict)
+ else "Presentation"
+ )
+ if vp_status in ("pending", "generating"):
+ completed_items = [
+ f"Title: {vp_title}",
+ "Presentation generation started",
+ "Processing in background...",
+ ]
+ elif vp_status == "failed":
+ error_msg = (
+ tool_output.get("error", "Unknown error")
+ if isinstance(tool_output, dict)
+ else "Unknown error"
+ )
+ completed_items = [
+ f"Title: {vp_title}",
+ f"Error: {error_msg[:50]}",
+ ]
+ else:
+ completed_items = last_active_step_items
+ yield streaming_service.format_thinking_step(
+ step_id=original_step_id,
+ title="Generating video presentation",
+ status="completed",
+ items=completed_items,
+ )
elif tool_name == "generate_report":
report_status = (
tool_output.get("status", "unknown")
@@ -756,6 +791,34 @@ async def _stream_agent_events(
f"Podcast generation failed: {error_msg}",
"error",
)
+ elif tool_name == "generate_video_presentation":
+ yield streaming_service.format_tool_output_available(
+ tool_call_id,
+ tool_output
+ if isinstance(tool_output, dict)
+ else {"result": tool_output},
+ )
+ if (
+ isinstance(tool_output, dict)
+ and tool_output.get("status") == "pending"
+ ):
+ yield streaming_service.format_terminal_info(
+ f"Video presentation queued: {tool_output.get('title', 'Presentation')}",
+ "success",
+ )
+ elif (
+ isinstance(tool_output, dict)
+ and tool_output.get("status") == "failed"
+ ):
+ error_msg = (
+ tool_output.get("error", "Unknown error")
+ if isinstance(tool_output, dict)
+ else "Unknown error"
+ )
+ yield streaming_service.format_terminal_info(
+ f"Presentation generation failed: {error_msg}",
+ "error",
+ )
elif tool_name == "link_preview":
yield streaming_service.format_tool_output_available(
tool_call_id,
diff --git a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx
index 75d8f4ca3..cc9881d3b 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/documents/(manage)/components/DocumentsTableShell.tsx
@@ -472,21 +472,21 @@ export function DocumentsTableShell({
setBulkDeleteConfirmOpen(false);
}, [deletableSelectedIds, bulkDeleteDocuments, deleteDocument]);
+ const bulkDeleteBar = hasDeletableSelection ? (
+