feat: init video presentation agent

2026-07-10 22:32:16 +02:00 · 2026-03-21 22:13:41 -07:00 · 2026-03-21 22:13:41 -07:00 · b28f135a96
commit b28f135a96
parent 40d949b7d5
37 changed files with 3567 additions and 24 deletions
--- a/surfsense_backend/app/agents/new_chat/system_prompt.py
+++ b/surfsense_backend/app/agents/new_chat/system_prompt.py
@ -132,6 +132,17 @@ _TOOL_INSTRUCTIONS["generate_podcast"] = """
  - After calling this tool, inform the user that podcast generation has started and they will see the player when it's ready (takes 3-5 minutes).
 """

+# Prompt instructions for the generate_video_presentation tool. The text below
+# is runtime prompt content; it names the tool's three arguments
+# (source_content, video_title, user_prompt) and when the agent should call it.
+_TOOL_INSTRUCTIONS["generate_video_presentation"] = """
+- generate_video_presentation: Generate a video presentation from provided content.
+  - Use this when the user asks to create a video, presentation, slides, or slide deck.
+  - Trigger phrases: "give me a presentation", "create slides", "generate a video", "make a slide deck", "turn this into a presentation"
+  - Args:
+    - source_content: The text content to turn into a presentation. The more detailed, the better.
+    - video_title: Optional title (default: "SurfSense Presentation")
+    - user_prompt: Optional style instructions (e.g., "Make it technical and detailed")
+  - After calling this tool, inform the user that generation has started and they will see the presentation when it's ready.
+"""
+
 _TOOL_INSTRUCTIONS["generate_report"] = """
 - generate_report: Generate or revise a structured Markdown report artifact.
  - WHEN TO CALL THIS TOOL — the message must contain a creation or modification VERB directed at producing a deliverable:
@ -438,6 +449,16 @@ _TOOL_EXAMPLES["generate_podcast"] = """
  - Then: `generate_podcast(source_content="Key insights about quantum computing from the knowledge base:\\n\\n[Comprehensive summary of all relevant search results with key facts, concepts, and findings]", podcast_title="Quantum Computing Explained")`
 """

+# Example user requests and the matching generate_video_presentation calls.
+# This is runtime prompt text, so its wording is part of program behaviour.
+_TOOL_EXAMPLES["generate_video_presentation"] = """
+- User: "Give me a presentation about AI trends based on what we discussed"
+  - First search for relevant content, then call: `generate_video_presentation(source_content="Based on our conversation and search results: [detailed summary of chat + search findings]", video_title="AI Trends Presentation")`
+- User: "Create slides summarizing this conversation"
+  - Call: `generate_video_presentation(source_content="Complete conversation summary:\\n\\nUser asked about [topic 1]:\\n[Your detailed response]\\n\\nUser then asked about [topic 2]:\\n[Your detailed response]\\n\\n[Continue for all exchanges in the conversation]", video_title="Conversation Summary")`
+- User: "Make a video presentation about quantum computing"
+  - First search: `search_knowledge_base(query="quantum computing")`
+  - Then: `generate_video_presentation(source_content="Key insights about quantum computing from the knowledge base:\\n\\n[Comprehensive summary of all relevant search results with key facts, concepts, and findings]", video_title="Quantum Computing Explained")`
+"""
+
 _TOOL_EXAMPLES["generate_report"] = """
 - User: "Generate a report about AI trends"
  - Call: `generate_report(topic="AI Trends Report", source_strategy="kb_search", search_queries=["AI trends recent developments", "artificial intelligence industry trends", "AI market growth and predictions"], report_style="detailed")`
@ -499,6 +520,7 @@ _ALL_TOOL_NAMES_ORDERED = [
    "search_knowledge_base",
    "web_search",
    "generate_podcast",
+    "generate_video_presentation",
    "generate_report",
    "link_preview",
    "display_image",
--- a/surfsense_backend/app/agents/new_chat/tools/init.py
+++ b/surfsense_backend/app/agents/new_chat/tools/init.py
@ -8,6 +8,7 @@ Available tools:
 - search_knowledge_base: Search the user's personal knowledge base
 - search_surfsense_docs: Search Surfsense documentation for usage help
 - generate_podcast: Generate audio podcasts from content
+- generate_video_presentation: Generate video presentations with slides and narration
 - generate_image: Generate images from text descriptions using AI models
 - link_preview: Fetch rich previews for URLs
 - display_image: Display images in chat
@ -39,6 +40,7 @@ from .registry import (
 from .scrape_webpage import create_scrape_webpage_tool
 from .search_surfsense_docs import create_search_surfsense_docs_tool
 from .user_memory import create_recall_memory_tool, create_save_memory_tool
+from .video_presentation import create_generate_video_presentation_tool

 __all__ = [
    # Registry
@ -51,6 +53,7 @@ __all__ = [
    "create_display_image_tool",
    "create_generate_image_tool",
    "create_generate_podcast_tool",
+    "create_generate_video_presentation_tool",
    "create_link_preview_tool",
    "create_recall_memory_tool",
    "create_save_memory_tool",
--- a/surfsense_backend/app/agents/new_chat/tools/registry.py
+++ b/surfsense_backend/app/agents/new_chat/tools/registry.py
@ -73,6 +73,7 @@ from .shared_memory import (
    create_save_shared_memory_tool,
 )
 from .user_memory import create_recall_memory_tool, create_save_memory_tool
+from .video_presentation import create_generate_video_presentation_tool
 from .web_search import create_web_search_tool

 # =============================================================================
@ -136,6 +137,17 @@ BUILTIN_TOOLS: list[ToolDefinition] = [
        ),
        requires=["search_space_id", "db_session", "thread_id"],
    ),
+    # Video presentation generation tool
+    ToolDefinition(
+        name="generate_video_presentation",
+        description="Generate a video presentation with slides and narration from provided content",
+        factory=lambda deps: create_generate_video_presentation_tool(
+            search_space_id=deps["search_space_id"],
+            db_session=deps["db_session"],
+            thread_id=deps["thread_id"],
+        ),
+        requires=["search_space_id", "db_session", "thread_id"],
+    ),
    # Report generation tool (inline, short-lived sessions for DB ops)
    # Supports internal KB search via source_strategy so the agent doesn't
    # need to call search_knowledge_base separately before generating.
--- a/surfsense_backend/app/agents/new_chat/tools/video_presentation.py
+++ b/surfsense_backend/app/agents/new_chat/tools/video_presentation.py
@ -0,0 +1,171 @@
+"""
+Video presentation generation tool for the SurfSense agent.
+
+This module provides a factory function for creating the generate_video_presentation
+tool that submits a Celery task for background video presentation generation.
+The frontend polls for completion and auto-updates when the presentation is ready.
+
+Duplicate request prevention:
+- Only one video presentation can be generated at a time per search space
+- Uses Redis to track active video presentation tasks
+- Validates the Redis marker against actual DB status to avoid stale locks
+"""
+
+from typing import Any
+
+import redis
+from langchain_core.tools import tool
+from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.config import config
+from app.db import VideoPresentation, VideoPresentationStatus
+
+REDIS_URL = config.REDIS_APP_URL
+_redis_client: redis.Redis | None = None
+
+
+def get_redis_client() -> redis.Redis:
+    """Return the shared Redis client, building it on the first call.
+
+    The client is created from ``REDIS_URL`` with ``decode_responses=True`` and
+    cached in the module-level ``_redis_client`` so later calls reuse it.
+    """
+    global _redis_client
+    if _redis_client is not None:
+        return _redis_client
+    _redis_client = redis.from_url(REDIS_URL, decode_responses=True)
+    return _redis_client
+
+
+def _redis_key(search_space_id: int) -> str:
+    """Build the Redis key that holds the in-progress marker for a search space."""
+    return "video_presentation:generating:{}".format(search_space_id)
+
+
+def get_generating_video_presentation_id(search_space_id: int) -> int | None:
+    """Look up which video presentation is marked as generating for a search space.
+
+    Returns the stored ID as an int, or None when no marker is set. Any error
+    (Redis failure, non-numeric stored value) is swallowed and also yields None.
+    """
+    try:
+        marker = get_redis_client().get(_redis_key(search_space_id))
+        if not marker:
+            return None
+        return int(marker)
+    except Exception:
+        return None
+
+
+def clear_generating_video_presentation(search_space_id: int) -> None:
+    """Delete the in-progress marker for a search space.
+
+    Called when the marker is found to be stale. This is best effort: any
+    exception raised while talking to Redis is ignored.
+    """
+    try:
+        get_redis_client().delete(_redis_key(search_space_id))
+    except Exception:
+        # Deliberately ignored; the marker also carries its own expiry.
+        pass
+
+
+def set_generating_video_presentation(
+    search_space_id: int, video_presentation_id: int
+) -> None:
+    """Record which video presentation is currently generating for a search space.
+
+    The marker is written with a 1800-second expiry. Failures are reported with
+    a printed warning and otherwise ignored.
+    """
+    marker_ttl_seconds = 1800
+    try:
+        get_redis_client().setex(
+            _redis_key(search_space_id),
+            marker_ttl_seconds,
+            str(video_presentation_id),
+        )
+    except Exception as exc:
+        print(
+            f"[generate_video_presentation] Warning: Could not set generating video presentation in Redis: {exc}"
+        )
+
+
+def create_generate_video_presentation_tool(
+    search_space_id: int,
+    db_session: AsyncSession,
+    thread_id: int | None = None,
+):
+    """
+    Factory function to create the generate_video_presentation tool with injected dependencies.
+
+    The returned tool creates and commits a VideoPresentation row with PENDING
+    status before queueing the Celery task, so the ID is available immediately
+    for frontend polling.
+
+    Args:
+        search_space_id: Search space the presentation belongs to; it also
+            scopes the one-at-a-time Redis marker.
+        db_session: Async SQLAlchemy session used to read and create rows.
+        thread_id: Optional chat thread ID stored on the new row.
+
+    Returns:
+        The async ``generate_video_presentation`` tool.
+    """
+
+    # Statuses for which the presentation referenced by the Redis marker counts
+    # as still in flight. PENDING is included because the row is created as
+    # PENDING here and stays that way until something else updates it; treating
+    # it as stale would let a second request start a duplicate while the first
+    # one is still queued. The marker's expiry bounds how long a row stuck in
+    # PENDING can block new requests.
+    active_statuses = (
+        VideoPresentationStatus.PENDING,
+        VideoPresentationStatus.GENERATING,
+    )
+
+    @tool
+    async def generate_video_presentation(
+        source_content: str,
+        video_title: str = "SurfSense Presentation",
+        user_prompt: str | None = None,
+    ) -> dict[str, Any]:
+        """Generate a video presentation from the provided content.
+
+        Use this tool when the user asks to create a video, presentation, slides, or slide deck.
+
+        Args:
+            source_content: The text content to turn into a presentation.
+            video_title: Title for the presentation (default: "SurfSense Presentation")
+            user_prompt: Optional style/tone instructions.
+        """
+        try:
+            generating_id = get_generating_video_presentation_id(search_space_id)
+            if generating_id:
+                # Cross-check the Redis marker against the database row so a
+                # marker left behind by a finished or failed run does not block.
+                result = await db_session.execute(
+                    select(VideoPresentation).filter(
+                        VideoPresentation.id == generating_id
+                    )
+                )
+                existing = result.scalars().first()
+
+                if existing and existing.status in active_statuses:
+                    print(
+                        f"[generate_video_presentation] Blocked duplicate — "
+                        f"presentation {generating_id} is actively generating"
+                    )
+                    return {
+                        "status": VideoPresentationStatus.GENERATING.value,
+                        "video_presentation_id": generating_id,
+                        "title": video_title,
+                        "message": "A video presentation is already being generated. Please wait for it to complete.",
+                    }
+
+                print(
+                    f"[generate_video_presentation] Stale Redis lock for presentation {generating_id} "
+                    f"(status={existing.status if existing else 'not found'}). Clearing and proceeding."
+                )
+                clear_generating_video_presentation(search_space_id)
+
+            # Persist the row first so its ID exists before the task is queued.
+            video_pres = VideoPresentation(
+                title=video_title,
+                status=VideoPresentationStatus.PENDING,
+                search_space_id=search_space_id,
+                thread_id=thread_id,
+            )
+            db_session.add(video_pres)
+            await db_session.commit()
+            await db_session.refresh(video_pres)
+
+            # Imported here rather than at module level, as in the original.
+            from app.tasks.celery_tasks.video_presentation_tasks import (
+                generate_video_presentation_task,
+            )
+
+            task = generate_video_presentation_task.delay(
+                video_presentation_id=video_pres.id,
+                source_content=source_content,
+                search_space_id=search_space_id,
+                user_prompt=user_prompt,
+            )
+
+            set_generating_video_presentation(search_space_id, video_pres.id)
+
+            print(
+                f"[generate_video_presentation] Created video presentation {video_pres.id}, task: {task.id}"
+            )
+
+            return {
+                "status": VideoPresentationStatus.PENDING.value,
+                "video_presentation_id": video_pres.id,
+                "title": video_title,
+                "message": "Video presentation generation started. This may take a few minutes.",
+            }
+
+        except Exception as e:
+            # Tool boundary: report the failure as a result dict instead of raising.
+            error_message = str(e)
+            print(f"[generate_video_presentation] Error: {error_message}")
+            return {
+                "status": VideoPresentationStatus.FAILED.value,
+                "error": error_message,
+                "title": video_title,
+                "video_presentation_id": None,
+            }
+
+    return generate_video_presentation
--- a/surfsense_backend/app/agents/video_presentation/init.py
+++ b/surfsense_backend/app/agents/video_presentation/init.py
@ -0,0 +1,10 @@
+"""Video Presentation LangGraph Agent.
+
+This module defines a graph for generating video presentations
+from source content, similar to the podcaster agent but producing
+slide-based video presentations with TTS narration.
+"""
+
+from .graph import graph
+
+__all__ = ["graph"]
--- a/surfsense_backend/app/agents/video_presentation/configuration.py
+++ b/surfsense_backend/app/agents/video_presentation/configuration.py
@ -0,0 +1,25 @@
+"""Define the configurable parameters for the video presentation agent."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, fields
+
+from langchain_core.runnables import RunnableConfig
+
+
+@dataclass(kw_only=True)
+class Configuration:
+    """Configurable parameters accepted by the video presentation agent."""
+
+    video_title: str
+    search_space_id: int
+    user_prompt: str | None = None
+
+    @classmethod
+    def from_runnable_config(
+        cls, config: RunnableConfig | None = None
+    ) -> Configuration:
+        """Build a Configuration from the ``configurable`` section of a RunnableConfig.
+
+        Keys that are not init fields of this dataclass are ignored. A missing
+        or empty config yields no keyword arguments, so the required fields
+        must be present for construction to succeed.
+        """
+        configurable = {}
+        if config:
+            configurable = config.get("configurable") or {}
+
+        accepted = {field.name for field in fields(cls) if field.init}
+        kwargs = {}
+        for key, value in configurable.items():
+            if key in accepted:
+                kwargs[key] = value
+        return cls(**kwargs)
--- a/surfsense_backend/app/agents/video_presentation/graph.py
+++ b/surfsense_backend/app/agents/video_presentation/graph.py
@ -0,0 +1,30 @@
+from langgraph.graph import StateGraph
+
+from .configuration import Configuration
+from .nodes import (
+    create_presentation_slides,
+    create_slide_audio,
+    generate_slide_scene_codes,
+)
+from .state import State
+
+
+def build_graph():
+    """Assemble and compile the linear video presentation workflow.
+
+    The nodes run in a fixed order: slides are created, then per-slide audio,
+    then the scene code for each slide.
+    """
+    pipeline = (
+        ("create_presentation_slides", create_presentation_slides),
+        ("create_slide_audio", create_slide_audio),
+        ("generate_slide_scene_codes", generate_slide_scene_codes),
+    )
+
+    workflow = StateGraph(State, config_schema=Configuration)
+    for node_name, node_fn in pipeline:
+        workflow.add_node(node_name, node_fn)
+
+    # Chain start -> each node in order -> end.
+    route = ["__start__", *(node_name for node_name, _ in pipeline), "__end__"]
+    for source, target in zip(route, route[1:]):
+        workflow.add_edge(source, target)
+
+    compiled = workflow.compile()
+    compiled.name = "Surfsense Video Presentation"
+    return compiled
+
+
+# Module-level compiled graph, built once at import time.
+graph = build_graph()
--- a/surfsense_backend/app/agents/video_presentation/nodes.py
+++ b/surfsense_backend/app/agents/video_presentation/nodes.py
@ -0,0 +1,552 @@
+import asyncio
+import contextlib
+import json
+import math
+import os
+import shutil
+import uuid
+from pathlib import Path
+from typing import Any
+
+from ffmpeg.asyncio import FFmpeg
+from langchain_core.messages import HumanMessage, SystemMessage
+from langchain_core.runnables import RunnableConfig
+from litellm import aspeech
+
+from app.config import config as app_config
+from app.services.kokoro_tts_service import get_kokoro_tts_service
+from app.services.llm_service import get_agent_llm
+
+from .configuration import Configuration
+from .prompts import (
+    DEFAULT_DURATION_IN_FRAMES,
+    FPS,
+    REFINE_SCENE_SYSTEM_PROMPT,
+    REMOTION_SCENE_SYSTEM_PROMPT,
+    THEME_PRESETS,
+    build_scene_generation_user_prompt,
+    build_theme_assignment_user_prompt,
+    get_slide_generation_prompt,
+    get_theme_assignment_system_prompt,
+    pick_theme_and_mode_fallback,
+)
+from .state import (
+    PresentationSlides,
+    SlideAudioResult,
+    SlideContent,
+    SlideSceneCode,
+    State,
+)
+from .utils import get_voice_for_provider
+
+# Upper bound on the LLM repair rounds _refine_if_needed runs before it raises.
+MAX_REFINE_ATTEMPTS = 3
+
+
+async def create_presentation_slides(
+    state: State, config: RunnableConfig
+) -> dict[str, Any]:
+    """Parse source content into structured presentation slides using LLM.
+
+    The LLM response is first parsed as a whole JSON document. If that fails,
+    the text between the first ``{`` and the last ``}`` is parsed instead, which
+    tolerates prose or fences around the JSON object.
+
+    Returns:
+        A state update of the form ``{"slides": [...]}``.
+
+    Raises:
+        RuntimeError: If no LLM is configured for the search space.
+        ValueError: If neither parsing attempt yields valid slides (this
+            includes ``json.JSONDecodeError``, which subclasses it).
+    """
+
+    configuration = Configuration.from_runnable_config(config)
+    search_space_id = configuration.search_space_id
+    user_prompt = configuration.user_prompt
+
+    llm = await get_agent_llm(state.db_session, search_space_id)
+    if not llm:
+        error_message = f"No LLM configured for search space {search_space_id}"
+        print(error_message)
+        raise RuntimeError(error_message)
+
+    prompt = get_slide_generation_prompt(user_prompt)
+
+    messages = [
+        SystemMessage(content=prompt),
+        HumanMessage(
+            content=f"<source_content>{state.source_content}</source_content>"
+        ),
+    ]
+
+    llm_response = await llm.ainvoke(messages)
+
+    try:
+        presentation = PresentationSlides.model_validate(
+            json.loads(llm_response.content)
+        )
+    except (json.JSONDecodeError, ValueError) as e:
+        print(f"Direct JSON parsing failed, trying fallback approach: {e!s}")
+
+        try:
+            content = llm_response.content
+            # Take the widest brace-delimited span in the response.
+            json_start = content.find("{")
+            json_end = content.rfind("}") + 1
+            if json_start >= 0 and json_end > json_start:
+                json_str = content[json_start:json_end]
+                parsed_data = json.loads(json_str)
+                presentation = PresentationSlides.model_validate(parsed_data)
+                print("Successfully parsed presentation slides using fallback approach")
+            else:
+                error_message = f"Could not find valid JSON in LLM response. Raw response: {content}"
+                print(error_message)
+                raise ValueError(error_message)
+
+        except (json.JSONDecodeError, ValueError) as e2:
+            # Log and re-raise unchanged so callers see the original exception.
+            print(f"Error parsing LLM response: {e2!s}")
+            print(f"Raw response: {llm_response.content}")
+            raise
+
+    return {"slides": presentation.slides}
+
+
+async def create_slide_audio(state: State, config: RunnableConfig) -> dict[str, Any]:
+    """Generate TTS audio for each slide.
+
+    Each slide's speaker_transcripts are generated as individual TTS chunks,
+    then concatenated with ffmpeg (matching the POC in RemotionTets/api/tts).
+
+    Slides without transcripts get no audio file and the default duration.
+    All slides are processed concurrently; the first failure propagates.
+
+    Returns:
+        A state update ``{"slide_audio_results": [...]}`` sorted by slide number.
+    """
+
+    # A per-run UUID prefixes every file name so runs do not collide on disk.
+    session_id = str(uuid.uuid4())
+    # Both directories are relative to the process working directory.
+    temp_dir = Path("temp_audio")
+    temp_dir.mkdir(exist_ok=True)
+    output_dir = Path("video_presentation_audio")
+    output_dir.mkdir(exist_ok=True)
+
+    slides = state.slides or []
+    voice = get_voice_for_provider(app_config.TTS_SERVICE, speaker_id=0)
+    # The local Kokoro path writes wav files; every other provider is saved as mp3.
+    ext = "wav" if app_config.TTS_SERVICE == "local/kokoro" else "mp3"
+
+    async def _generate_tts_chunk(text: str, chunk_path: str) -> str:
+        """Generate a single TTS chunk and write it to *chunk_path*."""
+        if app_config.TTS_SERVICE == "local/kokoro":
+            kokoro_service = await get_kokoro_tts_service(lang_code="a")
+            await kokoro_service.generate_speech(
+                text=text,
+                voice=voice,
+                speed=1.0,
+                output_path=chunk_path,
+            )
+        else:
+            # Remote provider through litellm; api_base is passed only when configured.
+            kwargs: dict[str, Any] = {
+                "model": app_config.TTS_SERVICE,
+                "api_key": app_config.TTS_SERVICE_API_KEY,
+                "voice": voice,
+                "input": text,
+                "max_retries": 2,
+                "timeout": 600,
+            }
+            if app_config.TTS_SERVICE_API_BASE:
+                kwargs["api_base"] = app_config.TTS_SERVICE_API_BASE
+
+            response = await aspeech(**kwargs)
+            with open(chunk_path, "wb") as f:
+                f.write(response.content)
+
+        return chunk_path
+
+    async def _concat_with_ffmpeg(chunk_paths: list[str], output_file: str) -> None:
+        """Concatenate multiple audio chunks into one file using async ffmpeg."""
+        # "y" overwrites an existing output file without prompting.
+        ffmpeg = FFmpeg().option("y")
+        for chunk in chunk_paths:
+            ffmpeg = ffmpeg.input(chunk)
+
+        # Feed stream 0 of every input into one audio-only concat filter.
+        filter_parts = [f"[{i}:0]" for i in range(len(chunk_paths))]
+        filter_str = (
+            "".join(filter_parts) + f"concat=n={len(chunk_paths)}:v=0:a=1[outa]"
+        )
+        ffmpeg = ffmpeg.option("filter_complex", filter_str)
+        ffmpeg = ffmpeg.output(output_file, map="[outa]")
+        await ffmpeg.execute()
+
+    async def generate_audio_for_slide(slide: SlideContent) -> SlideAudioResult:
+        """Produce the audio file and duration for one slide."""
+        has_transcripts = (
+            slide.speaker_transcripts and len(slide.speaker_transcripts) > 0
+        )
+
+        if not has_transcripts:
+            print(
+                f"Slide {slide.slide_number}: no speaker_transcripts, "
+                f"using default duration ({DEFAULT_DURATION_IN_FRAMES} frames)"
+            )
+            # An empty audio_file marks a slide that has no narration.
+            return SlideAudioResult(
+                slide_number=slide.slide_number,
+                audio_file="",
+                duration_seconds=DEFAULT_DURATION_IN_FRAMES / FPS,
+                duration_in_frames=DEFAULT_DURATION_IN_FRAMES,
+            )
+
+        output_file = str(output_dir / f"{session_id}_slide_{slide.slide_number}.{ext}")
+
+        chunk_paths: list[str] = []
+        try:
+            # Chunks are generated one after another so their order is preserved.
+            for i, text in enumerate(slide.speaker_transcripts):
+                chunk_path = str(
+                    temp_dir
+                    / f"{session_id}_slide_{slide.slide_number}_chunk_{i}.{ext}"
+                )
+                print(
+                    f"  Slide {slide.slide_number} chunk {i + 1}/"
+                    f"{len(slide.speaker_transcripts)}: "
+                    f'"{text[:60]}..."'
+                )
+                await _generate_tts_chunk(text, chunk_path)
+                chunk_paths.append(chunk_path)
+
+            if len(chunk_paths) == 1:
+                # A single chunk needs no ffmpeg pass; it becomes the output.
+                shutil.move(chunk_paths[0], output_file)
+            else:
+                print(
+                    f"  Concatenating {len(chunk_paths)} chunks for slide "
+                    f"{slide.slide_number} with ffmpeg"
+                )
+                await _concat_with_ffmpeg(chunk_paths, output_file)
+
+            duration_seconds = await _get_audio_duration(output_file)
+            duration_in_frames = math.ceil(duration_seconds * FPS)
+
+            # duration_in_frames is floored at the default; duration_seconds is not.
+            return SlideAudioResult(
+                slide_number=slide.slide_number,
+                audio_file=output_file,
+                duration_seconds=duration_seconds,
+                duration_in_frames=max(duration_in_frames, DEFAULT_DURATION_IN_FRAMES),
+            )
+
+        except Exception as e:
+            print(f"Error generating audio for slide {slide.slide_number}: {e!s}")
+            raise
+        finally:
+            # Remove temp chunks; a chunk already moved away is simply skipped.
+            for p in chunk_paths:
+                with contextlib.suppress(OSError):
+                    os.remove(p)
+
+    # Run every slide concurrently.
+    tasks = [generate_audio_for_slide(slide) for slide in slides]
+    audio_results = await asyncio.gather(*tasks)
+
+    audio_results_sorted = sorted(audio_results, key=lambda r: r.slide_number)
+
+    print(
+        f"Generated audio for {len(audio_results_sorted)} slides "
+        f"(total duration: {sum(r.duration_seconds for r in audio_results_sorted):.1f}s)"
+    )
+
+    return {"slide_audio_results": audio_results_sorted}
+
+
+async def _get_audio_duration(file_path: str) -> float:
+    """Return the duration of an audio file in seconds.
+
+    Runs the ``ffprobe`` executable as an asyncio subprocess with a 10 second
+    timeout. If ffprobe is missing, fails, times out, or prints nothing usable,
+    the duration is estimated from the file size. If the file size cannot be
+    read either, the default slide duration is returned.
+
+    Args:
+        file_path: Path of the audio file to measure.
+
+    Returns:
+        Duration in seconds (measured, estimated, or the default).
+    """
+    proc = None
+    try:
+        import subprocess
+
+        proc = await asyncio.create_subprocess_exec(
+            "ffprobe",
+            "-v",
+            "error",
+            "-show_entries",
+            "format=duration",
+            "-of",
+            "default=noprint_wrappers=1:nokey=1",
+            file_path,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+        )
+        stdout, _ = await asyncio.wait_for(proc.communicate(), timeout=10)
+        if proc.returncode == 0 and stdout.strip():
+            return float(stdout.strip())
+    except Exception as e:
+        print(f"ffprobe failed for {file_path}: {e!s}, using file-size estimation")
+    finally:
+        # A timeout (or cancellation) leaves ffprobe running; kill and reap it
+        # so the child process is not leaked.
+        if proc is not None and proc.returncode is None:
+            with contextlib.suppress(Exception):
+                proc.kill()
+                await proc.wait()
+
+    try:
+        file_size = os.path.getsize(file_path)
+        # NOTE(review): these divisors look like 16 kHz 16-bit mono for wav and
+        # roughly 128 kbit/s for other formats — confirm against the actual
+        # TTS output format.
+        if file_path.endswith(".wav"):
+            return file_size / (16000 * 2)
+        else:
+            return file_size / 16000
+    except Exception:
+        return DEFAULT_DURATION_IN_FRAMES / FPS
+
+
+async def _assign_themes_with_llm(
+    llm, slides: list[SlideContent]
+) -> dict[int, tuple[str, str]]:
+    """Ask the LLM to assign a theme+mode to each slide in one call.
+
+    Entries are validated one by one: an entry is kept only if it is a JSON
+    object whose slide_number belongs to one of *slides*, whose theme is in
+    THEME_PRESETS and whose mode is "dark" or "light". A malformed entry is
+    skipped without discarding the valid ones.
+
+    Returns a dict mapping slide_number → (theme, mode).
+    Slides without a valid assignment get the fallback theme; if the LLM call
+    or JSON parsing fails, every slide gets the fallback.
+    """
+    total = len(slides)
+    known_numbers = {s.slide_number for s in slides}
+    slide_summaries = [
+        {
+            "slide_number": s.slide_number,
+            "title": s.title,
+            "subtitle": s.subtitle or "",
+            "background_explanation": s.background_explanation or "",
+        }
+        for s in slides
+    ]
+
+    system = get_theme_assignment_system_prompt()
+    user = build_theme_assignment_user_prompt(slide_summaries)
+
+    try:
+        response = await llm.ainvoke(
+            [
+                SystemMessage(content=system),
+                HumanMessage(content=user),
+            ]
+        )
+
+        text = response.content.strip()
+        if text.startswith("```"):
+            # Drop markdown fence lines and keep only the payload.
+            lines = text.split("\n")
+            text = "\n".join(
+                line for line in lines if not line.strip().startswith("```")
+            ).strip()
+
+        assignments = json.loads(text)
+        valid_themes = set(THEME_PRESETS)
+        result: dict[int, tuple[str, str]] = {}
+        for entry in assignments:
+            if not isinstance(entry, dict):
+                continue
+            sn = entry.get("slide_number")
+            # Accept a numeric string such as "3" for the slide number.
+            if isinstance(sn, str) and sn.strip().isdigit():
+                sn = int(sn)
+            # `or` guards against explicit nulls, which .get() defaults miss.
+            theme = str(entry.get("theme") or "").upper()
+            mode = str(entry.get("mode") or "dark").lower()
+            if (
+                isinstance(sn, int)
+                and not isinstance(sn, bool)
+                and sn in known_numbers
+                and theme in valid_themes
+                and mode in ("dark", "light")
+            ):
+                result[sn] = (theme, mode)
+
+        missing = [s for s in slides if s.slide_number not in result]
+        if not missing:
+            print(
+                "LLM theme assignment: "
+                + ", ".join(f"S{sn}={t}/{m}" for sn, (t, m) in sorted(result.items()))
+            )
+            return result
+
+        print(
+            f"LLM returned {len(result)}/{total} valid assignments, "
+            "filling gaps with fallback"
+        )
+        for s in missing:
+            result[s.slide_number] = pick_theme_and_mode_fallback(
+                s.slide_number - 1, total
+            )
+        return result
+
+    except Exception as e:
+        # Theme choice is cosmetic, so any failure degrades to the fallback.
+        print(f"LLM theme assignment failed ({e!s}), using fallback")
+        return {
+            s.slide_number: pick_theme_and_mode_fallback(s.slide_number - 1, total)
+            for s in slides
+        }
+
+
+async def generate_slide_scene_codes(
+    state: State, config: RunnableConfig
+) -> dict[str, Any]:
+    """Produce Remotion scene code for every slide with the configured LLM.
+
+    Themes are assigned to all slides up front in a single LLM call. Each slide
+    is then generated in order, one LLM call per slide, and passed through the
+    refine step before being collected.
+
+    Returns:
+        A state update ``{"slide_scene_codes": [...]}`` in slide order.
+
+    Raises:
+        RuntimeError: If no LLM is configured for the search space.
+    """
+
+    space_id = Configuration.from_runnable_config(config).search_space_id
+
+    llm = await get_agent_llm(state.db_session, space_id)
+    if not llm:
+        raise RuntimeError(f"No LLM configured for search space {space_id}")
+
+    slides = state.slides or []
+    slide_count = len(slides)
+
+    # Index audio results by slide number for per-slide duration lookup.
+    audio_by_slide: dict[int, SlideAudioResult] = {}
+    for audio_result in state.slide_audio_results or []:
+        audio_by_slide[audio_result.slide_number] = audio_result
+
+    themes = await _assign_themes_with_llm(llm, slides)
+
+    generated: list[SlideSceneCode] = []
+    for slide in slides:
+        number = slide.slide_number
+
+        slide_audio = audio_by_slide.get(number)
+        if slide_audio:
+            frames = slide_audio.duration_in_frames
+        else:
+            frames = DEFAULT_DURATION_IN_FRAMES
+
+        fallback_theme = pick_theme_and_mode_fallback(number - 1, slide_count)
+        theme, mode = themes.get(number, fallback_theme)
+
+        scene_prompt = build_scene_generation_user_prompt(
+            slide_number=number,
+            total_slides=slide_count,
+            title=slide.title,
+            subtitle=slide.subtitle,
+            content_in_markdown=slide.content_in_markdown,
+            background_explanation=slide.background_explanation,
+            duration_in_frames=frames,
+            theme=theme,
+            mode=mode,
+        )
+
+        print(
+            f"Generating scene code for slide {number}/{slide_count}: "
+            f'"{slide.title}" ({frames} frames)'
+        )
+
+        reply = await llm.ainvoke(
+            [
+                SystemMessage(content=REMOTION_SCENE_SYSTEM_PROMPT),
+                HumanMessage(content=scene_prompt),
+            ]
+        )
+        code, scene_title = _extract_code_and_title(reply.content)
+        code = await _refine_if_needed(llm, code, number)
+
+        generated.append(
+            SlideSceneCode(
+                slide_number=number,
+                code=code,
+                title=scene_title or slide.title,
+            )
+        )
+
+        print(f"Scene code ready for slide {number} ({len(code)} chars)")
+
+    return {"slide_scene_codes": generated}
+
+
+def _extract_code_and_title(content: str) -> tuple[str, str | None]:
+    """Extract code and optional title from LLM response.
+
+    The LLM may return a JSON object like the POC's structured output:
+      { "code": "...", "title": "..." }
+    Or it may return raw code (with optional markdown fences).
+    A JSON object wrapped in markdown fences is unwrapped and parsed too,
+    rather than being returned verbatim as code.
+
+    Returns (code, title) where title may be None.
+    """
+
+    def _parse_payload(candidate: str) -> tuple[str, str | None] | None:
+        """Return (code, title) if *candidate* is a JSON object with string code."""
+        try:
+            parsed = json.loads(candidate)
+        except (json.JSONDecodeError, ValueError):
+            return None
+        if isinstance(parsed, dict) and isinstance(parsed.get("code"), str):
+            return parsed["code"], parsed.get("title")
+        return None
+
+    def _try_json(candidate: str) -> tuple[str, str | None] | None:
+        """Parse *candidate* as JSON, tolerating trailing text after the last brace."""
+        if not candidate.startswith("{"):
+            return None
+        found = _parse_payload(candidate)
+        if found is None:
+            json_end = candidate.rfind("}") + 1
+            if json_end > 0:
+                found = _parse_payload(candidate[:json_end])
+        return found
+
+    text = content.strip()
+
+    found = _try_json(text)
+    if found is not None:
+        return found
+
+    code = text
+    if code.startswith("```"):
+        lines = code.split("\n")
+        # Drop the opening fence line and everything from the last fence on.
+        end = len(lines)
+        for i in range(len(lines) - 1, 0, -1):
+            if lines[i].strip().startswith("```"):
+                end = i
+                break
+        code = "\n".join(lines[1:end]).strip()
+
+        # The fenced payload may itself be the structured JSON object.
+        found = _try_json(code)
+        if found is not None:
+            return found
+
+    return code, None
+
+
+async def _refine_if_needed(llm, code: str, slide_number: int) -> str:
+    """Validate generated scene code and ask the LLM to repair it when it fails.
+
+    Code that passes the basic syntax check is returned untouched. Otherwise
+    the LLM is given the code and the reported problem, up to
+    MAX_REFINE_ATTEMPTS times, and the first passing result is returned.
+
+    Raises RuntimeError if the code is still invalid after MAX_REFINE_ATTEMPTS,
+    matching the POC's behavior where a failed slide aborts the pipeline.
+    """
+    problem = _basic_syntax_check(code)
+    attempt = 0
+
+    while problem is not None and attempt < MAX_REFINE_ATTEMPTS:
+        attempt += 1
+        print(
+            f"Slide {slide_number}: syntax issue (attempt {attempt}/{MAX_REFINE_ATTEMPTS}): {problem}"
+        )
+
+        repair_request = (
+            f"Here is the broken Remotion component code:\n\n{code}\n\n"
+            f"Compilation error:\n{problem}\n\nFix the code."
+        )
+        reply = await llm.ainvoke(
+            [
+                SystemMessage(content=REFINE_SCENE_SYSTEM_PROMPT),
+                HumanMessage(content=repair_request),
+            ]
+        )
+        code, _ = _extract_code_and_title(reply.content)
+
+        problem = _basic_syntax_check(code)
+        if problem is None:
+            print(f"Slide {slide_number}: fixed on attempt {attempt}")
+
+    if problem is None:
+        return code
+
+    raise RuntimeError(
+        f"Slide {slide_number} failed to compile after {MAX_REFINE_ATTEMPTS} "
+        f"refine attempts. Last error: {problem}"
+    )
+
+
+def _basic_syntax_check(code: str) -> str | None:
+    """Cheap structural sanity check for generated scene code.
+
+    Full Babel-based compilation happens on the frontend. This backend check
+    only looks for an exported component, balanced delimiters and the two
+    required Remotion names, so the refine loop can repair common mistakes
+    before the code is persisted. Delimiters are counted character by
+    character, including those inside strings and comments.
+
+    Returns an error description or None if the code looks valid.
+    """
+    if not code or not code.strip():
+        return "Empty code"
+
+    if "export" not in code and "MyComposition" not in code:
+        return "Missing exported component (expected 'export const MyComposition')"
+
+    depth = {"{": 0, "(": 0, "[": 0}
+    opener_for = {"}": "{", ")": "(", "]": "["}
+    underflow_message = {
+        "{": "Unmatched closing brace '}'",
+        "(": "Unmatched closing parenthesis ')'",
+        "[": "Unmatched closing bracket ']'",
+    }
+
+    for ch in code:
+        if ch in depth:
+            depth[ch] += 1
+        elif ch in opener_for:
+            opener = opener_for[ch]
+            depth[opener] -= 1
+            # Only the counter just decremented can have gone negative.
+            if depth[opener] < 0:
+                return underflow_message[opener]
+
+    for opener, plural in (("{", "braces"), ("(", "parentheses"), ("[", "brackets")):
+        unclosed = depth[opener]
+        if unclosed != 0:
+            return f"Unbalanced {plural}: {unclosed} unclosed"
+
+    required_names = (
+        (
+            "useCurrentFrame",
+            "Missing useCurrentFrame() — required for Remotion animations",
+        ),
+        (
+            "AbsoluteFill",
+            "Missing AbsoluteFill — required as the root layout component",
+        ),
+    )
+    for name, message in required_names:
+        if name not in code:
+            return message
+
+    return None
--- a/surfsense_backend/app/agents/video_presentation/prompts.py
+++ b/surfsense_backend/app/agents/video_presentation/prompts.py
@ -0,0 +1,509 @@
+import datetime
+
+# TODO: move these to config file
+# Hard ceiling on the slide count quoted to the LLM in get_slide_generation_prompt().
+MAX_SLIDES = 5
+# Frame rate used by build_scene_generation_user_prompt() to convert frames to seconds.
+FPS = 30
+# 300 frames, i.e. 10 seconds at FPS = 30.
+DEFAULT_DURATION_IN_FRAMES = 300
+
+# Ordered theme names. pick_theme_and_mode_fallback() indexes into this list
+# round-robin by slide index. The names (and their order) match the keys of
+# THEME_DESCRIPTIONS.
+THEME_PRESETS = [
+    "TERRA",
+    "OCEAN",
+    "SUNSET",
+    "EMERALD",
+    "ECLIPSE",
+    "ROSE",
+    "FROST",
+    "NEBULA",
+    "AURORA",
+    "CORAL",
+    "MIDNIGHT",
+    "AMBER",
+    "LAVENDER",
+    "STEEL",
+    "CITRUS",
+    "CHERRY",
+]
+
+# One-line mood/colour summary per theme name. get_theme_assignment_system_prompt()
+# renders these as the "- NAME: description" list shown to the LLM.
+THEME_DESCRIPTIONS: dict[str, str] = {
+    "TERRA": "Warm earthy tones — terracotta, olive. Heritage, tradition, organic warmth.",
+    "OCEAN": "Cool oceanic depth — teal, coral accents. Calm, marine, fluid elegance.",
+    "SUNSET": "Vibrant warm energy — orange, purple. Passion, creativity, bold expression.",
+    "EMERALD": "Fresh natural life — green, mint. Growth, health, sustainability.",
+    "ECLIPSE": "Dramatic luxury — black, gold. Premium, power, prestige.",
+    "ROSE": "Soft elegance — dusty pink, mauve. Beauty, care, refined femininity.",
+    "FROST": "Crisp clarity — ice blue, silver. Tech, data, precision analytics.",
+    "NEBULA": "Cosmic mystery — magenta, deep purple. AI, innovation, cutting-edge future.",
+    "AURORA": "Ethereal northern lights — green-teal, violet. Mystical, transformative, wonder.",
+    "CORAL": "Tropical warmth — coral, turquoise. Inviting, lively, community.",
+    "MIDNIGHT": "Deep sophistication — navy, silver. Contemplative, trust, authority.",
+    "AMBER": "Rich honey warmth — amber, brown. Comfort, wisdom, organic richness.",
+    "LAVENDER": "Gentle dreaminess — purple, lilac. Calm, imaginative, serene.",
+    "STEEL": "Industrial strength — gray, steel blue. Modern professional, reliability.",
+    "CITRUS": "Bright optimism — yellow, lime. Energy, joy, fresh starts.",
+    "CHERRY": "Bold impact — deep red, dark. Power, urgency, passionate conviction.",
+}
+
+
+# ---------------------------------------------------------------------------
+# LLM-based theme assignment (replaces keyword-based pick_theme_and_mode)
+# ---------------------------------------------------------------------------
+
+# Template for str.format(): {theme_list} is filled in by
+# get_theme_assignment_system_prompt(). The doubled braces in the JSON example
+# are format escapes that render as single literal braces.
+THEME_ASSIGNMENT_SYSTEM_PROMPT = """You are a visual design director assigning color themes to presentation slides.
+Given a list of slides, assign each slide a theme preset and color mode (dark or light).
+
+Available themes (name — description):
+{theme_list}
+
+Rules:
+1. Pick the theme that best matches each slide's mood, content, and visual direction.
+2. Maximize visual variety — avoid repeating the same theme on consecutive slides.
+3. Mix dark and light modes across the presentation for contrast and rhythm.
+4. Opening slides often benefit from a bold dark theme; closing/summary slides can go either way.
+5. The "background_explanation" field is the primary signal — it describes the intended mood and color direction.
+
+Return ONLY a JSON array (no markdown fences, no explanation):
+[
+  {{"slide_number": 1, "theme": "THEME_NAME", "mode": "dark"}},
+  {{"slide_number": 2, "theme": "THEME_NAME", "mode": "light"}}
+]
+""".strip()
+
+
+def build_theme_assignment_user_prompt(
+    slides: list[dict[str, str]],
+) -> str:
+    """Render the user message that asks the LLM to theme each slide.
+
+    Each entry of *slides* must provide ``slide_number`` and ``title``.
+    ``subtitle`` falls back to an empty string and ``background_explanation``
+    (the mood) falls back to "neutral" when absent.
+
+    Returns a newline-joined block: a heading, a blank line, then one line per
+    slide.
+    """
+    header = ["Assign a theme and mode to each of these slides:", ""]
+    entries = []
+    for slide in slides:
+        number = slide["slide_number"]
+        title = slide["title"]
+        subtitle = slide.get("subtitle", "")
+        mood = slide.get("background_explanation", "neutral")
+        entries.append(
+            f'Slide {number}: "{title}" (subtitle: "{subtitle}") — Mood: "{mood}"'
+        )
+    return "\n".join(header + entries)
+
+
+def get_theme_assignment_system_prompt() -> str:
+    """Render THEME_ASSIGNMENT_SYSTEM_PROMPT with one "- NAME: description" line per theme."""
+    bullet_lines = [
+        f"- {name}: {desc}" for name, desc in THEME_DESCRIPTIONS.items()
+    ]
+    rendered_list = "\n".join(bullet_lines)
+    return THEME_ASSIGNMENT_SYSTEM_PROMPT.format(theme_list=rendered_list)
+
+
+def pick_theme_and_mode_fallback(
+    slide_index: int, total_slides: int
+) -> tuple[str, str]:
+    """Deterministic (theme, mode) choice used when LLM theme assignment fails.
+
+    The theme cycles through THEME_PRESETS by ``slide_index``. The mode is
+    "dark" for even indices and "light" for odd ones, except that a
+    single-slide deck is always "dark".
+    """
+    preset = THEME_PRESETS[slide_index % len(THEME_PRESETS)]
+    if total_slides == 1 or slide_index % 2 == 0:
+        return preset, "dark"
+    return preset, "light"
+
+
+def get_slide_generation_prompt(user_prompt: str | None = None) -> str:
+    """Build the system prompt for splitting source content into slides.
+
+    The prompt carries today's date (taken from ``datetime.datetime.now()``),
+    the expected JSON output shape, slide-count guidance capped at
+    ``MAX_SLIDES``, and one worked example.
+
+    Args:
+        user_prompt: Optional extra instruction. When truthy it is embedded
+            verbatim inside a ``<user_instruction>`` block that the model is
+            told it MUST follow; when None or empty that block is omitted.
+
+    Returns:
+        The fully rendered prompt text.
+    """
+    return f"""
+Today's date: {datetime.datetime.now().strftime("%Y-%m-%d")}
+<video_presentation_system>
+You are a content-to-slides converter. You receive raw source content (articles, notes, transcripts,
+product descriptions, chat conversations, etc.) and break it into a sequence of presentation slides
+for a video presentation with voiceover narration.
+
+{
+        f'''
+You **MUST** strictly adhere to the following user instruction while generating the slides:
+<user_instruction>
+{user_prompt}
+</user_instruction>
+'''
+        if user_prompt
+        else ""
+    }
+
+<input>
+- '<source_content>': A block of text containing the information to be presented. This could be
+  research findings, an article summary, a detailed outline, user chat history, or any relevant
+  raw information. The content serves as the factual basis for the video presentation.
+</input>
+
+<output_format>
+A JSON object containing the presentation slides:
+{{
+  "slides": [
+    {{
+      "slide_number": 1,
+      "title": "Concise slide title",
+      "subtitle": "One-line subtitle or tagline",
+      "content_in_markdown": "## Heading\\n- Bullet point 1\\n- **Bold text**\\n- Bullet point 3",
+      "speaker_transcripts": [
+        "First narration sentence for this slide.",
+        "Second narration sentence expanding on the point.",
+        "Third sentence wrapping up this slide."
+      ],
+      "background_explanation": "Emotional mood and color direction for this slide"
+    }}
+  ]
+}}
+</output_format>
+
+<guidelines>
+=== SLIDE COUNT ===
+
+Dynamically decide the number of slides between 1 and {MAX_SLIDES} (inclusive).
+Base your decision entirely on the content's depth, richness, and how many distinct ideas it contains.
+Thin or simple content should produce fewer slides; dense or multi-faceted content may use more.
+Do NOT inflate or pad slides to reach {
+        MAX_SLIDES
+    } — only use what the content genuinely warrants.
+Do NOT treat {MAX_SLIDES} as a target; it is a hard ceiling, not a goal.
+
+=== SLIDE STRUCTURE ===
+
+- Each slide should cover ONE distinct key idea or section.
+- Keep slides focused: 2-5 bullet points of content per slide max.
+- The first slide should be a title/intro slide.
+- The last slide should be a summary or closing slide ONLY if there are 3+ slides.
+  For 1-2 slides, skip the closing slide — just cover the content.
+- Do NOT create a separate closing slide if its content would just repeat earlier slides.
+
+=== CONTENT FIELDS ===
+
+- Write speaker_transcripts as if a human presenter is narrating — natural, conversational, 2-4 sentences per slide.
+  These will be converted to TTS audio, so write in a way that sounds great when spoken aloud.
+- background_explanation should describe a visual style matching the slide's mood:
+    - Describe the emotional feel: "warm and organic", "dramatic and urgent", "clean and optimistic",
+      "technical and precise", "celebratory", "earthy and grounded", "cosmic and futuristic"
+    - Mention color direction: warm tones, cool tones, earth tones, neon accents, gold/black, etc.
+    - Vary the mood across slides — do NOT always say "dark blue gradient".
+- content_in_markdown should use proper markdown: ## headings, **bold**, - bullets, etc.
+
+=== NARRATION QUALITY ===
+
+- Speaker transcripts should explain the slide content in an engaging, presenter-like voice.
+- Keep narration concise: 2-4 sentences per slide (targeting ~10-15 seconds of audio per slide).
+- The narration should add context beyond what's on the slide — don't just read the bullets.
+- Use natural language: contractions, conversational tone, occasional enthusiasm.
+</guidelines>
+
+<examples>
+Input: "Quantum computing uses quantum bits or qubits which can exist in multiple states simultaneously due to superposition."
+
+Output:
+{{
+  "slides": [
+    {{
+      "slide_number": 1,
+      "title": "Quantum Computing",
+      "subtitle": "Beyond Classical Bits",
+      "content_in_markdown": "## The Quantum Leap\\n- Classical computers use **bits** (0 or 1)\\n- Quantum computers use **qubits**\\n- Qubits leverage **superposition**",
+      "speaker_transcripts": [
+        "Let's explore quantum computing, a technology that's fundamentally different from the computers we use every day.",
+        "While traditional computers work with bits that are either zero or one, quantum computers use something called qubits.",
+        "The magic of qubits is superposition — they can exist in multiple states at the same time."
+      ],
+      "background_explanation": "Cosmic and futuristic with deep purple and magenta tones, evoking the mystery of quantum mechanics"
+    }}
+  ]
+}}
+</examples>
+
+Transform the source material into well-structured presentation slides with engaging narration.
+Ensure each slide has a clear visual mood and natural-sounding speaker transcripts.
+</video_presentation_system>
+"""
+
+
+# ---------------------------------------------------------------------------
+# Remotion scene code generation prompt
+# Ported from RemotionTets POC /api/generate system prompt
+# ---------------------------------------------------------------------------
+
+REMOTION_SCENE_SYSTEM_PROMPT = """
+You are a Remotion component generator that creates cinematic, modern motion graphics.
+Generate a single self-contained React component that uses Remotion.
+
+=== THEME PRESETS (pick ONE per slide — see user prompt for which to use) ===
+
+Each slide MUST use a DIFFERENT preset. The user prompt will tell you which preset to use.
+Use ALL colors from that preset — background, surface, text, accent, glow. Do NOT mix presets.
+
+TERRA (warm earth — terracotta + olive):
+  dark:  bg #1C1510  surface #261E16  border #3D3024  text #E8DDD0  muted #9A8A78  accent #C2623D  secondary #7D8C52  glow rgba(194,98,61,0.12)
+  light: bg #F7F0E8  surface #FFF8F0  border #DDD0BF  text #2C1D0E  muted #8A7A68  accent #B85430  secondary #6B7A42  glow rgba(184,84,48,0.08)
+  gradient-dark: radial-gradient(ellipse at 30% 80%, rgba(194,98,61,0.18), transparent 60%), linear-gradient(180deg, #1C1510, #261E16)
+  gradient-light: radial-gradient(ellipse at 70% 20%, rgba(107,122,66,0.12), transparent 55%), linear-gradient(180deg, #F7F0E8, #FFF8F0)
+
+OCEAN (cool depth — teal + coral):
+  dark:  bg #0B1A1E  surface #122428  border #1E3740  text #D5EAF0  muted #6A9AA8  accent #1DB6A8  secondary #E87461  glow rgba(29,182,168,0.12)
+  light: bg #F0F8FA  surface #FFFFFF  border #C8E0E8  text #0E2830  muted #5A8A98  accent #0EA69A  secondary #D05F4E  glow rgba(14,166,154,0.08)
+  gradient-dark: radial-gradient(ellipse at 80% 30%, rgba(29,182,168,0.20), transparent 55%), radial-gradient(circle at 20% 80%, rgba(232,116,97,0.10), transparent 50%), #0B1A1E
+  gradient-light: radial-gradient(ellipse at 20% 40%, rgba(14,166,154,0.10), transparent 55%), linear-gradient(180deg, #F0F8FA, #FFFFFF)
+
+SUNSET (warm energy — orange + purple):
+  dark:  bg #1E130F  surface #2A1B14  border #42291C  text #F0DDD0  muted #A08878  accent #E86A20  secondary #A855C0  glow rgba(232,106,32,0.12)
+  light: bg #FFF5ED  surface #FFFFFF  border #EADAC8  text #2E1508  muted #907860  accent #D05A18  secondary #9045A8  glow rgba(208,90,24,0.08)
+  gradient-dark: linear-gradient(135deg, rgba(232,106,32,0.15) 0%, transparent 40%), radial-gradient(circle at 80% 70%, rgba(168,85,192,0.15), transparent 50%), #1E130F
+  gradient-light: linear-gradient(135deg, rgba(208,90,24,0.08) 0%, rgba(144,69,168,0.06) 100%), #FFF5ED
+
+EMERALD (fresh life — green + mint):
+  dark:  bg #0B1E14  surface #12281A  border #1E3C28  text #D0F0E0  muted #5EA880  accent #10B981  secondary #84CC16  glow rgba(16,185,129,0.12)
+  light: bg #F0FAF5  surface #FFFFFF  border #C0E8D0  text #0E2C18  muted #489068  accent #059669  secondary #65A30D  glow rgba(5,150,105,0.08)
+  gradient-dark: radial-gradient(ellipse at 50% 50%, rgba(16,185,129,0.18), transparent 60%), linear-gradient(180deg, #0B1E14, #12281A)
+  gradient-light: radial-gradient(ellipse at 60% 30%, rgba(101,163,13,0.10), transparent 55%), linear-gradient(180deg, #F0FAF5, #FFFFFF)
+
+ECLIPSE (dramatic — black + gold):
+  dark:  bg #100C05  surface #1A1508  border #2E2510  text #D4B96A  muted #8A7840  accent #E8B830  secondary #C09020  glow rgba(232,184,48,0.14)
+  light: bg #FAF6ED  surface #FFFFFF  border #E0D8C0  text #1A1408  muted #7A6818  accent #C09820  secondary #A08018  glow rgba(192,152,32,0.08)
+  gradient-dark: radial-gradient(circle at 50% 40%, rgba(232,184,48,0.20), transparent 50%), radial-gradient(ellipse at 50% 90%, rgba(192,144,32,0.08), transparent 50%), #100C05
+  gradient-light: radial-gradient(circle at 50% 40%, rgba(192,152,32,0.10), transparent 55%), linear-gradient(180deg, #FAF6ED, #FFFFFF)
+
+ROSE (soft elegance — dusty pink + mauve):
+  dark:  bg #1E1018  surface #281820  border #3D2830  text #F0D8E0  muted #A08090  accent #E4508C  secondary #B06498  glow rgba(228,80,140,0.12)
+  light: bg #FDF2F5  surface #FFFFFF  border #F0D0D8  text #2C1018  muted #906878  accent #D43D78  secondary #9A5080  glow rgba(212,61,120,0.08)
+  gradient-dark: radial-gradient(ellipse at 70% 30%, rgba(228,80,140,0.18), transparent 55%), radial-gradient(circle at 20% 80%, rgba(176,100,152,0.10), transparent 50%), #1E1018
+  gradient-light: radial-gradient(ellipse at 30% 60%, rgba(212,61,120,0.08), transparent 55%), linear-gradient(180deg, #FDF2F5, #FFFFFF)
+
+FROST (crisp clarity — ice blue + silver):
+  dark:  bg #0A1520  surface #101D2A  border #1A3040  text #D0E5F5  muted #6090B0  accent #5AB4E8  secondary #8BA8C0  glow rgba(90,180,232,0.12)
+  light: bg #F0F6FC  surface #FFFFFF  border #C8D8E8  text #0C1820  muted #5080A0  accent #3A96D0  secondary #7090A8  glow rgba(58,150,208,0.08)
+  gradient-dark: radial-gradient(ellipse at 40% 20%, rgba(90,180,232,0.16), transparent 55%), linear-gradient(180deg, #0A1520, #101D2A)
+  gradient-light: radial-gradient(ellipse at 50% 50%, rgba(58,150,208,0.08), transparent 55%), linear-gradient(180deg, #F0F6FC, #FFFFFF)
+
+NEBULA (cosmic — magenta + deep purple):
+  dark:  bg #150A1E  surface #1E1028  border #351A48  text #E0D0F0  muted #8060A0  accent #C850E0  secondary #8030C0  glow rgba(200,80,224,0.14)
+  light: bg #F8F0FF  surface #FFFFFF  border #E0C8F0  text #1A0A24  muted #7050A0  accent #A840C0  secondary #6820A0  glow rgba(168,64,192,0.08)
+  gradient-dark: radial-gradient(circle at 60% 40%, rgba(200,80,224,0.18), transparent 50%), radial-gradient(ellipse at 30% 80%, rgba(128,48,192,0.12), transparent 50%), #150A1E
+  gradient-light: radial-gradient(circle at 40% 30%, rgba(168,64,192,0.10), transparent 55%), linear-gradient(180deg, #F8F0FF, #FFFFFF)
+
+AURORA (ethereal lights — green-teal + violet):
+  dark:  bg #0A1A1A  surface #102020  border #1A3838  text #D0F0F0  muted #60A0A0  accent #30D0B0  secondary #8040D0  glow rgba(48,208,176,0.12)
+  light: bg #F0FAF8  surface #FFFFFF  border #C0E8E0  text #0A2020  muted #508080  accent #20B090  secondary #6830B0  glow rgba(32,176,144,0.08)
+  gradient-dark: radial-gradient(ellipse at 30% 70%, rgba(48,208,176,0.18), transparent 55%), radial-gradient(circle at 70% 30%, rgba(128,64,208,0.12), transparent 50%), #0A1A1A
+  gradient-light: radial-gradient(ellipse at 50% 40%, rgba(32,176,144,0.10), transparent 55%), linear-gradient(180deg, #F0FAF8, #FFFFFF)
+
+CORAL (tropical warmth — coral + turquoise):
+  dark:  bg #1E0F0F  surface #281818  border #402828  text #F0D8D8  muted #A07070  accent #F06050  secondary #30B8B0  glow rgba(240,96,80,0.12)
+  light: bg #FFF5F3  surface #FFFFFF  border #F0D0C8  text #2E1010  muted #906060  accent #E04838  secondary #20A098  glow rgba(224,72,56,0.08)
+  gradient-dark: radial-gradient(ellipse at 60% 60%, rgba(240,96,80,0.18), transparent 55%), radial-gradient(circle at 30% 30%, rgba(48,184,176,0.10), transparent 50%), #1E0F0F
+  gradient-light: radial-gradient(ellipse at 40% 50%, rgba(224,72,56,0.08), transparent 55%), linear-gradient(180deg, #FFF5F3, #FFFFFF)
+
+MIDNIGHT (deep sophistication — navy + silver):
+  dark:  bg #080C18  surface #0E1420  border #1A2438  text #C8D8F0  muted #5070A0  accent #4080E0  secondary #A0B0D0  glow rgba(64,128,224,0.12)
+  light: bg #F0F2F8  surface #FFFFFF  border #C8D0E0  text #101828  muted #506080  accent #3060C0  secondary #8090B0  glow rgba(48,96,192,0.08)
+  gradient-dark: radial-gradient(ellipse at 50% 30%, rgba(64,128,224,0.16), transparent 55%), linear-gradient(180deg, #080C18, #0E1420)
+  gradient-light: radial-gradient(ellipse at 50% 50%, rgba(48,96,192,0.08), transparent 55%), linear-gradient(180deg, #F0F2F8, #FFFFFF)
+
+AMBER (rich honey warmth — amber + brown):
+  dark:  bg #1A1208  surface #221A0E  border #3A2C18  text #F0E0C0  muted #A09060  accent #E0A020  secondary #C08030  glow rgba(224,160,32,0.12)
+  light: bg #FFF8E8  surface #FFFFFF  border #E8D8B8  text #2A1C08  muted #907840  accent #C88810  secondary #A86820  glow rgba(200,136,16,0.08)
+  gradient-dark: radial-gradient(ellipse at 40% 60%, rgba(224,160,32,0.18), transparent 55%), linear-gradient(180deg, #1A1208, #221A0E)
+  gradient-light: radial-gradient(ellipse at 60% 40%, rgba(200,136,16,0.10), transparent 55%), linear-gradient(180deg, #FFF8E8, #FFFFFF)
+
+LAVENDER (gentle dreaminess — purple + lilac):
+  dark:  bg #14101E  surface #1C1628  border #302840  text #E0D8F0  muted #8070A0  accent #A060E0  secondary #C090D0  glow rgba(160,96,224,0.12)
+  light: bg #F8F0FF  surface #FFFFFF  border #E0D0F0  text #1C1028  muted #706090  accent #8848C0  secondary #A878B8  glow rgba(136,72,192,0.08)
+  gradient-dark: radial-gradient(ellipse at 60% 40%, rgba(160,96,224,0.18), transparent 55%), radial-gradient(circle at 30% 70%, rgba(192,144,208,0.10), transparent 50%), #14101E
+  gradient-light: radial-gradient(ellipse at 40% 30%, rgba(136,72,192,0.10), transparent 55%), linear-gradient(180deg, #F8F0FF, #FFFFFF)
+
+STEEL (industrial strength — gray + steel blue):
+  dark:  bg #101214  surface #181C20  border #282E38  text #D0D8E0  muted #708090  accent #5088B0  secondary #90A0B0  glow rgba(80,136,176,0.12)
+  light: bg #F2F4F6  surface #FFFFFF  border #D0D8E0  text #181C24  muted #607080  accent #3870A0  secondary #708898  glow rgba(56,112,160,0.08)
+  gradient-dark: radial-gradient(ellipse at 50% 50%, rgba(80,136,176,0.14), transparent 55%), linear-gradient(180deg, #101214, #181C20)
+  gradient-light: radial-gradient(ellipse at 50% 40%, rgba(56,112,160,0.08), transparent 55%), linear-gradient(180deg, #F2F4F6, #FFFFFF)
+
+CITRUS (bright optimism — yellow + lime):
+  dark:  bg #181808  surface #202010  border #383818  text #F0F0C0  muted #A0A060  accent #E8D020  secondary #90D030  glow rgba(232,208,32,0.12)
+  light: bg #FFFFF0  surface #FFFFFF  border #E8E8C0  text #282808  muted #808040  accent #C8B010  secondary #70B020  glow rgba(200,176,16,0.08)
+  gradient-dark: radial-gradient(ellipse at 40% 40%, rgba(232,208,32,0.18), transparent 55%), radial-gradient(circle at 70% 70%, rgba(144,208,48,0.10), transparent 50%), #181808
+  gradient-light: radial-gradient(ellipse at 50% 30%, rgba(200,176,16,0.10), transparent 55%), linear-gradient(180deg, #FFFFF0, #FFFFFF)
+
+CHERRY (bold impact — deep red + dark):
+  dark:  bg #1A0808  surface #241010  border #401818  text #F0D0D0  muted #A06060  accent #D02030  secondary #E05060  glow rgba(208,32,48,0.14)
+  light: bg #FFF0F0  surface #FFFFFF  border #F0C8C8  text #280808  muted #904848  accent #B01828  secondary #C83848  glow rgba(176,24,40,0.08)
+  gradient-dark: radial-gradient(ellipse at 50% 40%, rgba(208,32,48,0.20), transparent 50%), linear-gradient(180deg, #1A0808, #241010)
+  gradient-light: radial-gradient(ellipse at 50% 50%, rgba(176,24,40,0.10), transparent 55%), linear-gradient(180deg, #FFF0F0, #FFFFFF)
+
+=== SHARED TOKENS (use with any theme above) ===
+
+SPACING: xs 8px, sm 16px, md 24px, lg 32px, xl 48px, 2xl 64px, 3xl 96px, 4xl 128px
+TYPOGRAPHY: fontFamily "Inter, system-ui, -apple-system, sans-serif"
+  caption 14px/1.4, body 18px/1.6, subhead 24px/1.4, title 40px/1.2 w600, headline 64px/1.1 w700, display 96px/1.0 w800
+  letterSpacing: tight "-0.02em", normal "0", wide "0.05em"
+BORDER RADIUS: 12px (cards), 8px (buttons), 9999px (pills)
+
+=== VISUAL VARIETY (CRITICAL) ===
+
+The user prompt assigns each slide a specific theme preset AND mode (dark/light).
+You MUST use EXACTLY the assigned preset and mode. Additionally:
+
+1. Use the preset's gradient as the AbsoluteFill background.
+2. Use the preset's accent/secondary colors for highlights, pill badges, and card accents.
+3. Use the preset's glow value for all boxShadow effects.
+4. LAYOUT VARIATION: Vary layout between slides:
+   - One slide: bold centered headline + subtle stat
+   - Another: two-column card layout
+   - Another: single large number or quote as hero
+   Do NOT use the same layout pattern for every slide.
+
+=== LAYOUT RULES (CRITICAL — elements must NEVER overlap) ===
+
+The canvas is 1920x1080. You MUST use a SINGLE-LAYER layout. NO stacking, NO multiple AbsoluteFill layers.
+
+STRUCTURE — every component must follow this exact pattern:
+  <AbsoluteFill style={{ backgroundColor: "...", display: "flex", flexDirection: "column", justifyContent: "center", alignItems: "center", padding: 80 }}>
+    {/* ALL content goes here as direct children in normal flow */}
+  </AbsoluteFill>
+
+ABSOLUTE RULES:
+- Use exactly ONE AbsoluteFill as the root. Set its background color/gradient via its style prop.
+- NEVER nest AbsoluteFill inside AbsoluteFill.
+- NEVER use position "absolute" or position "fixed" on ANY element.
+- NEVER use multiple layers or z-index.
+- ALL elements must be in normal document flow inside the single root AbsoluteFill.
+
+SPACING:
+- Root padding: 80px on all sides (safe area).
+- Use flexDirection "column" with gap for vertical stacking, flexDirection "row" with gap for horizontal.
+- Minimum gap between elements: 24px vertical, 32px horizontal.
+- Text hierarchy gaps: headline→subheading 16px, subheading→body 12px, body→button 32px.
+- Cards/panels: padding 32px-48px, borderRadius 12px.
+- NEVER use margin to space siblings — always use the parent's gap property.
+
+=== DESIGN STYLE ===
+
+- Premium aesthetic — use the exact colors from the assigned theme preset (do NOT invent your own)
+- Background: use the preset's gradient-dark or gradient-light value directly as the AbsoluteFill's background
+- Card/surface backgrounds: use the preset's surface color
+- Text colors: use the preset's text, muted values
+- Borders: use the preset's border color
+- Glows: use the preset's glow value for all boxShadow — do NOT substitute other colors
+- Generous whitespace — less is more, let elements breathe
+- NO decorative background shapes, blurs, or overlapping ornaments
+
+=== REMOTION RULES ===
+
+- Export the component as: export const MyComposition = () => { ... }
+- Use useCurrentFrame() and useVideoConfig() from "remotion"
+- Do NOT use Sequence
+- Do NOT manually calculate animation timings or frame offsets
+
+=== ANIMATION (use the stagger() helper for ALL element animations) ===
+
+A pre-built helper function called stagger() is available globally.
+It handles enter, hold, and exit phases automatically — you MUST use it.
+
+Signature:
+  stagger(frame, fps, index, total) → { opacity: number, transform: string }
+
+Parameters:
+  frame  — from useCurrentFrame()
+  fps    — from useVideoConfig()
+  index  — 0-based index of this element in the entrance order
+  total  — total number of animated elements in the scene
+
+It returns a style object with opacity and transform that you spread onto the element.
+Timing is handled for you: staggered spring entrances, ambient hold motion, and a graceful exit.
+
+Usage pattern:
+  const frame = useCurrentFrame();
+  const { fps } = useVideoConfig();
+
+  <div style={stagger(frame, fps, 0, 4)}>Headline</div>
+  <div style={stagger(frame, fps, 1, 4)}>Subtitle</div>
+  <div style={stagger(frame, fps, 2, 4)}>Card</div>
+  <div style={stagger(frame, fps, 3, 4)}>Footer</div>
+
+Rules:
+- Count ALL animated elements in your scene and pass that count as the "total" parameter.
+- Assign each element a sequential index starting from 0.
+- You can merge stagger's return with additional styles:
+    <div style={{ ...stagger(frame, fps, 0, 3), fontSize: 64, color: "#fafafa" }}>
+- For non-animated static elements (backgrounds, borders), just use normal styles without stagger.
+- You may still use spring() and interpolate() for EXTRA custom effects (e.g., a number counter,
+  color shift, or typewriter effect), but stagger() must drive all entrance/exit animations.
+
+=== AVAILABLE GLOBALS (injected at runtime, do NOT import anything else) ===
+
+- React (available globally)
+- AbsoluteFill, useCurrentFrame, useVideoConfig, spring, interpolate, Easing from "remotion"
+- stagger(frame, fps, index, total) — animation helper described above
+
+=== CODE RULES ===
+
+- Output ONLY the raw code, no markdown fences, no explanations
+- Keep it fully self-contained, no external dependencies or images
+- Use inline styles only (no CSS imports, no className)
+- Target 1920x1080 resolution
+- Every container must use display "flex" with explicit gap values
+- NEVER use marginTop/marginBottom to space siblings — use the parent's gap instead
+""".strip()
+
+
+def build_scene_generation_user_prompt(
+    slide_number: int,
+    total_slides: int,
+    title: str,
+    subtitle: str,
+    content_in_markdown: str,
+    background_explanation: str,
+    duration_in_frames: int,
+    theme: str,
+    mode: str,
+) -> str:
+    """Assemble the per-slide user prompt for Remotion scene generation.
+
+    The text states the scene length (frames, ``FPS`` and seconds), the slide's
+    position in the deck, the ``theme`` / ``mode`` pair to use, and the slide's
+    title, subtitle, markdown content and mood.
+
+    *theme* and *mode* are expected to be decided (by the LLM or the fallback)
+    before this function is called.
+    """
+    seconds = duration_in_frames / FPS
+
+    intro = [
+        "Create a cinematic, visually striking Remotion scene.",
+        f"The video is {duration_in_frames} frames at {FPS}fps ({seconds:.1f}s total).",
+        "",
+        f"This is slide {slide_number} of {total_slides} in the video.",
+    ]
+    theme_block = [
+        f"=== ASSIGNED THEME: {theme} / {mode.upper()} mode ===",
+        f"You MUST use the {theme} preset in {mode} mode from the theme presets above.",
+        f"Use its exact background gradient (gradient-{mode}), surface, text, accent, secondary, border, and glow colors.",
+        "Do NOT substitute, invent, or default to blue/violet colors.",
+    ]
+    content_block = [
+        f'The scene should communicate this message: "{title} — {subtitle}"',
+        "",
+        "Key ideas to convey (use as creative inspiration, NOT literal text to dump on screen):",
+        content_in_markdown,
+        "",
+        "Pick only the 1-2 most impactful phrases or numbers to display as text.",
+        "",
+        f"Mood & tone: {background_explanation}",
+    ]
+
+    # The three sections are separated by a single blank line each.
+    return "\n".join([*intro, "", *theme_block, "", *content_block])
+
+
+# System prompt for the repair ("refine") LLM call: the model is given scene
+# code that failed to compile plus the error text and must return fixed code.
+# NOTE(review): presumably used as-is (not passed through str.format), so the
+# braces in the text below are literal — confirm at the call site.
+REFINE_SCENE_SYSTEM_PROMPT = """
+You are a code repair assistant. You will receive a Remotion React component that failed to compile,
+along with the exact error message from the Babel transpiler.
+
+Your job is to fix the code so it compiles and runs correctly.
+
+RULES:
+- Output ONLY the fixed raw code as a string — no markdown fences, no explanations.
+- Preserve the original intent, design, and animations as closely as possible.
+- The component must be exported as: export const MyComposition = () => { ... }
+- Only these globals are available at runtime (they are injected, not actually imported):
+    React, AbsoluteFill, useCurrentFrame, useVideoConfig, spring, interpolate, Easing,
+    stagger (a helper: stagger(frame, fps, index, total) → { opacity, transform })
+- Keep import statements at the top (they get stripped by the compiler) but do NOT import anything
+  other than "react" and "remotion".
+- Use inline styles only (no CSS, no className).
+- Common fixes:
+    - Mismatched braces/brackets in JSX style objects (e.g. }}, instead of }}>)
+    - Missing closing tags
+    - Trailing commas before > in JSX
+    - Undefined variables or typos
+    - Invalid JSX expressions
+- After fixing, mentally walk through every brace pair { } and JSX tag to verify they match.
+""".strip()
--- a/surfsense_backend/app/agents/video_presentation/state.py
+++ b/surfsense_backend/app/agents/video_presentation/state.py
@ -0,0 +1,72 @@
+"""Define the state structures for the video presentation agent."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+from pydantic import BaseModel, Field
+from sqlalchemy.ext.asyncio import AsyncSession
+
+
+class SlideContent(BaseModel):
+    """Represents a single parsed slide from content analysis."""
+
+    # Field names match the per-slide keys of the JSON shape requested by
+    # prompts.get_slide_generation_prompt(). Every field is required
+    # (Field(...) with no default).
+    slide_number: int = Field(..., description="1-based slide number")
+    title: str = Field(..., description="Concise slide title")
+    subtitle: str = Field(..., description="One-line subtitle or tagline")
+    content_in_markdown: str = Field(
+        ..., description="Slide body content formatted as markdown"
+    )
+    # Narration sentences; the slide-generation prompt says these are turned
+    # into TTS audio.
+    speaker_transcripts: list[str] = Field(
+        ...,
+        description="2-4 short sentences a presenter would say while this slide is shown",
+    )
+    # Mood text; the theme-assignment prompt calls this the primary signal.
+    background_explanation: str = Field(
+        ...,
+        description="Emotional mood and color direction for this slide",
+    )
+
+
+class PresentationSlides(BaseModel):
+    """Represents the full set of parsed slides from the LLM."""
+
+    # Mirrors the top-level {"slides": [...]} object of the output format in
+    # prompts.get_slide_generation_prompt(). Required field.
+    slides: list[SlideContent] = Field(
+        ..., description="Ordered array of presentation slides"
+    )
+
+
+class SlideAudioResult(BaseModel):
+    """Audio generation result for a single slide."""
+
+    # Identifies the slide this audio belongs to — presumably the matching
+    # SlideContent.slide_number; verify against the producer.
+    slide_number: int
+    audio_file: str = Field(..., description="Path to the per-slide audio file")
+    duration_seconds: float = Field(..., description="Audio duration in seconds")
+    # NOTE(review): "30fps" in the description is hard-coded text; prompts.FPS
+    # is also 30 — keep the two aligned if the frame rate ever changes.
+    duration_in_frames: int = Field(
+        ..., description="Audio duration in frames (at 30fps)"
+    )
+
+
+class SlideSceneCode(BaseModel):
+    """Generated Remotion component code for a single slide."""
+
+    slide_number: int
+    # Stored as plain text; this model performs no validation of the source.
+    # The scene prompts ask for a component exported as MyComposition.
+    code: str = Field(
+        ..., description="Raw Remotion React component source code for this slide"
+    )
+    title: str = Field(..., description="Short title for the composition")
+
+
+@dataclass
+class State:
+    """State for the video presentation agent graph.
+
+    Pipeline: parse slides → generate per-slide TTS audio → generate per-slide Remotion code
+    The frontend receives the slides + code + audio and handles compilation/rendering.
+    """
+
+    # Required inputs (no defaults).
+    db_session: AsyncSession
+    source_content: str
+
+    # Start as None; presumably each is filled in by the corresponding
+    # pipeline stage named in the docstring — the graph nodes are not in this
+    # file, so confirm there.
+    slides: list[SlideContent] | None = None
+    slide_audio_results: list[SlideAudioResult] | None = None
+    slide_scene_codes: list[SlideSceneCode] | None = None
--- a/surfsense_backend/app/agents/video_presentation/utils.py
+++ b/surfsense_backend/app/agents/video_presentation/utils.py
@ -0,0 +1,30 @@
+def get_voice_for_provider(provider: str, speaker_id: int = 0) -> dict | str:
+    """
+    Pick the voice setting that matches a TTS provider string.
+
+    Only a single speaker is supported for now, so ``speaker_id`` is accepted
+    but does not influence the result.
+
+    Args:
+        provider: Provider identifier such as "openai/tts-1" or
+            "vertex_ai/test". Only the part before the first "/" (lowercased)
+            selects the voice, except for the exact value "local/kokoro".
+        speaker_id: Speaker index (default 0); currently unused.
+
+    Returns:
+        A voice name string for OpenAI, Azure and local Kokoro, a
+        ``languageCode``/``name`` dict for Vertex AI, or an empty dict when
+        the provider is not recognised.
+    """
+    # Exact, case-sensitive match for the local Kokoro model.
+    if provider == "local/kokoro":
+        return "af_heart"
+
+    if "/" in provider:
+        vendor = provider.split("/")[0]
+    else:
+        vendor = provider
+    vendor = vendor.lower()
+
+    if vendor in ("openai", "azure"):
+        return "alloy"
+    if vendor == "vertex_ai":
+        # Built per call so callers never share one mutable dict.
+        return {
+            "languageCode": "en-US",
+            "name": "en-US-Studio-O",
+        }
+    return {}
--- a/surfsense_backend/app/app.py
+++ b/surfsense_backend/app/app.py
@ -341,7 +341,7 @@ if config.NEXT_FRONTEND_URL:
            allowed_origins.append(www_url)

 allowed_origins.extend(
-    [ # For local development and desktop app
+    [  # For local development and desktop app
        "http://localhost:3000",
        "http://127.0.0.1:3000",
    ]
--- a/surfsense_backend/app/celery_app.py
+++ b/surfsense_backend/app/celery_app.py
@ -77,6 +77,7 @@ celery_app = Celery(
    include=[
        "app.tasks.celery_tasks.document_tasks",
        "app.tasks.celery_tasks.podcast_tasks",
+        "app.tasks.celery_tasks.video_presentation_tasks",
        "app.tasks.celery_tasks.connector_tasks",
        "app.tasks.celery_tasks.schedule_checker_task",
        "app.tasks.celery_tasks.document_reindex_tasks",
--- a/surfsense_backend/app/db.py
+++ b/surfsense_backend/app/db.py
@ -103,6 +103,13 @@ class PodcastStatus(StrEnum):
    FAILED = "failed"


+class VideoPresentationStatus(StrEnum):
+    """Status values a video presentation record can take."""
+
+    PENDING = "pending"
+    GENERATING = "generating"
+    READY = "ready"
+    FAILED = "failed"
+
+
 class DocumentStatus:
    """
    Helper class for document processing status (stored as JSONB).
@ -337,6 +344,12 @@ class Permission(StrEnum):
    PODCASTS_UPDATE = "podcasts:update"
    PODCASTS_DELETE = "podcasts:delete"

+    # Video Presentations
+    VIDEO_PRESENTATIONS_CREATE = "video_presentations:create"
+    VIDEO_PRESENTATIONS_READ = "video_presentations:read"
+    VIDEO_PRESENTATIONS_UPDATE = "video_presentations:update"
+    VIDEO_PRESENTATIONS_DELETE = "video_presentations:delete"
+
    # Image Generations
    IMAGE_GENERATIONS_CREATE = "image_generations:create"
    IMAGE_GENERATIONS_READ = "image_generations:read"
@ -403,6 +416,10 @@ DEFAULT_ROLE_PERMISSIONS = {
        Permission.PODCASTS_CREATE.value,
        Permission.PODCASTS_READ.value,
        Permission.PODCASTS_UPDATE.value,
+        # Video Presentations (no delete)
+        Permission.VIDEO_PRESENTATIONS_CREATE.value,
+        Permission.VIDEO_PRESENTATIONS_READ.value,
+        Permission.VIDEO_PRESENTATIONS_UPDATE.value,
        # Image Generations (create and read, no delete)
        Permission.IMAGE_GENERATIONS_CREATE.value,
        Permission.IMAGE_GENERATIONS_READ.value,
@ -435,6 +452,8 @@ DEFAULT_ROLE_PERMISSIONS = {
        Permission.LLM_CONFIGS_READ.value,
        # Podcasts (read only)
        Permission.PODCASTS_READ.value,
+        # Video Presentations (read only)
+        Permission.VIDEO_PRESENTATIONS_READ.value,
        # Image Generations (read only)
        Permission.IMAGE_GENERATIONS_READ.value,
        # Connectors (read only)
@ -1044,6 +1063,46 @@ class Podcast(BaseModel, TimestampMixin):
    thread = relationship("NewChatThread")


+class VideoPresentation(BaseModel, TimestampMixin):
+    """Video presentation model for storing AI-generated video presentations.
+
+    The ``slides`` JSONB stores per-slide content together with the audio file
+    path and durations; the ``scene_codes`` JSONB stores the generated Remotion
+    component code per slide. The frontend compiles the code and renders the
+    video using Remotion Player.
+    """
+
+    __tablename__ = "video_presentations"
+
+    title = Column(String(500), nullable=False)
+    # Both JSONB payloads are nullable, so a row can exist without them.
+    slides = Column(JSONB, nullable=True)
+    scene_codes = Column(JSONB, nullable=True)
+    status = Column(
+        SQLAlchemyEnum(
+            VideoPresentationStatus,
+            name="video_presentation_status",
+            # The enum type is not created by this column definition;
+            # presumably a migration creates it (confirm).
+            create_type=False,
+            # Persist the enum values ("pending", ...) rather than member names.
+            values_callable=lambda x: [e.value for e in x],
+        ),
+        nullable=False,
+        # NOTE(review): rows default to READY, not PENDING; confirm that code
+        # creating a not-yet-generated record sets the status explicitly.
+        default=VideoPresentationStatus.READY,
+        server_default="ready",
+        index=True,
+    )
+
+    # Owning search space; rows are removed when the search space is deleted.
+    search_space_id = Column(
+        Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False
+    )
+    search_space = relationship("SearchSpace", back_populates="video_presentations")
+
+    # Optional originating chat thread; the link is nulled if the thread is deleted.
+    thread_id = Column(
+        Integer,
+        ForeignKey("new_chat_threads.id", ondelete="SET NULL"),
+        nullable=True,
+        index=True,
+    )
+    thread = relationship("NewChatThread")
+
+
 class Report(BaseModel, TimestampMixin):
    """Report model for storing generated Markdown reports."""

@ -1228,6 +1287,12 @@ class SearchSpace(BaseModel, TimestampMixin):
        order_by="Podcast.id.desc()",
        cascade="all, delete-orphan",
    )
+    video_presentations = relationship(
+        "VideoPresentation",
+        back_populates="search_space",
+        order_by="VideoPresentation.id.desc()",
+        cascade="all, delete-orphan",
+    )
    reports = relationship(
        "Report",
        back_populates="search_space",
--- a/surfsense_backend/app/routes/init.py
+++ b/surfsense_backend/app/routes/init.py
@ -42,6 +42,7 @@ from .search_spaces_routes import router as search_spaces_router
 from .slack_add_connector_route import router as slack_add_connector_router
 from .surfsense_docs_routes import router as surfsense_docs_router
 from .teams_add_connector_route import router as teams_add_connector_router
+from .video_presentations_routes import router as video_presentations_router
 from .youtube_routes import router as youtube_router

 router = APIRouter()
@ -55,6 +56,9 @@ router.include_router(new_chat_router)  # Chat with assistant-ui persistence
 router.include_router(sandbox_router)  # Sandbox file downloads (Daytona)
 router.include_router(chat_comments_router)
 router.include_router(podcasts_router)  # Podcast task status and audio
+router.include_router(
+    video_presentations_router
+)  # Video presentation status and streaming
 router.include_router(reports_router)  # Report CRUD and multi-format export
 router.include_router(image_generation_router)  # Image generation via litellm
 router.include_router(search_source_connectors_router)
--- a/surfsense_backend/app/routes/video_presentations_routes.py
+++ b/surfsense_backend/app/routes/video_presentations_routes.py
@ -0,0 +1,242 @@
+"""
+Video presentation routes for CRUD operations and per-slide audio streaming.
+
+These routes support the video presentation generation feature in new-chat.
+Frontend polls GET /video-presentations/{id} to check status field.
+When ready, `slides` holds the per-slide content with audio metadata (audio is exposed
+through per-slide streaming URLs) and `scene_codes` holds the per-slide Remotion code.
+The frontend compiles the Remotion code via Babel and renders with Remotion Player.
+"""
+
+import os
+from pathlib import Path
+
+from fastapi import APIRouter, Depends, HTTPException
+from fastapi.responses import StreamingResponse
+from sqlalchemy import select
+from sqlalchemy.exc import SQLAlchemyError
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.db import (
+    Permission,
+    SearchSpace,
+    SearchSpaceMembership,
+    User,
+    VideoPresentation,
+    get_async_session,
+)
+from app.schemas import VideoPresentationRead
+from app.users import current_active_user
+from app.utils.rbac import check_permission
+
+router = APIRouter()
+
+
+@router.get("/video-presentations", response_model=list[VideoPresentationRead])
+async def read_video_presentations(
+    skip: int = 0,
+    limit: int = 100,
+    search_space_id: int | None = None,
+    session: AsyncSession = Depends(get_async_session),
+    user: User = Depends(current_active_user),
+):
+    """
+    List video presentations the user has access to, newest (highest ID) first.
+
+    When ``search_space_id`` is given, VIDEO_PRESENTATIONS_READ permission is
+    checked for that search space. Without it, presentations from every search
+    space the user is a member of are returned.
+
+    Args:
+        skip: Number of rows to skip (must be >= 0).
+        limit: Maximum number of rows to return (must be >= 1).
+        search_space_id: Optional search space to restrict the listing to.
+
+    Raises:
+        HTTPException: 400 for invalid pagination parameters, 500 on database
+            errors, plus whatever ``check_permission`` raises.
+    """
+    if skip < 0 or limit < 1:
+        raise HTTPException(status_code=400, detail="Invalid pagination parameters")
+    try:
+        if search_space_id is not None:
+            await check_permission(
+                session,
+                user,
+                search_space_id,
+                Permission.VIDEO_PRESENTATIONS_READ.value,
+                "You don't have permission to read video presentations in this search space",
+            )
+            query = select(VideoPresentation).filter(
+                VideoPresentation.search_space_id == search_space_id
+            )
+        else:
+            # NOTE(review): this branch filters on membership only; it does not
+            # check VIDEO_PRESENTATIONS_READ per search space.
+            query = (
+                select(VideoPresentation)
+                .join(SearchSpace)
+                .join(SearchSpaceMembership)
+                .filter(SearchSpaceMembership.user_id == user.id)
+            )
+
+        # OFFSET/LIMIT without ORDER BY returns rows in an unspecified order,
+        # so pages could overlap or skip rows; order by ID to make paging stable.
+        result = await session.execute(
+            query.order_by(VideoPresentation.id.desc()).offset(skip).limit(limit)
+        )
+        return [
+            VideoPresentationRead.from_orm_with_slides(vp)
+            for vp in result.scalars().all()
+        ]
+    except HTTPException:
+        raise
+    except SQLAlchemyError:
+        raise HTTPException(
+            status_code=500,
+            detail="Database error occurred while fetching video presentations",
+        ) from None
+
+
+@router.get(
+    "/video-presentations/{video_presentation_id}",
+    response_model=VideoPresentationRead,
+)
+async def read_video_presentation(
+    video_presentation_id: int,
+    session: AsyncSession = Depends(get_async_session),
+    user: User = Depends(current_active_user),
+):
+    """
+    Fetch a single video presentation by its ID.
+
+    The caller must be authenticated and hold VIDEO_PRESENTATIONS_READ on the
+    presentation's search space.
+
+    Once the status is "ready" the response carries:
+    - slides: parsed slide data with per-slide audio_url and durations
+    - scene_codes: Remotion component source code per slide
+
+    Raises:
+        HTTPException: 404 when the record does not exist, 500 on database
+            errors, plus whatever ``check_permission`` raises.
+    """
+    lookup = select(VideoPresentation).filter(
+        VideoPresentation.id == video_presentation_id
+    )
+    try:
+        presentation = (await session.execute(lookup)).scalars().first()
+        if not presentation:
+            raise HTTPException(status_code=404, detail="Video presentation not found")
+
+        # Permission is checked against the search space that owns the record.
+        await check_permission(
+            session,
+            user,
+            presentation.search_space_id,
+            Permission.VIDEO_PRESENTATIONS_READ.value,
+            "You don't have permission to read video presentations in this search space",
+        )
+        return VideoPresentationRead.from_orm_with_slides(presentation)
+    except HTTPException:
+        # Let 404/permission errors reach the client untouched.
+        raise
+    except SQLAlchemyError:
+        raise HTTPException(
+            status_code=500,
+            detail="Database error occurred while fetching video presentation",
+        ) from None
+
+
+@router.delete("/video-presentations/{video_presentation_id}", response_model=dict)
+async def delete_video_presentation(
+    video_presentation_id: int,
+    session: AsyncSession = Depends(get_async_session),
+    user: User = Depends(current_active_user),
+):
+    """
+    Remove a video presentation.
+
+    The caller needs VIDEO_PRESENTATIONS_DELETE on the presentation's search
+    space.
+
+    Raises:
+        HTTPException: 404 when the record does not exist, 500 on database
+            errors (after rolling back), plus whatever ``check_permission``
+            raises.
+    """
+    lookup = select(VideoPresentation).filter(
+        VideoPresentation.id == video_presentation_id
+    )
+    try:
+        target = (await session.execute(lookup)).scalars().first()
+        if not target:
+            raise HTTPException(status_code=404, detail="Video presentation not found")
+
+        # Permission is checked against the search space that owns the record.
+        await check_permission(
+            session,
+            user,
+            target.search_space_id,
+            Permission.VIDEO_PRESENTATIONS_DELETE.value,
+            "You don't have permission to delete video presentations in this search space",
+        )
+
+        await session.delete(target)
+        await session.commit()
+    except HTTPException:
+        # Let 404/permission errors reach the client untouched.
+        raise
+    except SQLAlchemyError:
+        await session.rollback()
+        raise HTTPException(
+            status_code=500,
+            detail="Database error occurred while deleting video presentation",
+        ) from None
+    else:
+        return {"message": "Video presentation deleted successfully"}
+
+
+@router.get("/video-presentations/{video_presentation_id}/slides/{slide_number}/audio")
+async def stream_slide_audio(
+    video_presentation_id: int,
+    slide_number: int,
+    session: AsyncSession = Depends(get_async_session),
+    user: User = Depends(current_active_user),
+):
+    """
+    Stream the audio file for a specific slide in a video presentation.
+    The slide_number is 1-based. Audio path is read from the slides JSONB.
+
+    Requires VIDEO_PRESENTATIONS_READ permission on the presentation's search
+    space. The media type is "audio/wav" for ``.wav`` files and "audio/mpeg"
+    for everything else.
+
+    Raises:
+        HTTPException: 404 when the presentation, the slide, or the audio file
+            is missing; 500 for any other error; plus whatever
+            ``check_permission`` raises.
+    """
+    # Size of each streamed chunk, in bytes.
+    chunk_size = 64 * 1024
+
+    try:
+        result = await session.execute(
+            select(VideoPresentation).filter(
+                VideoPresentation.id == video_presentation_id
+            )
+        )
+        video_pres = result.scalars().first()
+
+        if not video_pres:
+            raise HTTPException(status_code=404, detail="Video presentation not found")
+
+        await check_permission(
+            session,
+            user,
+            video_pres.search_space_id,
+            Permission.VIDEO_PRESENTATIONS_READ.value,
+            "You don't have permission to access video presentations in this search space",
+        )
+
+        # First slide entry whose slide_number matches, if any.
+        slide_data = next(
+            (
+                s
+                for s in video_pres.slides or []
+                if s.get("slide_number") == slide_number
+            ),
+            None,
+        )
+
+        if not slide_data:
+            raise HTTPException(
+                status_code=404,
+                detail=f"Slide {slide_number} not found",
+            )
+
+        file_path = slide_data.get("audio_file")
+        if not file_path or not os.path.isfile(file_path):
+            raise HTTPException(status_code=404, detail="Slide audio file not found")
+
+        ext = Path(file_path).suffix.lower()
+        media_type = "audio/wav" if ext == ".wav" else "audio/mpeg"
+
+        def iterfile():
+            # Read fixed-size chunks. Iterating the file object directly would
+            # split binary audio at every b"\n" byte, producing many
+            # arbitrarily sized (often tiny) chunks.
+            with open(file_path, mode="rb") as audio_file:
+                while chunk := audio_file.read(chunk_size):
+                    yield chunk
+
+        # NOTE(review): "Accept-Ranges: bytes" is advertised, but this handler
+        # does not process Range request headers; confirm clients cope with
+        # full-body responses.
+        return StreamingResponse(
+            iterfile(),
+            media_type=media_type,
+            headers={
+                "Accept-Ranges": "bytes",
+                "Content-Disposition": f"inline; filename={Path(file_path).name}",
+            },
+        )
+
+    except HTTPException:
+        raise
+    except Exception as e:
+        raise HTTPException(
+            status_code=500,
+            detail=f"Error streaming slide audio: {e!s}",
+        ) from e
--- a/surfsense_backend/app/schemas/init.py
+++ b/surfsense_backend/app/schemas/init.py
@ -101,6 +101,12 @@ from .search_space import (
    SearchSpaceWithStats,
 )
 from .users import UserCreate, UserRead, UserUpdate
+from .video_presentations import (
+    VideoPresentationBase,
+    VideoPresentationCreate,
+    VideoPresentationRead,
+    VideoPresentationUpdate,
+)

 __all__ = [
    # Chat schemas (assistant-ui integration)
@ -220,4 +226,9 @@ __all__ = [
    "UserRead",
    "UserSearchSpaceAccess",
    "UserUpdate",
+    # Video Presentation schemas
+    "VideoPresentationBase",
+    "VideoPresentationCreate",
+    "VideoPresentationRead",
+    "VideoPresentationUpdate",
 ]
--- a/surfsense_backend/app/schemas/video_presentations.py
+++ b/surfsense_backend/app/schemas/video_presentations.py
@ -0,0 +1,103 @@
+"""Video presentation schemas for API responses."""
+
+from datetime import datetime
+from enum import StrEnum
+from typing import Any
+
+from pydantic import BaseModel
+
+
+class VideoPresentationStatusEnum(StrEnum):
+    """Status values exposed by the video presentation API schemas."""
+
+    PENDING = "pending"
+    GENERATING = "generating"
+    READY = "ready"
+    FAILED = "failed"
+
+
+class VideoPresentationBase(BaseModel):
+    """Base video presentation schema."""
+
+    title: str
+    # Per-slide payloads are kept as free-form dicts; both lists are optional.
+    slides: list[dict[str, Any]] | None = None
+    scene_codes: list[dict[str, Any]] | None = None
+    # ID of the search space the presentation belongs to.
+    search_space_id: int
+
+
+class VideoPresentationCreate(VideoPresentationBase):
+    """Payload for creating a video presentation; adds nothing to the base schema."""
+
+
+class VideoPresentationUpdate(BaseModel):
+    """Schema for updating a video presentation."""
+
+    # Every field is optional and defaults to None.
+    title: str | None = None
+    slides: list[dict[str, Any]] | None = None
+    scene_codes: list[dict[str, Any]] | None = None
+
+
+class VideoPresentationRead(VideoPresentationBase):
+    """Response schema for a stored video presentation."""
+
+    id: int
+    status: VideoPresentationStatusEnum = VideoPresentationStatusEnum.READY
+    created_at: datetime
+    slide_count: int | None = None
+
+    class Config:
+        from_attributes = True
+
+    @classmethod
+    def from_orm_with_slides(cls, obj):
+        """Build the response from a record, adding ``slide_count``.
+
+        Server-side ``audio_file`` paths in the slides are swapped for API
+        streaming URLs so the frontend can hand them straight to Remotion's
+        <Audio />. ``slide_count`` is ``None`` when the record has no slides.
+        """
+        stored_slides = obj.slides
+        # Only rewrite when there is at least one slide; None/[] pass through.
+        if stored_slides:
+            public_slides = _replace_audio_paths_with_urls(obj.id, stored_slides)
+        else:
+            public_slides = stored_slides
+
+        return cls(
+            id=obj.id,
+            title=obj.title,
+            slides=public_slides,
+            scene_codes=obj.scene_codes,
+            search_space_id=obj.search_space_id,
+            status=obj.status,
+            created_at=obj.created_at,
+            slide_count=len(stored_slides) if stored_slides else None,
+        )
+
+
+def _replace_audio_paths_with_urls(
+    video_presentation_id: int,
+    slides: list[dict[str, Any]],
+) -> list[dict[str, Any]]:
+    """Swap server-local ``audio_file`` paths for streaming API URLs.
+
+    Each slide is copied (the input is not mutated), its ``audio_file`` key is
+    dropped, and an ``audio_url`` key is set, e.g.
+
+      "audio_file": "video_presentation_audio/abc_slide_1.mp3"
+    becomes
+      "audio_url": "/api/v1/video-presentations/42/slides/1/audio"
+
+    ``audio_url`` is ``None`` when the slide has no audio file or no
+    ``slide_number``. The frontend passes the URL to Remotion's
+    <Audio src={slide.audio_url} />.
+    """
+
+    def _public_view(slide: dict[str, Any]) -> dict[str, Any]:
+        # Copy everything except the server-side path.
+        public = {key: value for key, value in slide.items() if key != "audio_file"}
+        number = public.get("slide_number")
+        if slide.get("audio_file") and number is not None:
+            public["audio_url"] = (
+                f"/api/v1/video-presentations/{video_presentation_id}"
+                f"/slides/{number}/audio"
+            )
+        else:
+            public["audio_url"] = None
+        return public
+
+    return [_public_view(slide) for slide in slides]
--- a/surfsense_backend/app/tasks/celery_tasks/video_presentation_tasks.py
+++ b/surfsense_backend/app/tasks/celery_tasks/video_presentation_tasks.py
@ -0,0 +1,178 @@
+"""Celery tasks for video presentation generation."""
+
+import asyncio
+import logging
+import sys
+
+from sqlalchemy import select
+
+from app.agents.video_presentation.graph import graph as video_presentation_graph
+from app.agents.video_presentation.state import State as VideoPresentationState
+from app.celery_app import celery_app
+from app.config import config
+from app.db import VideoPresentation, VideoPresentationStatus
+from app.tasks.celery_tasks import get_celery_session_maker
+
+logger = logging.getLogger(__name__)
+
+# On Windows, switch asyncio to the Proactor event loop policy at import time.
+# As the warning below says, async subprocess support may fail without it.
+if sys.platform.startswith("win"):
+    try:
+        asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())
+    except AttributeError:
+        # The policy class is not available in this runtime; keep the default.
+        logger.warning(
+            "WindowsProactorEventLoopPolicy is unavailable; async subprocess support may fail."
+        )
+
+
+def _clear_generating_video_presentation(search_space_id: int) -> None:
+    """Clear the generating video presentation marker from Redis when task completes.
+
+    Best effort: any failure is logged as a warning and never raised, so the
+    caller's cleanup path is not interrupted.
+
+    Args:
+        search_space_id: Search space whose
+            ``video_presentation:generating:<id>`` key should be deleted.
+    """
+    import redis
+
+    key = f"video_presentation:generating:{search_space_id}"
+    try:
+        # Use the client as a context manager so its connection is closed
+        # deterministically instead of being left to garbage collection.
+        with redis.from_url(config.REDIS_APP_URL, decode_responses=True) as client:
+            client.delete(key)
+        logger.info(
+            f"Cleared generating video presentation key for search_space_id={search_space_id}"
+        )
+    except Exception as e:
+        logger.warning(f"Could not clear generating video presentation key: {e}")
+
+
+@celery_app.task(name="generate_video_presentation", bind=True)
+def generate_video_presentation_task(
+    self,
+    video_presentation_id: int,
+    source_content: str,
+    search_space_id: int,
+    user_prompt: str | None = None,
+) -> dict:
+    """
+    Celery task to generate video presentation from source content.
+    Updates existing video presentation record created by the tool.
+
+    The coroutine runs on a dedicated event loop that is always torn down
+    (async generators finalized, loop closed) whether generation succeeds or
+    fails, and the Redis "generating" marker is always cleared.
+
+    Args:
+        video_presentation_id: ID of the record to update.
+        source_content: Text to turn into a presentation.
+        search_space_id: Search space the presentation belongs to.
+        user_prompt: Optional extra instructions forwarded to the graph.
+
+    Returns:
+        The result dict of the generation coroutine on success, or
+        ``{"status": "failed", "video_presentation_id": ...}`` on any error.
+    """
+    loop = asyncio.new_event_loop()
+    asyncio.set_event_loop(loop)
+
+    try:
+        return loop.run_until_complete(
+            _generate_video_presentation(
+                video_presentation_id,
+                source_content,
+                search_space_id,
+                user_prompt,
+            )
+        )
+    except Exception as e:
+        # logger.exception keeps the traceback, which a plain error log drops.
+        logger.exception(f"Error generating video presentation: {e!s}")
+        loop.run_until_complete(_mark_video_presentation_failed(video_presentation_id))
+        return {"status": "failed", "video_presentation_id": video_presentation_id}
+    finally:
+        try:
+            # Finalize async generators on the failure path too, not only
+            # after a successful run.
+            loop.run_until_complete(loop.shutdown_asyncgens())
+        except Exception as e:
+            logger.warning(f"Error shutting down async generators: {e}")
+        finally:
+            try:
+                _clear_generating_video_presentation(search_space_id)
+            finally:
+                asyncio.set_event_loop(None)
+                loop.close()
+
+
+async def _mark_video_presentation_failed(video_presentation_id: int) -> None:
+    """Set the status of a video presentation record to FAILED.
+
+    Runs in its own session. A missing record is ignored, and any error is
+    logged rather than raised.
+    """
+    session_factory = get_celery_session_maker()
+    async with session_factory() as session:
+        try:
+            lookup = select(VideoPresentation).filter(
+                VideoPresentation.id == video_presentation_id
+            )
+            record = (await session.execute(lookup)).scalars().first()
+            if not record:
+                # Nothing to update.
+                return
+            record.status = VideoPresentationStatus.FAILED
+            await session.commit()
+        except Exception as e:
+            logger.error(f"Failed to mark video presentation as failed: {e}")
+
+
+def _video_presentation_item_to_dict(item) -> dict:
+    """Return a plain dict for a graph output item (model with ``model_dump`` or mapping)."""
+    return item.model_dump() if hasattr(item, "model_dump") else dict(item)
+
+
+async def _generate_video_presentation(
+    video_presentation_id: int,
+    source_content: str,
+    search_space_id: int,
+    user_prompt: str | None = None,
+) -> dict:
+    """Generate video presentation and update existing record.
+
+    Marks the record GENERATING, runs the video presentation graph, merges the
+    per-slide audio results into the slide data, stores ``slides`` and
+    ``scene_codes`` on the record and marks it READY.
+
+    Args:
+        video_presentation_id: ID of the record to update.
+        source_content: Text handed to the graph as its source content.
+        search_space_id: Passed to the graph through its configurable options.
+        user_prompt: Optional extra instructions passed to the graph.
+
+    Returns:
+        Dict with ``status`` ("ready"), ``video_presentation_id``, ``title``
+        and ``slide_count``.
+
+    Raises:
+        ValueError: If the record does not exist.
+        Exception: Any generation error is re-raised after the record has been
+            marked FAILED.
+    """
+    async with get_celery_session_maker()() as session:
+        result = await session.execute(
+            select(VideoPresentation).filter(
+                VideoPresentation.id == video_presentation_id
+            )
+        )
+        video_pres = result.scalars().first()
+
+        if not video_pres:
+            raise ValueError(f"VideoPresentation {video_presentation_id} not found")
+
+        try:
+            video_pres.status = VideoPresentationStatus.GENERATING
+            await session.commit()
+
+            graph_config = {
+                "configurable": {
+                    "video_title": video_pres.title,
+                    "search_space_id": search_space_id,
+                    "user_prompt": user_prompt,
+                }
+            }
+
+            initial_state = VideoPresentationState(
+                source_content=source_content,
+                db_session=session,
+            )
+
+            graph_result = await video_presentation_graph.ainvoke(
+                initial_state, config=graph_config
+            )
+
+            # A key can be present with a None value, in which case
+            # .get(key, []) returns None; "or []" keeps the loops below safe.
+            slides_raw = graph_result.get("slides") or []
+            audio_results_raw = graph_result.get("slide_audio_results") or []
+            scene_codes_raw = graph_result.get("slide_scene_codes") or []
+
+            # Index audio results by slide number for the merge below.
+            audio_map = {}
+            for audio_result in audio_results_raw:
+                audio_data = _video_presentation_item_to_dict(audio_result)
+                audio_map[audio_data.get("slide_number", 0)] = audio_data
+
+            # Serialize slides (parsed content + audio info merged).
+            serializable_slides = []
+            for slide in slides_raw:
+                slide_data = _video_presentation_item_to_dict(slide)
+                audio_data = audio_map.get(slide_data.get("slide_number", 0), {})
+                slide_data["audio_file"] = audio_data.get("audio_file")
+                slide_data["duration_seconds"] = audio_data.get("duration_seconds")
+                slide_data["duration_in_frames"] = audio_data.get("duration_in_frames")
+                serializable_slides.append(slide_data)
+
+            serializable_scene_codes = [
+                _video_presentation_item_to_dict(scene_code)
+                for scene_code in scene_codes_raw
+            ]
+
+            video_pres.slides = serializable_slides
+            video_pres.scene_codes = serializable_scene_codes
+            video_pres.status = VideoPresentationStatus.READY
+            await session.commit()
+
+            logger.info(f"Successfully generated video presentation: {video_pres.id}")
+
+            return {
+                "status": "ready",
+                "video_presentation_id": video_pres.id,
+                "title": video_pres.title,
+                "slide_count": len(serializable_slides),
+            }
+
+        except Exception as e:
+            logger.error(f"Error in _generate_video_presentation: {e!s}")
+            video_pres.status = VideoPresentationStatus.FAILED
+            await session.commit()
+            raise
--- a/surfsense_backend/app/tasks/chat/stream_new_chat.py
+++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py
@ -613,6 +613,41 @@ async def _stream_agent_events(
                    status="completed",
                    items=completed_items,
                )
+            elif tool_name == "generate_video_presentation":
+                vp_status = (
+                    tool_output.get("status", "unknown")
+                    if isinstance(tool_output, dict)
+                    else "unknown"
+                )
+                vp_title = (
+                    tool_output.get("title", "Presentation")
+                    if isinstance(tool_output, dict)
+                    else "Presentation"
+                )
+                if vp_status in ("pending", "generating"):
+                    completed_items = [
+                        f"Title: {vp_title}",
+                        "Presentation generation started",
+                        "Processing in background...",
+                    ]
+                elif vp_status == "failed":
+                    error_msg = (
+                        tool_output.get("error", "Unknown error")
+                        if isinstance(tool_output, dict)
+                        else "Unknown error"
+                    )
+                    completed_items = [
+                        f"Title: {vp_title}",
+                        f"Error: {error_msg[:50]}",
+                    ]
+                else:
+                    completed_items = last_active_step_items
+                yield streaming_service.format_thinking_step(
+                    step_id=original_step_id,
+                    title="Generating video presentation",
+                    status="completed",
+                    items=completed_items,
+                )
            elif tool_name == "generate_report":
                report_status = (
                    tool_output.get("status", "unknown")
@ -756,6 +791,34 @@ async def _stream_agent_events(
                        f"Podcast generation failed: {error_msg}",
                        "error",
                    )
+            elif tool_name == "generate_video_presentation":
+                yield streaming_service.format_tool_output_available(
+                    tool_call_id,
+                    tool_output
+                    if isinstance(tool_output, dict)
+                    else {"result": tool_output},
+                )
+                if (
+                    isinstance(tool_output, dict)
+                    and tool_output.get("status") == "pending"
+                ):
+                    yield streaming_service.format_terminal_info(
+                        f"Video presentation queued: {tool_output.get('title', 'Presentation')}",
+                        "success",
+                    )
+                elif (
+                    isinstance(tool_output, dict)
+                    and tool_output.get("status") == "failed"
+                ):
+                    error_msg = (
+                        tool_output.get("error", "Unknown error")
+                        if isinstance(tool_output, dict)
+                        else "Unknown error"
+                    )
+                    yield streaming_service.format_terminal_info(
+                        f"Presentation generation failed: {error_msg}",
+                        "error",
+                    )
            elif tool_name == "link_preview":
                yield streaming_service.format_tool_output_available(
                    tool_call_id,