feat: init video presentation agent

2026-05-25 19:15:18 +02:00 · 2026-03-21 22:13:41 -07:00 · 2026-03-21 22:13:41 -07:00 · b28f135a96
commit b28f135a96
parent 40d949b7d5
37 changed files with 3567 additions and 24 deletions
--- a/surfsense_backend/app/agents/video_presentation/state.py
+++ b/surfsense_backend/app/agents/video_presentation/state.py
@ -0,0 +1,72 @@
+"""Define the state structures for the video presentation agent."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+from pydantic import BaseModel, Field
+from sqlalchemy.ext.asyncio import AsyncSession
+
+
+class SlideContent(BaseModel):
+    """Represents a single parsed slide from content analysis."""
+
+    slide_number: int = Field(..., description="1-based slide number")
+    title: str = Field(..., description="Concise slide title")
+    subtitle: str = Field(..., description="One-line subtitle or tagline")
+    content_in_markdown: str = Field(
+        ..., description="Slide body content formatted as markdown"
+    )
+    speaker_transcripts: list[str] = Field(
+        ...,
+        description="2-4 short sentences a presenter would say while this slide is shown",
+    )
+    background_explanation: str = Field(
+        ...,
+        description="Emotional mood and color direction for this slide",
+    )
+
+
+class PresentationSlides(BaseModel):
+    """Represents the full set of parsed slides from the LLM."""
+
+    slides: list[SlideContent] = Field(
+        ..., description="Ordered array of presentation slides"
+    )
+
+
+class SlideAudioResult(BaseModel):
+    """Audio generation result for a single slide."""
+
+    slide_number: int
+    audio_file: str = Field(..., description="Path to the per-slide audio file")
+    duration_seconds: float = Field(..., description="Audio duration in seconds")
+    duration_in_frames: int = Field(
+        ..., description="Audio duration in frames (at 30fps)"
+    )
+
+
+class SlideSceneCode(BaseModel):
+    """Generated Remotion component code for a single slide."""
+
+    slide_number: int
+    code: str = Field(
+        ..., description="Raw Remotion React component source code for this slide"
+    )
+    title: str = Field(..., description="Short title for the composition")
+
+
+@dataclass
+class State:
+    """State for the video presentation agent graph.
+
+    Pipeline: parse slides → generate per-slide TTS audio → generate per-slide Remotion code
+    The frontend receives the slides + code + audio and handles compilation/rendering.
+    """
+
+    db_session: AsyncSession
+    source_content: str
+
+    slides: list[SlideContent] | None = None
+    slide_audio_results: list[SlideAudioResult] | None = None
+    slide_scene_codes: list[SlideSceneCode] | None = None