mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-25 19:15:18 +02:00
feat: init video presentation agent
This commit is contained in:
parent
40d949b7d5
commit
b28f135a96
37 changed files with 3567 additions and 24 deletions
72
surfsense_backend/app/agents/video_presentation/state.py
Normal file
72
surfsense_backend/app/agents/video_presentation/state.py
Normal file
|
|
@ -0,0 +1,72 @@
|
|||
"""Define the state structures for the video presentation agent."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
|
||||
class SlideContent(BaseModel):
    """One slide as produced by the content-analysis step.

    Every field is required; the descriptions below are part of the
    model's schema.
    """

    # Position and headings.
    slide_number: int = Field(
        ...,
        description="1-based slide number",
    )
    title: str = Field(
        ...,
        description="Concise slide title",
    )
    subtitle: str = Field(
        ...,
        description="One-line subtitle or tagline",
    )

    # Visible body of the slide.
    content_in_markdown: str = Field(
        ...,
        description="Slide body content formatted as markdown",
    )

    # Narration lines for this slide.
    speaker_transcripts: list[str] = Field(
        ...,
        description="2-4 short sentences a presenter would say while this slide is shown",
    )

    # Visual direction for the slide background.
    background_explanation: str = Field(
        ...,
        description="Emotional mood and color direction for this slide",
    )
|
||||
|
||||
|
||||
class PresentationSlides(BaseModel):
    """Container for the complete list of slides parsed from the LLM output."""

    # Required; holds the slides in presentation order.
    slides: list[SlideContent] = Field(
        ...,
        description="Ordered array of presentation slides",
    )
|
||||
|
||||
|
||||
class SlideAudioResult(BaseModel):
    """Audio generation result for a single slide.

    All fields are required. The duration is stored twice: once in seconds
    and once as a frame count (the schema description states 30fps).
    """

    # Carries a schema description like SlideContent.slide_number does, so
    # the field is documented in the generated schema.
    slide_number: int = Field(
        ..., description="Number of the slide this audio was generated for"
    )
    audio_file: str = Field(..., description="Path to the per-slide audio file")
    duration_seconds: float = Field(..., description="Audio duration in seconds")
    duration_in_frames: int = Field(
        ..., description="Audio duration in frames (at 30fps)"
    )
|
||||
|
||||
|
||||
class SlideSceneCode(BaseModel):
    """Generated Remotion component code for a single slide.

    All fields are required.
    """

    # Carries a schema description like SlideContent.slide_number does, so
    # the field is documented in the generated schema.
    slide_number: int = Field(
        ..., description="Number of the slide this scene code was generated for"
    )
    code: str = Field(
        ..., description="Raw Remotion React component source code for this slide"
    )
    title: str = Field(..., description="Short title for the composition")
|
||||
|
||||
|
||||
@dataclass
class State:
    """Graph state carried through the video presentation agent.

    The pipeline runs in three stages:

    1. parse the source content into slides,
    2. generate TTS audio for each slide,
    3. generate Remotion code for each slide.

    Compilation and rendering are left to the frontend, which receives the
    slides, the code and the audio.
    """

    # Required inputs.
    db_session: AsyncSession
    source_content: str

    # Stage outputs; each defaults to None.
    slides: list[SlideContent] | None = None
    slide_audio_results: list[SlideAudioResult] | None = None
    slide_scene_codes: list[SlideSceneCode] | None = None
|
||||
Loading…
Add table
Add a link
Reference in a new issue