From 8dd29fa8338acedc6aaeb00d490d8078d428f11e Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 10 Jun 2026 18:44:03 +0200 Subject: [PATCH 01/50] feat(podcasts): add module package init --- surfsense_backend/app/podcasts/__init__.py | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 surfsense_backend/app/podcasts/__init__.py diff --git a/surfsense_backend/app/podcasts/__init__.py b/surfsense_backend/app/podcasts/__init__.py new file mode 100644 index 000000000..058274b4f --- /dev/null +++ b/surfsense_backend/app/podcasts/__init__.py @@ -0,0 +1,10 @@ +"""Podcast generation: brief resolution, transcript drafting, and audio rendering. + +The public surface grows as the module is built. For now it owns the +``podcasts`` table model, which :mod:`app.db` re-exports so existing +``from app.db import Podcast`` call sites keep working during the migration. +""" + +from __future__ import annotations + +__all__: list[str] = [] From 73e191af0905d030518c04615803ee7139521d99 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 10 Jun 2026 18:44:03 +0200 Subject: [PATCH 02/50] feat(podcasts): add brief and transcript schemas --- .../app/podcasts/schemas/__init__.py | 24 +++ .../app/podcasts/schemas/spec.py | 156 ++++++++++++++++++ .../app/podcasts/schemas/transcript.py | 41 +++++ 3 files changed, 221 insertions(+) create mode 100644 surfsense_backend/app/podcasts/schemas/__init__.py create mode 100644 surfsense_backend/app/podcasts/schemas/spec.py create mode 100644 surfsense_backend/app/podcasts/schemas/transcript.py diff --git a/surfsense_backend/app/podcasts/schemas/__init__.py b/surfsense_backend/app/podcasts/schemas/__init__.py new file mode 100644 index 000000000..cd19a21cc --- /dev/null +++ b/surfsense_backend/app/podcasts/schemas/__init__.py @@ -0,0 +1,24 @@ +"""Pydantic shapes for the podcast brief and transcript.""" + +from __future__ import annotations + +from .spec import ( + DurationTarget, + PodcastSpec, + PodcastStyle, + SpeakerRole, + SpeakerSpec, + 
normalize_language_tag, +) +from .transcript import Transcript, TranscriptTurn + +__all__ = [ + "DurationTarget", + "PodcastSpec", + "PodcastStyle", + "SpeakerRole", + "SpeakerSpec", + "Transcript", + "TranscriptTurn", + "normalize_language_tag", +] diff --git a/surfsense_backend/app/podcasts/schemas/spec.py b/surfsense_backend/app/podcasts/schemas/spec.py new file mode 100644 index 000000000..2d3b3c74e --- /dev/null +++ b/surfsense_backend/app/podcasts/schemas/spec.py @@ -0,0 +1,156 @@ +"""The brief: the editable configuration a user approves before drafting. + +A :class:`PodcastSpec` front-loads every decision that drives token or audio +cost (language, speakers, voices, style, target length) so the expensive +drafting and rendering steps run once against settled inputs. It is stored as +JSONB on the ``podcasts`` row and round-trips through the review API. +""" + +from __future__ import annotations + +import re +from enum import StrEnum + +from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator + +# A speaker count beyond this is almost never a real podcast and explodes the +# voice/turn-attribution space, so we reject it at the brief gate. +MAX_SPEAKERS = 6 + +# Long-form is a goal, but an open-ended upper bound invites runaway TTS bills. +# One day of audio is a generous ceiling that still blocks obvious mistakes. +MAX_DURATION_MINUTES = 24 * 60 + +# BCP-47 primary subtag plus optional region (e.g. ``en``, ``en-US``, ``pt-BR``). +# Kept deliberately permissive: the voice catalog, not the brief, decides which +# languages can actually be synthesised. Casing is normalised after matching. +_LANGUAGE_TAG = re.compile(r"^[A-Za-z]{2,3}(-[A-Za-z0-9]{2,8})*$") + + +def normalize_language_tag(value: str) -> str: + """Validate and canonicalise a BCP-47 tag (lowercased primary subtag). + + Shared with the generation layer so detected and user-entered languages are + normalised identically before they reach a :class:`PodcastSpec`. 
+ """ + cleaned = value.strip() + if not _LANGUAGE_TAG.match(cleaned): + raise ValueError(f"not a valid BCP-47 language tag: {value!r}") + primary, _, rest = cleaned.partition("-") + return primary.lower() if not rest else f"{primary.lower()}-{rest}" + + +class SpeakerRole(StrEnum): + """How a speaker functions in the conversation, used to steer drafting.""" + + HOST = "host" + COHOST = "cohost" + GUEST = "guest" + EXPERT = "expert" + NARRATOR = "narrator" + + +class PodcastStyle(StrEnum): + """The conversational format the transcript should follow.""" + + CONVERSATIONAL = "conversational" + INTERVIEW = "interview" + DEBATE = "debate" + MONOLOGUE = "monologue" + NARRATIVE = "narrative" + + +class SpeakerSpec(BaseModel): + """One voice in the podcast: who they are and which TTS voice renders them. + + ``slot`` is the stable join key. Transcript turns reference a speaker by + ``slot`` and the renderer resolves ``voice_id`` for that same slot, so the + two never drift even if speakers are reordered in the brief. + """ + + model_config = ConfigDict(extra="forbid") + + slot: int = Field(..., ge=0, description="Stable index a transcript turn references") + name: str = Field(..., min_length=1, max_length=120) + role: SpeakerRole + voice_id: str = Field( + ..., + min_length=1, + description="Catalog voice id valid for the spec's language and provider", + ) + + @field_validator("name", "voice_id") + @classmethod + def _strip_required_text(cls, value: str) -> str: + cleaned = value.strip() + if not cleaned: + raise ValueError("must not be blank") + return cleaned + + +class DurationTarget(BaseModel): + """The desired finished length as an inclusive minute range. + + Drafting aims for the midpoint and treats the bounds as soft guardrails; + storing a range (rather than a point) keeps long-form expectations honest + without pretending we can hit an exact runtime. 
+ """ + + model_config = ConfigDict(extra="forbid") + + min_minutes: int = Field(..., ge=1, le=MAX_DURATION_MINUTES) + max_minutes: int = Field(..., ge=1, le=MAX_DURATION_MINUTES) + + @model_validator(mode="after") + def _check_order(self) -> DurationTarget: + if self.max_minutes < self.min_minutes: + raise ValueError("max_minutes must be >= min_minutes") + return self + + @property + def midpoint_minutes(self) -> float: + """The runtime drafting should aim for within the range.""" + return (self.min_minutes + self.max_minutes) / 2 + + +class PodcastSpec(BaseModel): + """The full brief approved before any tokens or audio are spent.""" + + model_config = ConfigDict(extra="forbid") + + language: str = Field(..., description="BCP-47 tag, e.g. 'en', 'en-US', 'pt-BR'") + style: PodcastStyle = PodcastStyle.CONVERSATIONAL + speakers: list[SpeakerSpec] = Field(..., min_length=1, max_length=MAX_SPEAKERS) + duration: DurationTarget + focus: str | None = Field( + default=None, + max_length=2000, + description="Optional user steer for what the episode should emphasise", + ) + + @field_validator("language") + @classmethod + def _normalise_language(cls, value: str) -> str: + return normalize_language_tag(value) + + @field_validator("focus") + @classmethod + def _blank_focus_is_none(cls, value: str | None) -> str | None: + if value is None: + return None + cleaned = value.strip() + return cleaned or None + + @model_validator(mode="after") + def _check_speaker_slots(self) -> PodcastSpec: + slots = [speaker.slot for speaker in self.speakers] + if len(slots) != len(set(slots)): + raise ValueError("speaker slots must be unique") + return self + + def speaker_for(self, slot: int) -> SpeakerSpec: + """Return the speaker bound to ``slot`` or raise if none matches.""" + for speaker in self.speakers: + if speaker.slot == slot: + return speaker + raise KeyError(f"no speaker for slot {slot}") diff --git a/surfsense_backend/app/podcasts/schemas/transcript.py 
b/surfsense_backend/app/podcasts/schemas/transcript.py new file mode 100644 index 000000000..b4c1463d8 --- /dev/null +++ b/surfsense_backend/app/podcasts/schemas/transcript.py @@ -0,0 +1,41 @@ +"""The transcript: ordered dialogue turns drafting produces for review. + +A :class:`Transcript` is the reviewable artifact at the go/no-go gate and the +exact input the renderer turns into audio. Each turn names a speaker by the +``slot`` defined in the :class:`~app.podcasts.schemas.spec.PodcastSpec`, so the +renderer can resolve the right voice without re-attributing anything. +""" + +from __future__ import annotations + +from pydantic import BaseModel, ConfigDict, Field, field_validator + + +class TranscriptTurn(BaseModel): + """A single spoken line by one speaker.""" + + model_config = ConfigDict(extra="forbid") + + speaker: int = Field(..., ge=0, description="The PodcastSpec speaker slot speaking") + text: str = Field(..., min_length=1) + + @field_validator("text") + @classmethod + def _strip_text(cls, value: str) -> str: + cleaned = value.strip() + if not cleaned: + raise ValueError("turn text must not be blank") + return cleaned + + +class Transcript(BaseModel): + """The full ordered dialogue for an episode.""" + + model_config = ConfigDict(extra="forbid") + + turns: list[TranscriptTurn] = Field(..., min_length=1) + + @property + def word_count(self) -> int: + """Total spoken words, used to estimate runtime against the brief.""" + return sum(len(turn.text.split()) for turn in self.turns) From 65b6c2d3577e523348674bab21b8daca7609ec3d Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 10 Jun 2026 18:44:03 +0200 Subject: [PATCH 03/50] feat(podcasts): add persistence model and repository --- .../app/podcasts/persistence/__init__.py | 9 ++ .../podcasts/persistence/enums/__init__.py | 7 ++ .../persistence/enums/podcast_status.py | 37 +++++++++ .../app/podcasts/persistence/models.py | 82 +++++++++++++++++++ .../app/podcasts/persistence/repository.py | 46 +++++++++++ 5 files 
changed, 181 insertions(+) create mode 100644 surfsense_backend/app/podcasts/persistence/__init__.py create mode 100644 surfsense_backend/app/podcasts/persistence/enums/__init__.py create mode 100644 surfsense_backend/app/podcasts/persistence/enums/podcast_status.py create mode 100644 surfsense_backend/app/podcasts/persistence/models.py create mode 100644 surfsense_backend/app/podcasts/persistence/repository.py diff --git a/surfsense_backend/app/podcasts/persistence/__init__.py b/surfsense_backend/app/podcasts/persistence/__init__.py new file mode 100644 index 000000000..2166d5d9d --- /dev/null +++ b/surfsense_backend/app/podcasts/persistence/__init__.py @@ -0,0 +1,9 @@ +"""Models, enums, and data access for the podcasts table.""" + +from __future__ import annotations + +from .enums import PodcastStatus +from .models import Podcast +from .repository import PodcastRepository + +__all__ = ["Podcast", "PodcastRepository", "PodcastStatus"] diff --git a/surfsense_backend/app/podcasts/persistence/enums/__init__.py b/surfsense_backend/app/podcasts/persistence/enums/__init__.py new file mode 100644 index 000000000..f0527fd78 --- /dev/null +++ b/surfsense_backend/app/podcasts/persistence/enums/__init__.py @@ -0,0 +1,7 @@ +"""Enums for the podcasts table.""" + +from __future__ import annotations + +from .podcast_status import PodcastStatus + +__all__ = ["PodcastStatus"] diff --git a/surfsense_backend/app/podcasts/persistence/enums/podcast_status.py b/surfsense_backend/app/podcasts/persistence/enums/podcast_status.py new file mode 100644 index 000000000..403473353 --- /dev/null +++ b/surfsense_backend/app/podcasts/persistence/enums/podcast_status.py @@ -0,0 +1,37 @@ +"""Podcast generation lifecycle. + +The status drives a guarded state machine. A podcast is proposed (``PENDING``), +gets a reviewable brief (``AWAITING_BRIEF``), is drafted into a transcript +(``DRAFTING`` → ``AWAITING_REVIEW``), then rendered to audio (``RENDERING`` → +``READY``). 
``FAILED`` and ``CANCELLED`` are terminal. The Python enum is kept +in lockstep with the ``podcast_status`` Postgres type via its paired migration. +""" + +from __future__ import annotations + +from enum import StrEnum + + +class PodcastStatus(StrEnum): + PENDING = "pending" + AWAITING_BRIEF = "awaiting_brief" + DRAFTING = "drafting" + AWAITING_REVIEW = "awaiting_review" + RENDERING = "rendering" + READY = "ready" + FAILED = "failed" + CANCELLED = "cancelled" + + @property + def is_terminal(self) -> bool: + """Whether no further transition is possible from this state.""" + return self in _TERMINAL + + @property + def is_gate(self) -> bool: + """Whether this state waits on user input before proceeding.""" + return self in _GATES + + +_TERMINAL = frozenset({PodcastStatus.READY, PodcastStatus.FAILED, PodcastStatus.CANCELLED}) +_GATES = frozenset({PodcastStatus.AWAITING_BRIEF, PodcastStatus.AWAITING_REVIEW}) diff --git a/surfsense_backend/app/podcasts/persistence/models.py b/surfsense_backend/app/podcasts/persistence/models.py new file mode 100644 index 000000000..6e40a8040 --- /dev/null +++ b/surfsense_backend/app/podcasts/persistence/models.py @@ -0,0 +1,82 @@ +"""``podcasts`` table: a generated podcast, its brief, transcript, and state.""" + +from __future__ import annotations + +from sqlalchemy import ( + Column, + Enum as SQLAlchemyEnum, + ForeignKey, + Integer, + String, + Text, +) +from sqlalchemy.dialects.postgresql import JSONB +from sqlalchemy.orm import relationship + +from app.db import BaseModel, TimestampMixin + +from .enums import PodcastStatus + + +class Podcast(BaseModel, TimestampMixin): + """A podcast across its whole lifecycle: brief, transcript, audio, status. + + ``spec`` (the reviewable brief) and ``podcast_transcript`` are JSONB so the + flexible Pydantic shapes can evolve without migrations. ``spec_version`` + backs optimistic concurrency on brief edits. 
Rendered audio lives in the + object store, addressed by ``storage_backend`` + ``storage_key`` rather than + a raw path. + """ + + __tablename__ = "podcasts" + + title = Column(String(500), nullable=False) + + status = Column( + SQLAlchemyEnum( + PodcastStatus, + name="podcast_status", + create_type=False, + values_callable=lambda x: [e.value for e in x], + ), + nullable=False, + default=PodcastStatus.PENDING, + server_default=PodcastStatus.PENDING.value, + index=True, + ) + + # The source material the episode is generated from. Persisted because + # drafting happens after the brief gate, long after creation. + source_content = Column(Text, nullable=True) + + # The reviewable brief (PodcastSpec); null until the brief gate is reached. + spec = Column(JSONB, nullable=True) + # Bumped on every spec edit; guards concurrent edits at the brief gate. + spec_version = Column(Integer, nullable=False, default=1, server_default="1") + + # The drafted dialogue (Transcript); null until drafting completes. + podcast_transcript = Column(JSONB, nullable=True) + + # Where the rendered audio lives in the object store; null until READY. + storage_backend = Column(String(32), nullable=True) + storage_key = Column(Text, nullable=True) + duration_seconds = Column(Integer, nullable=True) + + # Human-readable reason when status is FAILED. + error = Column(Text, nullable=True) + + # Legacy local audio path; retained for back-compat until cutover. 
+ file_location = Column(Text, nullable=True) + + search_space_id = Column( + Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False + ) + search_space = relationship("SearchSpace", back_populates="podcasts") + + thread_id = Column( + Integer, + ForeignKey("new_chat_threads.id", ondelete="SET NULL"), + nullable=True, + index=True, + ) + thread = relationship("NewChatThread") diff --git a/surfsense_backend/app/podcasts/persistence/repository.py b/surfsense_backend/app/podcasts/persistence/repository.py new file mode 100644 index 000000000..04eae9ce1 --- /dev/null +++ b/surfsense_backend/app/podcasts/persistence/repository.py @@ -0,0 +1,46 @@ +"""Data access for the ``podcasts`` table. + +A thin async repository so the service and tasks never write raw queries. It +only loads and persists rows; lifecycle rules and (de)serialization live in the +service. +""" + +from __future__ import annotations + +from sqlalchemy import select +from sqlalchemy.ext.asyncio import AsyncSession + +from .models import Podcast + + +class PodcastRepository: + """Loads and stores :class:`Podcast` rows for one session.""" + + def __init__(self, session: AsyncSession) -> None: + self._session = session + + async def get(self, podcast_id: int) -> Podcast | None: + return await self._session.get(Podcast, podcast_id) + + async def add(self, podcast: Podcast) -> Podcast: + """Persist a new row and assign its primary key.""" + self._session.add(podcast) + await self._session.flush() + return podcast + + async def latest_with_spec(self, search_space_id: int) -> Podcast | None: + """Most recent podcast in the space that has a stored brief. + + Used to seed language/voice defaults for a new podcast from what the + user chose last. 
+ """ + result = await self._session.execute( + select(Podcast) + .where( + Podcast.search_space_id == search_space_id, + Podcast.spec.is_not(None), + ) + .order_by(Podcast.created_at.desc()) + .limit(1) + ) + return result.scalars().first() From ee249257477d28a6bf3ef3e650010e925a1d81d7 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 10 Jun 2026 18:44:03 +0200 Subject: [PATCH 04/50] feat(podcasts): add voice catalog --- .../app/podcasts/voices/__init__.py | 22 ++++++++ .../app/podcasts/voices/catalog.py | 55 +++++++++++++++++++ .../app/podcasts/voices/provider.py | 27 +++++++++ .../app/podcasts/voices/voice.py | 50 +++++++++++++++++ 4 files changed, 154 insertions(+) create mode 100644 surfsense_backend/app/podcasts/voices/__init__.py create mode 100644 surfsense_backend/app/podcasts/voices/catalog.py create mode 100644 surfsense_backend/app/podcasts/voices/provider.py create mode 100644 surfsense_backend/app/podcasts/voices/voice.py diff --git a/surfsense_backend/app/podcasts/voices/__init__.py b/surfsense_backend/app/podcasts/voices/__init__.py new file mode 100644 index 000000000..230b0b540 --- /dev/null +++ b/surfsense_backend/app/podcasts/voices/__init__.py @@ -0,0 +1,22 @@ +"""Voices: the catalog of selectable TTS voices and the active provider. + +Replaces the legacy hardcoded speaker-id voice maps. Callers obtain the +catalog via :func:`get_voice_catalog` and identify the configured provider via +:func:`provider_from_service`. 
+""" + +from __future__ import annotations + +from .catalog import VoiceCatalog, get_voice_catalog +from .provider import TtsProvider, provider_from_service +from .voice import ANY_LANGUAGE, CatalogVoice, VoiceGender + +__all__ = [ + "ANY_LANGUAGE", + "CatalogVoice", + "TtsProvider", + "VoiceCatalog", + "VoiceGender", + "get_voice_catalog", + "provider_from_service", +] diff --git a/surfsense_backend/app/podcasts/voices/catalog.py b/surfsense_backend/app/podcasts/voices/catalog.py new file mode 100644 index 000000000..591812943 --- /dev/null +++ b/surfsense_backend/app/podcasts/voices/catalog.py @@ -0,0 +1,55 @@ +"""The voice catalog: look up and filter selectable voices. + +A :class:`VoiceCatalog` is the single source of truth for which voices exist, +replacing the hardcoded speaker-id maps. Resolution uses it to pick defaults +for a brief, the API exposes it as picker options, and the renderer uses it to +turn a stored ``voice_id`` back into the provider-native reference. +""" + +from __future__ import annotations + +from collections.abc import Iterable +from functools import lru_cache + +from .data import AZURE_VOICES, KOKORO_VOICES, OPENAI_VOICES, VERTEX_VOICES +from .provider import TtsProvider +from .voice import CatalogVoice + + +class VoiceCatalog: + """An indexed, read-only collection of :class:`CatalogVoice`.""" + + def __init__(self, voices: Iterable[CatalogVoice]) -> None: + self._by_id: dict[str, CatalogVoice] = {} + self._by_provider: dict[TtsProvider, list[CatalogVoice]] = {} + for voice in voices: + if voice.voice_id in self._by_id: + raise ValueError(f"duplicate voice_id: {voice.voice_id}") + self._by_id[voice.voice_id] = voice + self._by_provider.setdefault(voice.provider, []).append(voice) + + def get(self, voice_id: str) -> CatalogVoice: + """Return the voice with ``voice_id`` or raise ``KeyError``.""" + return self._by_id[voice_id] + + def for_provider(self, provider: TtsProvider) -> list[CatalogVoice]: + """All voices offered by ``provider``, 
in catalog order.""" + return list(self._by_provider.get(provider, ())) + + def for_language( + self, provider: TtsProvider, language: str + ) -> list[CatalogVoice]: + """``provider`` voices that can render ``language``, in catalog order.""" + return [v for v in self.for_provider(provider) if v.speaks(language)] + + def supports_language(self, provider: TtsProvider, language: str) -> bool: + """Whether ``provider`` has at least one voice for ``language``.""" + return any(v.speaks(language) for v in self.for_provider(provider)) + + +@lru_cache(maxsize=1) +def get_voice_catalog() -> VoiceCatalog: + """The process-wide catalog assembled from every provider's roster.""" + return VoiceCatalog( + (*KOKORO_VOICES, *OPENAI_VOICES, *AZURE_VOICES, *VERTEX_VOICES) + ) diff --git a/surfsense_backend/app/podcasts/voices/provider.py b/surfsense_backend/app/podcasts/voices/provider.py new file mode 100644 index 000000000..f57ae11cc --- /dev/null +++ b/surfsense_backend/app/podcasts/voices/provider.py @@ -0,0 +1,27 @@ +"""The TTS providers we carry voices for, and how to name one from config.""" + +from __future__ import annotations + +from enum import StrEnum + + +class TtsProvider(StrEnum): + """A speech provider whose voices the catalog enumerates.""" + + KOKORO = "kokoro" + OPENAI = "openai" + AZURE = "azure" + VERTEX_AI = "vertex_ai" + + +def provider_from_service(service: str) -> TtsProvider: + """Map a ``TTS_SERVICE`` string to its provider. + + The config value is a LiteLLM-style ``provider/model`` string + (``openai/tts-1``, ``vertex_ai/...``) except for local Kokoro, which is + spelled ``local/kokoro``; both halves of that special case resolve here. 
+ """ + prefix = service.split("/", 1)[0].strip().lower() + if prefix == "local": + return TtsProvider.KOKORO + return TtsProvider(prefix) diff --git a/surfsense_backend/app/podcasts/voices/voice.py b/surfsense_backend/app/podcasts/voices/voice.py new file mode 100644 index 000000000..6478f04b0 --- /dev/null +++ b/surfsense_backend/app/podcasts/voices/voice.py @@ -0,0 +1,50 @@ +"""A catalog voice: a stable id paired with its provider-native reference.""" + +from __future__ import annotations + +from dataclasses import dataclass +from enum import StrEnum + +from app.podcasts.tts import VoiceRef + +from .provider import TtsProvider + +# A voice that speaks whatever language the input text is in (e.g. OpenAI's +# voices), matched against every requested language. +ANY_LANGUAGE = "*" + + +class VoiceGender(StrEnum): + """Perceived voice gender, used to pick distinct voices per speaker.""" + + MALE = "male" + FEMALE = "female" + NEUTRAL = "neutral" + + +@dataclass(frozen=True, slots=True) +class CatalogVoice: + """One selectable voice. + + ``voice_id`` is the provider-prefixed, stable id stored on a speaker in the + brief (e.g. ``"kokoro:am_adam"``). ``native_ref`` is the untyped value the + TTS adapter passes to the provider — a string for most, a mapping for + Vertex — kept separate so renaming the catalog id never breaks synthesis. 
+ """ + + voice_id: str + provider: TtsProvider + language: str + display_name: str + gender: VoiceGender + native_ref: VoiceRef + + def speaks(self, language: str) -> bool: + """Whether this voice can render ``language`` (primary subtag match).""" + if self.language == ANY_LANGUAGE: + return True + return _primary(self.language) == _primary(language) + + +def _primary(language: str) -> str: + return language.split("-", 1)[0].strip().lower() From 75287020e1a109f0c4b2aef02ec45b82009978f6 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 10 Jun 2026 18:44:03 +0200 Subject: [PATCH 05/50] feat(podcasts): add text-to-speech adapters --- .../app/podcasts/tts/__init__.py | 22 ++++ .../app/podcasts/tts/adapters/__init__.py | 3 + .../app/podcasts/tts/adapters/kokoro.py | 111 ++++++++++++++++++ .../app/podcasts/tts/adapters/litellm.py | 69 +++++++++++ surfsense_backend/app/podcasts/tts/audio.py | 19 +++ surfsense_backend/app/podcasts/tts/errors.py | 13 ++ surfsense_backend/app/podcasts/tts/factory.py | 38 ++++++ surfsense_backend/app/podcasts/tts/port.py | 31 +++++ surfsense_backend/app/podcasts/tts/request.py | 22 ++++ 9 files changed, 328 insertions(+) create mode 100644 surfsense_backend/app/podcasts/tts/__init__.py create mode 100644 surfsense_backend/app/podcasts/tts/adapters/__init__.py create mode 100644 surfsense_backend/app/podcasts/tts/adapters/kokoro.py create mode 100644 surfsense_backend/app/podcasts/tts/adapters/litellm.py create mode 100644 surfsense_backend/app/podcasts/tts/audio.py create mode 100644 surfsense_backend/app/podcasts/tts/errors.py create mode 100644 surfsense_backend/app/podcasts/tts/factory.py create mode 100644 surfsense_backend/app/podcasts/tts/port.py create mode 100644 surfsense_backend/app/podcasts/tts/request.py diff --git a/surfsense_backend/app/podcasts/tts/__init__.py b/surfsense_backend/app/podcasts/tts/__init__.py new file mode 100644 index 000000000..16379dc2b --- /dev/null +++ b/surfsense_backend/app/podcasts/tts/__init__.py @@ 
-0,0 +1,22 @@ +"""Text-to-speech: a per-segment synthesis port with provider adapters. + +Callers depend on :class:`TextToSpeech` and obtain the configured provider from +:func:`get_text_to_speech`; the concrete Kokoro/LiteLLM adapters stay private. +""" + +from __future__ import annotations + +from .audio import SynthesizedAudio +from .errors import TextToSpeechError +from .factory import get_text_to_speech +from .port import TextToSpeech +from .request import SynthesisRequest, VoiceRef + +__all__ = [ + "SynthesisRequest", + "SynthesizedAudio", + "TextToSpeech", + "TextToSpeechError", + "VoiceRef", + "get_text_to_speech", +] diff --git a/surfsense_backend/app/podcasts/tts/adapters/__init__.py b/surfsense_backend/app/podcasts/tts/adapters/__init__.py new file mode 100644 index 000000000..24d517e55 --- /dev/null +++ b/surfsense_backend/app/podcasts/tts/adapters/__init__.py @@ -0,0 +1,3 @@ +"""Per-provider TextToSpeech implementations.""" + +from __future__ import annotations diff --git a/surfsense_backend/app/podcasts/tts/adapters/kokoro.py b/surfsense_backend/app/podcasts/tts/adapters/kokoro.py new file mode 100644 index 000000000..031b48e86 --- /dev/null +++ b/surfsense_backend/app/podcasts/tts/adapters/kokoro.py @@ -0,0 +1,111 @@ +"""Local Kokoro adapter: on-box synthesis, no network or per-segment cost. + +Kokoro selects its language model by a single-letter ``lang_code``, so this +adapter maps the brief's BCP-47 tag to that code and caches one pipeline per +code (pipeline construction loads weights and is expensive). Pipelines run in a +thread pool because Kokoro is synchronous; the renderer caps how many segments +synthesise at once. 
+""" + +from __future__ import annotations + +import asyncio +import io +from typing import TYPE_CHECKING + +from ..audio import SynthesizedAudio +from ..errors import TextToSpeechError +from ..port import TextToSpeech +from ..request import SynthesisRequest + +if TYPE_CHECKING: + from kokoro import KPipeline + +# Kokoro emits 24 kHz mono PCM regardless of voice. +_SAMPLE_RATE = 24000 + +# BCP-47 primary subtag -> Kokoro language code. English defaults to American; +# the en-GB region override below switches it to British. +_LANG_CODE_BY_PRIMARY = { + "en": "a", + "es": "e", + "fr": "f", + "hi": "h", + "it": "i", + "ja": "j", + "pt": "p", + "zh": "z", +} + + +class KokoroTextToSpeech(TextToSpeech): + """Synthesises segments with locally hosted Kokoro pipelines.""" + + def __init__(self) -> None: + self._pipelines: dict[str, KPipeline] = {} + + @property + def container(self) -> str: + return "wav" + + async def synthesize(self, request: SynthesisRequest) -> SynthesizedAudio: + if not isinstance(request.voice, str): + raise TextToSpeechError( + "Kokoro voices are named by string, not a mapping" + ) + + pipeline = self._pipeline_for(request.language) + loop = asyncio.get_event_loop() + try: + generator = await loop.run_in_executor( + None, + lambda: pipeline( + request.text, + voice=request.voice, + speed=request.speed, + split_pattern=r"\n+", + ), + ) + segments = [audio for _gs, _ps, audio in generator] + except Exception as exc: # noqa: BLE001 - normalise provider errors + raise TextToSpeechError(f"Kokoro synthesis failed: {exc}") from exc + + if not segments: + raise TextToSpeechError("Kokoro produced no audio for the text") + + return SynthesizedAudio( + data=_encode_wav(segments, _SAMPLE_RATE), + container="wav", + sample_rate=_SAMPLE_RATE, + ) + + def _pipeline_for(self, language: str) -> KPipeline: + lang_code = _lang_code(language) + pipeline = self._pipelines.get(lang_code) + if pipeline is None: + from kokoro import KPipeline + + pipeline = 
KPipeline(lang_code=lang_code) + self._pipelines[lang_code] = pipeline + return pipeline + + +def _lang_code(language: str) -> str: + normalised = language.strip().lower() + if normalised.startswith("en-gb") or normalised == "en-uk": + return "b" + primary = normalised.partition("-")[0] + code = _LANG_CODE_BY_PRIMARY.get(primary) + if code is None: + raise TextToSpeechError(f"Kokoro has no language model for {language!r}") + return code + + +def _encode_wav(segments: list, sample_rate: int) -> bytes: + import numpy as np + import soundfile as sf + + waveform = segments[0] if len(segments) == 1 else np.concatenate(segments) + buffer = io.BytesIO() + sf.write(buffer, waveform, sample_rate, format="WAV") + return buffer.getvalue() diff --git a/surfsense_backend/app/podcasts/tts/adapters/litellm.py b/surfsense_backend/app/podcasts/tts/adapters/litellm.py new file mode 100644 index 000000000..55f49bd1e --- /dev/null +++ b/surfsense_backend/app/podcasts/tts/adapters/litellm.py @@ -0,0 +1,69 @@ +"""LiteLLM adapter: hosted TTS (OpenAI, Azure, Vertex AI) via one ``aspeech`` call. + +LiteLLM normalises every hosted provider behind the same ``aspeech`` surface, +so a single adapter covers them all. The provider is encoded in the model +string (e.g. ``openai/tts-1``, ``vertex_ai/...``) and the voice reference is +whatever that provider expects, which the catalog already supplies. +""" + +from __future__ import annotations + +from ..audio import SynthesizedAudio +from ..errors import TextToSpeechError +from ..port import TextToSpeech +from ..request import SynthesisRequest + +# Hosted providers return MP3-encoded bytes from ``aspeech``. +_CONTAINER = "mp3" + +# Matches the legacy podcaster timeouts; long single segments still finish well +# under this, and retries cover transient upstream failures. 
+_TIMEOUT_SECONDS = 600 +_MAX_RETRIES = 2 + + +class LiteLlmTextToSpeech(TextToSpeech): + """Synthesises segments through any LiteLLM-supported hosted TTS model.""" + + def __init__( + self, + *, + model: str, + api_base: str | None = None, + api_key: str | None = None, + ) -> None: + self._model = model + self._api_base = api_base + self._api_key = api_key + + @property + def container(self) -> str: + return _CONTAINER + + async def synthesize(self, request: SynthesisRequest) -> SynthesizedAudio: + from litellm import aspeech + + kwargs = { + "model": self._model, + "voice": request.voice, + "input": request.text, + "max_retries": _MAX_RETRIES, + "timeout": _TIMEOUT_SECONDS, + } + if self._api_base: + kwargs["api_base"] = self._api_base + if self._api_key: + kwargs["api_key"] = self._api_key + + try: + response = await aspeech(**kwargs) + except Exception as exc: # noqa: BLE001 - normalise provider errors + raise TextToSpeechError( + f"{self._model} synthesis failed: {exc}" + ) from exc + + data = getattr(response, "content", None) + if not data: + raise TextToSpeechError(f"{self._model} returned no audio") + + return SynthesizedAudio(data=data, container=_CONTAINER) diff --git a/surfsense_backend/app/podcasts/tts/audio.py b/surfsense_backend/app/podcasts/tts/audio.py new file mode 100644 index 000000000..f3c79dd5a --- /dev/null +++ b/surfsense_backend/app/podcasts/tts/audio.py @@ -0,0 +1,19 @@ +"""The bytes a TTS provider returns for one segment.""" + +from __future__ import annotations + +from dataclasses import dataclass + + +@dataclass(frozen=True, slots=True) +class SynthesizedAudio: + """Encoded audio for a single segment, ready to cache and concatenate. + + ``container`` is the file extension the bytes are encoded as (``"wav"`` or + ``"mp3"``); the renderer uses it to name the on-disk segment so FFmpeg can + demux the right format during merge. 
+ """ + + data: bytes + container: str + sample_rate: int | None = None diff --git a/surfsense_backend/app/podcasts/tts/errors.py b/surfsense_backend/app/podcasts/tts/errors.py new file mode 100644 index 000000000..8e7ec3f2b --- /dev/null +++ b/surfsense_backend/app/podcasts/tts/errors.py @@ -0,0 +1,13 @@ +"""Failures raised by the TTS layer.""" + +from __future__ import annotations + + +class TextToSpeechError(RuntimeError): + """A provider failed to synthesise a segment. + + Raised for both configuration faults (an unusable voice reference) and + provider faults (the upstream call errored or returned no audio), so the + renderer can fail the segment without unwrapping provider-specific + exceptions. + """ diff --git a/surfsense_backend/app/podcasts/tts/factory.py b/surfsense_backend/app/podcasts/tts/factory.py new file mode 100644 index 000000000..7b4a48adf --- /dev/null +++ b/surfsense_backend/app/podcasts/tts/factory.py @@ -0,0 +1,38 @@ +"""Resolve the configured :class:`TextToSpeech` as a process-wide singleton.""" + +from __future__ import annotations + +from functools import lru_cache + +from .port import TextToSpeech + +# Sentinel model string that selects the local Kokoro pipeline; anything else is +# treated as a LiteLLM-hosted model (``openai/...``, ``vertex_ai/...``, etc.). +KOKORO_SERVICE = "local/kokoro" + + +@lru_cache(maxsize=1) +def get_text_to_speech() -> TextToSpeech: + """Build the provider selected by ``TTS_SERVICE`` (adapters lazy-imported). + + Cached because the Kokoro adapter holds loaded pipelines that must be reused + across segments and requests rather than rebuilt per call. 
+ """ + from app.config import config as app_config + + service = app_config.TTS_SERVICE + if not service: + raise ValueError("TTS_SERVICE is not configured") + + if service == KOKORO_SERVICE: + from .adapters.kokoro import KokoroTextToSpeech + + return KokoroTextToSpeech() + + from .adapters.litellm import LiteLlmTextToSpeech + + return LiteLlmTextToSpeech( + model=service, + api_base=app_config.TTS_SERVICE_API_BASE, + api_key=app_config.TTS_SERVICE_API_KEY, + ) diff --git a/surfsense_backend/app/podcasts/tts/port.py b/surfsense_backend/app/podcasts/tts/port.py new file mode 100644 index 000000000..604708260 --- /dev/null +++ b/surfsense_backend/app/podcasts/tts/port.py @@ -0,0 +1,31 @@ +"""The TTS contract: turn one segment of text into encoded audio.""" + +from __future__ import annotations + +from abc import ABC, abstractmethod + +from .audio import SynthesizedAudio +from .request import SynthesisRequest + + +class TextToSpeech(ABC): + """Synthesises a single segment; one implementation per provider. + + The contract is intentionally per-segment rather than per-episode: it keeps + each call independently cacheable and lets the renderer cap concurrency and + retry segments in isolation. Stitching segments into one file is the + renderer's job, not the provider's. + """ + + @property + @abstractmethod + def container(self) -> str: + """File extension/container this provider emits (e.g. ``"mp3"``).""" + + @abstractmethod + async def synthesize(self, request: SynthesisRequest) -> SynthesizedAudio: + """Voice ``request.text`` and return its encoded audio. + + Raises :class:`~app.podcasts.tts.errors.TextToSpeechError` on any + provider or configuration failure. 
+ """ diff --git a/surfsense_backend/app/podcasts/tts/request.py b/surfsense_backend/app/podcasts/tts/request.py new file mode 100644 index 000000000..2cb5f6ec4 --- /dev/null +++ b/surfsense_backend/app/podcasts/tts/request.py @@ -0,0 +1,22 @@ +"""What the renderer hands a TTS provider to voice a single segment.""" + +from __future__ import annotations + +from collections.abc import Mapping +from dataclasses import dataclass +from typing import Any + +# A provider-native voice reference. OpenAI/Azure/Kokoro name a voice with a +# string; Vertex passes a mapping (``languageCode`` + ``name``). The catalog +# stores whichever shape the provider expects and we pass it through untouched. +VoiceRef = str | Mapping[str, Any] + + +@dataclass(frozen=True, slots=True) +class SynthesisRequest: + """One unit of speech to synthesise: the smallest cacheable render step.""" + + text: str + voice: VoiceRef + language: str + speed: float = 1.0 From bd6d0790302d8c6991029682eea985588b2f1e77 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 10 Jun 2026 18:44:03 +0200 Subject: [PATCH 06/50] feat(podcasts): add language and voice resolution --- .../app/podcasts/resolution/__init__.py | 27 +++++++ .../app/podcasts/resolution/language.py | 74 +++++++++++++++++ .../app/podcasts/resolution/voices.py | 79 +++++++++++++++++++ 3 files changed, 180 insertions(+) create mode 100644 surfsense_backend/app/podcasts/resolution/__init__.py create mode 100644 surfsense_backend/app/podcasts/resolution/language.py create mode 100644 surfsense_backend/app/podcasts/resolution/voices.py diff --git a/surfsense_backend/app/podcasts/resolution/__init__.py b/surfsense_backend/app/podcasts/resolution/__init__.py new file mode 100644 index 000000000..ebfd3153a --- /dev/null +++ b/surfsense_backend/app/podcasts/resolution/__init__.py @@ -0,0 +1,27 @@ +"""Resolution: deterministic default chains for a fresh brief. 
+ +Turns weak signals (detected language, last-used preferences) into concrete +language and voice defaults, so the brief gate opens pre-filled and most users +approve without editing. +""" + +from __future__ import annotations + +from .language import ( + DEFAULT_LANGUAGE, + DEFAULT_LANGUAGE_CHAIN, + LanguageContext, + LanguageResolver, + resolve_language, +) +from .voices import VoiceResolutionError, resolve_voices + +__all__ = [ + "DEFAULT_LANGUAGE", + "DEFAULT_LANGUAGE_CHAIN", + "LanguageContext", + "LanguageResolver", + "VoiceResolutionError", + "resolve_language", + "resolve_voices", +] diff --git a/surfsense_backend/app/podcasts/resolution/language.py b/surfsense_backend/app/podcasts/resolution/language.py new file mode 100644 index 000000000..2da90ef37 --- /dev/null +++ b/surfsense_backend/app/podcasts/resolution/language.py @@ -0,0 +1,74 @@ +"""Resolve the brief's language without spending tokens at the gate. + +The chain mirrors the agreed policy: prefer a language detected from the source, +fall back to what the user last chose, and finally default to English (which the +user can still override in the brief). Detection itself is performed upstream +where an LLM is available and passed in as :attr:`LanguageContext.detected`, so +this layer stays pure and deterministic. +""" + +from __future__ import annotations + +from abc import ABC, abstractmethod +from dataclasses import dataclass + +# What a brand-new user with no signal gets, and what every chain ends on. 
+DEFAULT_LANGUAGE = "en" + + +@dataclass(frozen=True, slots=True) +class LanguageContext: + """Signals available when proposing a language for a fresh podcast.""" + + detected: str | None = None + last_used: str | None = None + + +class LanguageResolver(ABC): + """One step in the language fallback chain.""" + + @abstractmethod + def resolve(self, context: LanguageContext) -> str | None: + """Return a language tag, or ``None`` to defer to the next resolver.""" + + +class DetectedLanguage(LanguageResolver): + """Use the language detected from the source, when confident enough.""" + + def resolve(self, context: LanguageContext) -> str | None: + return context.detected + + +class LastUsedLanguage(LanguageResolver): + """Reuse the language from the user's previous podcast.""" + + def resolve(self, context: LanguageContext) -> str | None: + return context.last_used + + +class DefaultLanguage(LanguageResolver): + """Terminal step: always yields the default so the chain never fails.""" + + def resolve(self, context: LanguageContext) -> str | None: + return DEFAULT_LANGUAGE + + +# Order encodes the policy; prepend stronger signals here as they appear. +DEFAULT_LANGUAGE_CHAIN: tuple[LanguageResolver, ...] = ( + DetectedLanguage(), + LastUsedLanguage(), + DefaultLanguage(), +) + + +def resolve_language( + context: LanguageContext, + chain: tuple[LanguageResolver, ...] = DEFAULT_LANGUAGE_CHAIN, +) -> str: + """Walk ``chain`` and return the first language a resolver yields.""" + for resolver in chain: + language = resolver.resolve(context) + if language: + return language.strip() + # The default resolver guarantees a value; this guards a misconfigured chain. 
+ return DEFAULT_LANGUAGE diff --git a/surfsense_backend/app/podcasts/resolution/voices.py b/surfsense_backend/app/podcasts/resolution/voices.py new file mode 100644 index 000000000..8d865fbaa --- /dev/null +++ b/surfsense_backend/app/podcasts/resolution/voices.py @@ -0,0 +1,79 @@ +"""Assign a default voice to each speaker for the resolved language. + +The default chain reuses the user's previously chosen voices where they are +still valid for the new language/provider, then fills any remaining speakers +with distinct catalog voices (preferring an unused gender so a two-speaker +episode sounds like two people). The user can override any of these in the +brief; this only seeds sensible defaults so most briefs need no edits. +""" + +from __future__ import annotations + +from collections.abc import Sequence + +from app.podcasts.voices import CatalogVoice, TtsProvider, VoiceCatalog + + +class VoiceResolutionError(RuntimeError): + """No catalog voice exists for the requested provider and language.""" + + +def resolve_voices( + *, + catalog: VoiceCatalog, + provider: TtsProvider, + language: str, + speaker_count: int, + preferred: Sequence[str] | None = None, +) -> list[CatalogVoice]: + """Return one :class:`CatalogVoice` per speaker, in slot order. + + ``preferred`` is the user's last-used voice ids (by slot); any that no + longer fit the provider/language are silently dropped and replaced. 
+ """ + if speaker_count < 1: + raise ValueError("speaker_count must be >= 1") + + available = catalog.for_language(provider, language) + if not available: + raise VoiceResolutionError( + f"{provider.value} has no voice for language {language!r}" + ) + + preferred = preferred or () + by_id = {voice.voice_id: voice for voice in available} + + assignment: list[CatalogVoice] = [] + used_ids: set[str] = set() + used_genders: set = set() + + for slot in range(speaker_count): + reuse_id = preferred[slot] if slot < len(preferred) else None + if reuse_id and reuse_id in by_id and reuse_id not in used_ids: + voice = by_id[reuse_id] + else: + voice = _pick_distinct(available, used_ids, used_genders) + assignment.append(voice) + used_ids.add(voice.voice_id) + used_genders.add(voice.gender) + + return assignment + + +def _pick_distinct( + available: list[CatalogVoice], + used_ids: set[str], + used_genders: set, +) -> CatalogVoice: + """Pick a fresh voice, preferring an unused gender, then any unused voice. + + Falls back to the first catalog voice when speakers outnumber distinct + voices, so resolution always assigns every speaker rather than failing. 
+ """ + fresh = [v for v in available if v.voice_id not in used_ids] + if fresh: + for voice in fresh: + if voice.gender not in used_genders: + return voice + return fresh[0] + return available[0] From 0004abdc79f6471aaed1e0f979b14e4946cc3cc8 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 10 Jun 2026 18:44:03 +0200 Subject: [PATCH 07/50] feat(podcasts): add audio renderer --- .../app/podcasts/rendering/__init__.py | 12 ++ .../app/podcasts/rendering/cache.py | 53 ++++++ .../app/podcasts/rendering/errors.py | 11 ++ .../app/podcasts/rendering/merge.py | 48 ++++++ .../app/podcasts/rendering/renderer.py | 157 ++++++++++++++++++ 5 files changed, 281 insertions(+) create mode 100644 surfsense_backend/app/podcasts/rendering/__init__.py create mode 100644 surfsense_backend/app/podcasts/rendering/cache.py create mode 100644 surfsense_backend/app/podcasts/rendering/errors.py create mode 100644 surfsense_backend/app/podcasts/rendering/merge.py create mode 100644 surfsense_backend/app/podcasts/rendering/renderer.py diff --git a/surfsense_backend/app/podcasts/rendering/__init__.py b/surfsense_backend/app/podcasts/rendering/__init__.py new file mode 100644 index 000000000..9fb50a2e1 --- /dev/null +++ b/surfsense_backend/app/podcasts/rendering/__init__.py @@ -0,0 +1,12 @@ +"""Rendering: synthesise and merge an approved transcript into audio. + +The :class:`PodcastRenderer` is the public entry point; the segment cache and +FFmpeg merge are implementation details it owns. +""" + +from __future__ import annotations + +from .errors import RenderError +from .renderer import PodcastRenderer, RenderedPodcast + +__all__ = ["PodcastRenderer", "RenderError", "RenderedPodcast"] diff --git a/surfsense_backend/app/podcasts/rendering/cache.py b/surfsense_backend/app/podcasts/rendering/cache.py new file mode 100644 index 000000000..32d9f0c21 --- /dev/null +++ b/surfsense_backend/app/podcasts/rendering/cache.py @@ -0,0 +1,53 @@ +"""Content-addressed cache for synthesised segments. 
class SegmentCache:
    """On-disk store of segment audio, addressed by request content hash.

    Entries live directly under ``root`` as ``<key>.<container>``. Writes go
    through a temporary sibling file and an atomic rename, so a file that
    :meth:`get` reports as present always holds the complete bytes that were
    passed to :meth:`put`, even if the process died during an earlier write.
    """

    def __init__(self, root: Path) -> None:
        """Use ``root`` as the cache directory, creating it if missing."""
        self._root = root
        self._root.mkdir(parents=True, exist_ok=True)

    def key(self, request: SynthesisRequest) -> str:
        """Return a stable hash of the inputs that determine the audio bytes.

        The key covers ``voice``, ``language``, ``speed`` and ``text`` and
        nothing else, so two requests with equal fields share one entry.
        """
        material = json.dumps(
            {
                "voice": request.voice,
                "language": request.language,
                "speed": request.speed,
                "text": request.text,
            },
            sort_keys=True,
            ensure_ascii=True,
        )
        return hashlib.sha256(material.encode("utf-8")).hexdigest()

    def path(self, key: str, container: str) -> Path:
        """Return where the entry for ``key`` lives, whether or not it exists."""
        return self._root / f"{key}.{container}"

    def get(self, key: str, container: str) -> Path | None:
        """Return the cached segment path, or ``None`` on a miss."""
        path = self.path(key, container)
        return path if path.exists() else None

    def put(self, key: str, container: str, data: bytes) -> Path:
        """Atomically write ``data`` for ``key`` and return its path.

        The bytes are written to a uniquely named temporary file in the cache
        directory and then renamed over the final path. The temporary name
        never matches ``<key>.<container>``, so an interrupted write cannot be
        mistaken for a cache hit by :meth:`get`.
        """
        import uuid

        path = self.path(key, container)
        # Same directory as the target so the rename stays on one filesystem.
        scratch = path.with_name(f".{path.name}.{uuid.uuid4().hex}.tmp")
        try:
            scratch.write_bytes(data)
            scratch.replace(path)
        except BaseException:
            # Do not leave half-written scratch files behind on failure.
            scratch.unlink(missing_ok=True)
            raise
        return path
import annotations + + +class RenderError(RuntimeError): + """Rendering could not produce a final audio file. + + Wraps both per-segment synthesis failures and the merge step so the render + task sees one failure type regardless of where it originated. + """ diff --git a/surfsense_backend/app/podcasts/rendering/merge.py b/surfsense_backend/app/podcasts/rendering/merge.py new file mode 100644 index 000000000..48771d17c --- /dev/null +++ b/surfsense_backend/app/podcasts/rendering/merge.py @@ -0,0 +1,48 @@ +"""Concatenate ordered segment files into a single MP3. + +Uses FFmpeg's concat *demuxer* (a list file of inputs) rather than a +``filter_complex`` graph. The demuxer takes one ``-i`` no matter how many +segments there are, so an hour-long episode with thousands of turns never hits +command-line length limits. Output is always re-encoded to MP3 for a uniform +artifact regardless of the source container (Kokoro WAV or hosted MP3). +""" + +from __future__ import annotations + +from pathlib import Path + +from ffmpeg.asyncio import FFmpeg + +from .errors import RenderError + + +async def concat_to_mp3(segment_paths: list[Path], output_path: Path) -> None: + """Merge ``segment_paths`` in order into ``output_path`` as MP3.""" + if not segment_paths: + raise RenderError("cannot merge an empty list of segments") + + list_file = output_path.with_name(f"{output_path.stem}.concat.txt") + list_file.write_text(_concat_list(segment_paths), encoding="utf-8") + + try: + ffmpeg = ( + FFmpeg() + .option("y") + .input(str(list_file), f="concat", safe=0) + .output(str(output_path), {"c:a": "libmp3lame"}) + ) + await ffmpeg.execute() + except Exception as exc: # noqa: BLE001 - normalise ffmpeg failures + raise RenderError(f"audio merge failed: {exc}") from exc + finally: + list_file.unlink(missing_ok=True) + + +def _concat_list(segment_paths: list[Path]) -> str: + # The concat demuxer reads `file ''` lines; single quotes in a path + # are escaped per its quoting rules ('\''). 
+ lines = [] + for path in segment_paths: + escaped = str(path.resolve()).replace("'", "'\\''") + lines.append(f"file '{escaped}'") + return "\n".join(lines) + "\n" diff --git a/surfsense_backend/app/podcasts/rendering/renderer.py b/surfsense_backend/app/podcasts/rendering/renderer.py new file mode 100644 index 000000000..89a4e6b7d --- /dev/null +++ b/surfsense_backend/app/podcasts/rendering/renderer.py @@ -0,0 +1,157 @@ +"""Render an approved transcript into a single podcast audio file. + +The renderer is the only place that turns dialogue into sound. It maps each +turn to its speaker's voice, synthesises segments concurrently (capped, served +from the segment cache when possible, and coalesced so identical lines render +once), then merges them in order. It takes a settled spec + transcript and +returns bytes; persistence and lifecycle transitions belong to the service. +""" + +from __future__ import annotations + +import asyncio +from dataclasses import dataclass +from pathlib import Path + +from app.podcasts.schemas import PodcastSpec, Transcript, TranscriptTurn +from app.podcasts.tts import SynthesisRequest, TextToSpeech, TextToSpeechError +from app.podcasts.voices import VoiceCatalog + +from .cache import SegmentCache +from .errors import RenderError +from .merge import concat_to_mp3 + +# Bounds how many segments synthesise at once. Protects hosted-provider rate +# limits and avoids thrashing the local Kokoro pipeline; the renderer is I/O- or +# model-bound per segment, so a small pool already saturates throughput. 
+DEFAULT_MAX_CONCURRENCY = 4 + +_MERGED_FILENAME = "podcast.mp3" + + +@dataclass(frozen=True, slots=True) +class RenderedPodcast: + """The finished episode: encoded bytes plus their container.""" + + data: bytes + container: str + + +class PodcastRenderer: + """Synthesises and merges a transcript using one TTS provider.""" + + def __init__( + self, + *, + tts: TextToSpeech, + catalog: VoiceCatalog, + max_concurrency: int = DEFAULT_MAX_CONCURRENCY, + ) -> None: + self._tts = tts + self._catalog = catalog + self._max_concurrency = max_concurrency + + async def render( + self, + *, + spec: PodcastSpec, + transcript: Transcript, + workdir: Path, + ) -> RenderedPodcast: + """Produce the merged MP3 for ``transcript`` under ``spec``. + + ``workdir`` holds the segment cache and merge output; reusing the same + directory across renders is what makes voice edits cheap. + """ + cache = SegmentCache(workdir / "segments") + requests = [self._request_for(spec, turn) for turn in transcript.turns] + + # Concurrency primitives are created per render so each call is bound to + # the event loop running it (Celery tasks may use a fresh loop). 
+ synthesizer = _SegmentSynthesizer(self._tts, cache, self._max_concurrency) + segment_paths = await asyncio.gather( + *(synthesizer.segment(request) for request in requests) + ) + + output_path = workdir / _MERGED_FILENAME + await concat_to_mp3(list(segment_paths), output_path) + return RenderedPodcast(data=output_path.read_bytes(), container="mp3") + + def _request_for( + self, spec: PodcastSpec, turn: TranscriptTurn + ) -> SynthesisRequest: + try: + speaker = spec.speaker_for(turn.speaker) + except KeyError as exc: + raise RenderError( + f"transcript references unknown speaker slot {turn.speaker}" + ) from exc + try: + voice = self._catalog.get(speaker.voice_id) + except KeyError as exc: + raise RenderError(f"unknown voice {speaker.voice_id!r}") from exc + return SynthesisRequest( + text=turn.text, voice=voice.native_ref, language=spec.language + ) + + +class _SegmentSynthesizer: + """Per-render synthesis coordinator: caps concurrency and dedupes work. + + Beyond the on-disk cache (which serves cross-render reuse), this coalesces + identical segments that race within one render so the same line is voiced + once even when several turns request it simultaneously. 
+ """ + + def __init__( + self, tts: TextToSpeech, cache: SegmentCache, max_concurrency: int + ) -> None: + self._tts = tts + self._cache = cache + self._container = tts.container + self._semaphore = asyncio.Semaphore(max_concurrency) + self._inflight: dict[str, asyncio.Future[Path]] = {} + self._inflight_lock = asyncio.Lock() + + async def segment(self, request: SynthesisRequest) -> Path: + key = self._cache.key(request) + cached = self._cache.get(key, self._container) + if cached is not None: + return cached + + async with self._inflight_lock: + future = self._inflight.get(key) + owner = future is None + if owner: + future = asyncio.get_event_loop().create_future() + self._inflight[key] = future + + # The owner runs the work and publishes the outcome on the shared future; + # every caller (owner included) reads it back via ``await future`` so the + # result is retrieved exactly once-or-more and never left dangling. + if owner: + try: + path = await self._synthesize(request, key) + except BaseException as exc: # noqa: BLE001 - relayed to all waiters + future.set_exception(exc) + else: + future.set_result(path) + finally: + await self._forget(key) + + return await future + + async def _synthesize(self, request: SynthesisRequest, key: str) -> Path: + async with self._semaphore: + cached = self._cache.get(key, self._container) + if cached is not None: + return cached + try: + audio = await self._tts.synthesize(request) + except TextToSpeechError as exc: + raise RenderError(f"segment synthesis failed: {exc}") from exc + return self._cache.put(key, audio.container, audio.data) + + async def _forget(self, key: str) -> None: + async with self._inflight_lock: + self._inflight.pop(key, None) From a3386cd5f983263fdf925b494f053b2ce0504084 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 10 Jun 2026 18:44:03 +0200 Subject: [PATCH 08/50] feat(podcasts): add brief and transcript generation --- .../app/podcasts/generation/__init__.py | 20 +++ 
.../app/podcasts/generation/brief/__init__.py | 9 ++ .../app/podcasts/generation/brief/config.py | 31 ++++ .../podcasts/generation/brief/detection.py | 28 ++++ .../app/podcasts/generation/brief/graph.py | 27 ++++ .../app/podcasts/generation/brief/nodes.py | 153 ++++++++++++++++++ .../app/podcasts/generation/brief/state.py | 19 +++ .../podcasts/generation/prompts/__init__.py | 15 ++ .../generation/prompts/detect_language.py | 22 +++ .../generation/prompts/draft_segment.py | 54 +++++++ .../generation/prompts/plan_outline.py | 47 ++++++ .../podcasts/generation/prompts/speakers.py | 18 +++ .../app/podcasts/generation/structured.py | 49 ++++++ .../generation/transcript/__init__.py | 17 ++ .../podcasts/generation/transcript/config.py | 26 +++ .../podcasts/generation/transcript/graph.py | 29 ++++ .../podcasts/generation/transcript/nodes.py | 127 +++++++++++++++ .../generation/transcript/planning.py | 32 ++++ .../podcasts/generation/transcript/state.py | 22 +++ 19 files changed, 745 insertions(+) create mode 100644 surfsense_backend/app/podcasts/generation/__init__.py create mode 100644 surfsense_backend/app/podcasts/generation/brief/__init__.py create mode 100644 surfsense_backend/app/podcasts/generation/brief/config.py create mode 100644 surfsense_backend/app/podcasts/generation/brief/detection.py create mode 100644 surfsense_backend/app/podcasts/generation/brief/graph.py create mode 100644 surfsense_backend/app/podcasts/generation/brief/nodes.py create mode 100644 surfsense_backend/app/podcasts/generation/brief/state.py create mode 100644 surfsense_backend/app/podcasts/generation/prompts/__init__.py create mode 100644 surfsense_backend/app/podcasts/generation/prompts/detect_language.py create mode 100644 surfsense_backend/app/podcasts/generation/prompts/draft_segment.py create mode 100644 surfsense_backend/app/podcasts/generation/prompts/plan_outline.py create mode 100644 surfsense_backend/app/podcasts/generation/prompts/speakers.py create mode 100644 
surfsense_backend/app/podcasts/generation/structured.py create mode 100644 surfsense_backend/app/podcasts/generation/transcript/__init__.py create mode 100644 surfsense_backend/app/podcasts/generation/transcript/config.py create mode 100644 surfsense_backend/app/podcasts/generation/transcript/graph.py create mode 100644 surfsense_backend/app/podcasts/generation/transcript/nodes.py create mode 100644 surfsense_backend/app/podcasts/generation/transcript/planning.py create mode 100644 surfsense_backend/app/podcasts/generation/transcript/state.py diff --git a/surfsense_backend/app/podcasts/generation/__init__.py b/surfsense_backend/app/podcasts/generation/__init__.py new file mode 100644 index 000000000..30a2425b0 --- /dev/null +++ b/surfsense_backend/app/podcasts/generation/__init__.py @@ -0,0 +1,20 @@ +"""Generation: the LLM-driven brief and transcript controlled graphs. + +Two small graphs hold all the intelligence: ``brief`` proposes a reviewable spec +(language detection + resolution), and ``transcript`` drafts long-form dialogue +outline-first. Everything else in the podcast pipeline is deterministic. 
+""" + +from __future__ import annotations + +from .brief import BriefConfig, BriefState, build_brief_graph +from .transcript import TranscriptConfig, TranscriptState, build_transcript_graph + +__all__ = [ + "BriefConfig", + "BriefState", + "TranscriptConfig", + "TranscriptState", + "build_brief_graph", + "build_transcript_graph", +] diff --git a/surfsense_backend/app/podcasts/generation/brief/__init__.py b/surfsense_backend/app/podcasts/generation/brief/__init__.py new file mode 100644 index 000000000..0359a513d --- /dev/null +++ b/surfsense_backend/app/podcasts/generation/brief/__init__.py @@ -0,0 +1,9 @@ +"""Brief planning: propose a reviewable spec from weak signals.""" + +from __future__ import annotations + +from .config import BriefConfig +from .graph import build_brief_graph +from .state import BriefState + +__all__ = ["BriefConfig", "BriefState", "build_brief_graph"] diff --git a/surfsense_backend/app/podcasts/generation/brief/config.py b/surfsense_backend/app/podcasts/generation/brief/config.py new file mode 100644 index 000000000..a9f2f9dec --- /dev/null +++ b/surfsense_backend/app/podcasts/generation/brief/config.py @@ -0,0 +1,31 @@ +"""Configurable inputs for the brief-planning graph.""" + +from __future__ import annotations + +from dataclasses import dataclass, field, fields + +from langchain_core.runnables import RunnableConfig + +# Sensible defaults for a fresh brief; the user adjusts the range at the gate. 
+DEFAULT_SPEAKER_COUNT = 2 +DEFAULT_MIN_MINUTES = 10 +DEFAULT_MAX_MINUTES = 20 + + +@dataclass(kw_only=True) +class BriefConfig: + """Signals used to propose a brief; everything here is non-LLM context.""" + + search_space_id: int + speaker_count: int = DEFAULT_SPEAKER_COUNT + min_minutes: int = DEFAULT_MIN_MINUTES + max_minutes: int = DEFAULT_MAX_MINUTES + focus: str | None = None + last_used_language: str | None = None + last_used_voices: list[str] = field(default_factory=list) + + @classmethod + def from_runnable_config(cls, config: RunnableConfig | None = None) -> BriefConfig: + configurable = (config.get("configurable") or {}) if config else {} + names = {f.name for f in fields(cls) if f.init} + return cls(**{k: v for k, v in configurable.items() if k in names}) diff --git a/surfsense_backend/app/podcasts/generation/brief/detection.py b/surfsense_backend/app/podcasts/generation/brief/detection.py new file mode 100644 index 000000000..d505d4993 --- /dev/null +++ b/surfsense_backend/app/podcasts/generation/brief/detection.py @@ -0,0 +1,28 @@ +"""The language-detection reply shape, normalised to a safe tag or ``None``.""" + +from __future__ import annotations + +from pydantic import BaseModel, field_validator + +from app.podcasts.schemas import normalize_language_tag + + +class DetectedLanguage(BaseModel): + """What the detector returns: a usable BCP-47 tag, or ``None`` when unsure. + + A malformed or non-language reply is coerced to ``None`` so a bad detection + quietly defers to the rest of the resolution chain rather than poisoning the + spec with an invalid tag. 
+ """ + + language: str | None = None + + @field_validator("language") + @classmethod + def _normalise(cls, value: str | None) -> str | None: + if value is None: + return None + try: + return normalize_language_tag(value) + except ValueError: + return None diff --git a/surfsense_backend/app/podcasts/generation/brief/graph.py b/surfsense_backend/app/podcasts/generation/brief/graph.py new file mode 100644 index 000000000..328529e59 --- /dev/null +++ b/surfsense_backend/app/podcasts/generation/brief/graph.py @@ -0,0 +1,27 @@ +"""The brief-planning graph: detect language, then propose a spec.""" + +from __future__ import annotations + +from langgraph.graph import StateGraph + +from .config import BriefConfig +from .nodes import detect_language, propose_spec +from .state import BriefState + + +def build_brief_graph(): + workflow = StateGraph(BriefState, config_schema=BriefConfig) + + workflow.add_node("detect_language", detect_language) + workflow.add_node("propose_spec", propose_spec) + + workflow.add_edge("__start__", "detect_language") + workflow.add_edge("detect_language", "propose_spec") + workflow.add_edge("propose_spec", "__end__") + + graph = workflow.compile() + graph.name = "Surfsense Podcast Brief" + return graph + + +graph = build_brief_graph() diff --git a/surfsense_backend/app/podcasts/generation/brief/nodes.py b/surfsense_backend/app/podcasts/generation/brief/nodes.py new file mode 100644 index 000000000..e0477940c --- /dev/null +++ b/surfsense_backend/app/podcasts/generation/brief/nodes.py @@ -0,0 +1,153 @@ +"""Brief-planning nodes: detect the language, then propose a full spec. + +Only ``detect_language`` spends tokens, and only a small sample of source text; +``propose_spec`` is pure resolution. Together they open the brief gate pre-filled +so the common case needs no edits. 
+""" + +from __future__ import annotations + +from typing import Any + +from langchain_core.messages import HumanMessage, SystemMessage +from langchain_core.runnables import RunnableConfig + +from app.config import config as app_config +from app.podcasts.resolution import ( + DEFAULT_LANGUAGE, + LanguageContext, + resolve_language, + resolve_voices, +) +from app.podcasts.schemas import ( + DurationTarget, + PodcastSpec, + PodcastStyle, + SpeakerRole, + SpeakerSpec, + normalize_language_tag, +) +from app.podcasts.voices import ( + VoiceCatalog, + TtsProvider, + get_voice_catalog, + provider_from_service, +) +from app.services.llm_service import get_agent_llm + +from ..prompts import detect_language_prompt +from ..structured import StructuredOutputError, invoke_json +from .config import BriefConfig +from .detection import DetectedLanguage +from .state import BriefState + +# Only the head of the source is needed to judge language; this caps tokens. +_DETECTION_SAMPLE_CHARS = 4000 + +# Default role per speaker slot; extra speakers beyond the list fall back to guest. 
+_ROLE_BY_SLOT = ( + SpeakerRole.HOST, + SpeakerRole.GUEST, + SpeakerRole.EXPERT, + SpeakerRole.COHOST, + SpeakerRole.NARRATOR, +) + + +async def detect_language( + state: BriefState, config: RunnableConfig +) -> dict[str, Any]: + """Detect the source language; defer (``None``) on any uncertainty.""" + brief = BriefConfig.from_runnable_config(config) + llm = await get_agent_llm(state.db_session, brief.search_space_id) + if llm is None: + return {"detected_language": None} + + sample = (state.source_content or "")[:_DETECTION_SAMPLE_CHARS].strip() + if not sample: + return {"detected_language": None} + + messages = [ + SystemMessage(content=detect_language_prompt()), + HumanMessage(content=f"{sample}"), + ] + try: + detected = await invoke_json(llm, messages, DetectedLanguage) + except StructuredOutputError: + return {"detected_language": None} + return {"detected_language": detected.language} + + +def propose_spec(state: BriefState, config: RunnableConfig) -> dict[str, Any]: + """Build a complete :class:`PodcastSpec` from the resolved defaults.""" + brief = BriefConfig.from_runnable_config(config) + provider = _active_provider() + catalog = get_voice_catalog() + + language = _supported_language( + detected=state.detected_language, + last_used=brief.last_used_language, + provider=provider, + catalog=catalog, + ) + voices = resolve_voices( + catalog=catalog, + provider=provider, + language=language, + speaker_count=brief.speaker_count, + preferred=brief.last_used_voices, + ) + + speakers = [ + SpeakerSpec( + slot=slot, + name=_default_name(slot), + role=_role_for(slot), + voice_id=voice.voice_id, + ) + for slot, voice in enumerate(voices) + ] + spec = PodcastSpec( + language=language, + style=PodcastStyle.CONVERSATIONAL, + speakers=speakers, + duration=DurationTarget( + min_minutes=brief.min_minutes, max_minutes=brief.max_minutes + ), + focus=brief.focus, + ) + return {"spec": spec} + + +def _active_provider() -> TtsProvider: + service = app_config.TTS_SERVICE + if 
not service: + raise ValueError("TTS_SERVICE is not configured") + return provider_from_service(service) + + +def _supported_language( + *, + detected: str | None, + last_used: str | None, + provider: TtsProvider, + catalog: VoiceCatalog, +) -> str: + raw = resolve_language(LanguageContext(detected=detected, last_used=last_used)) + try: + language = normalize_language_tag(raw) + except ValueError: + language = DEFAULT_LANGUAGE + if not catalog.supports_language(provider, language): + return DEFAULT_LANGUAGE + return language + + +def _role_for(slot: int) -> SpeakerRole: + return _ROLE_BY_SLOT[slot] if slot < len(_ROLE_BY_SLOT) else SpeakerRole.GUEST + + +def _default_name(slot: int) -> str: + role = _role_for(slot) + label = role.value.replace("cohost", "co-host").title() + return label if slot < len(_ROLE_BY_SLOT) else f"{label} {slot}" diff --git a/surfsense_backend/app/podcasts/generation/brief/state.py b/surfsense_backend/app/podcasts/generation/brief/state.py new file mode 100644 index 000000000..976a72df5 --- /dev/null +++ b/surfsense_backend/app/podcasts/generation/brief/state.py @@ -0,0 +1,19 @@ +"""Mutable state threaded through the brief-planning graph.""" + +from __future__ import annotations + +from dataclasses import dataclass + +from sqlalchemy.ext.asyncio import AsyncSession + +from app.podcasts.schemas import PodcastSpec + + +@dataclass +class BriefState: + """Runtime inputs and the proposed spec the graph produces.""" + + db_session: AsyncSession + source_content: str + detected_language: str | None = None + spec: PodcastSpec | None = None diff --git a/surfsense_backend/app/podcasts/generation/prompts/__init__.py b/surfsense_backend/app/podcasts/generation/prompts/__init__.py new file mode 100644 index 000000000..1f6d3993b --- /dev/null +++ b/surfsense_backend/app/podcasts/generation/prompts/__init__.py @@ -0,0 +1,15 @@ +"""Prompt builders for the generation graphs.""" + +from __future__ import annotations + +from .detect_language import 
detect_language_prompt +from .draft_segment import draft_segment_prompt +from .plan_outline import plan_outline_prompt +from .speakers import render_speaker_roster + +__all__ = [ + "detect_language_prompt", + "draft_segment_prompt", + "plan_outline_prompt", + "render_speaker_roster", +] diff --git a/surfsense_backend/app/podcasts/generation/prompts/detect_language.py b/surfsense_backend/app/podcasts/generation/prompts/detect_language.py new file mode 100644 index 000000000..a5ab4da5c --- /dev/null +++ b/surfsense_backend/app/podcasts/generation/prompts/detect_language.py @@ -0,0 +1,22 @@ +"""Prompt for detecting the dominant natural language of source content.""" + +from __future__ import annotations + +_SYSTEM = """\ +You identify the dominant natural language of a piece of source content for a \ +podcast that will be generated from it. + +Rules: +- Report the language the listener-facing podcast should be spoken in, i.e. the \ +language most of the meaningful prose is written in. +- Ignore code, markup, URLs, numbers, and proper nouns when judging. +- If the content is too short, ambiguous, mixed without a clear majority, or not \ +natural-language prose, return null rather than guessing. + +Respond with strict JSON and nothing else: +{"language": "<language tag>"} or {"language": null} +""" + + +def detect_language_prompt() -> str: + return _SYSTEM diff --git a/surfsense_backend/app/podcasts/generation/prompts/draft_segment.py b/surfsense_backend/app/podcasts/generation/prompts/draft_segment.py new file mode 100644 index 000000000..c81dfa385 --- /dev/null +++ b/surfsense_backend/app/podcasts/generation/prompts/draft_segment.py @@ -0,0 +1,54 @@ +"""Prompt for drafting one outline segment into dialogue turns. + +Each segment is drafted on its own so long episodes stay coherent and within +context limits. A short recap of the preceding dialogue is passed in so the new +segment continues naturally instead of restarting. 
The model must write in the +episode language and attribute every line to a real speaker slot. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from app.podcasts.schemas import PodcastSpec + +from .speakers import render_speaker_roster + +if TYPE_CHECKING: + from app.podcasts.generation.transcript.planning import OutlineSegment + + +def draft_segment_prompt( + *, + spec: PodcastSpec, + segment: OutlineSegment, + position: int, + total: int, + recap: str | None, +) -> str: + talking_points = "\n".join(f"- {point}" for point in segment.talking_points) + recap_block = ( + f"\nRecap of the conversation so far (continue from here, do not repeat " + f"it):\n{recap}\n" + if recap + else "\nThis is the opening segment; begin the conversation naturally.\n" + ) + return f"""\ +You are scripting natural, engaging podcast dialogue for segment {position} of \ +{total}. + +Write entirely in {spec.language}. The format is {spec.style.value}. +Speakers — attribute every line using these exact slot numbers: +{render_speaker_roster(spec)} +{recap_block} +This segment is "{segment.title}". Cover these points using only facts grounded \ +in the provided source content: +{talking_points} + +Aim for about {segment.target_words} words of dialogue. Keep turns conversational \ +and varied; speakers should react to each other rather than deliver monologues. \ +Do not add greetings or sign-offs unless this is the first or last segment. + +Respond with strict JSON and nothing else: +{{"turns": [{{"speaker": <slot number>, "text": "..."}}]}} +""" diff --git a/surfsense_backend/app/podcasts/generation/prompts/plan_outline.py b/surfsense_backend/app/podcasts/generation/prompts/plan_outline.py new file mode 100644 index 000000000..1b227c2ff --- /dev/null +++ b/surfsense_backend/app/podcasts/generation/prompts/plan_outline.py @@ -0,0 +1,47 @@ +"""Prompt for planning a long-form podcast outline before drafting dialogue. 
+ +Outlining first is what makes long-form reliable: a single LLM call cannot hold +a coherent one- to two-hour script, but it can plan segments that are then +drafted independently against a shared plan. The prompt is told the target +length so the number and size of segments scale with the requested duration. +""" + +from __future__ import annotations + +from app.podcasts.schemas import PodcastSpec + +from .speakers import render_speaker_roster + + +def plan_outline_prompt( + *, + spec: PodcastSpec, + target_words: int, + suggested_segments: int, + focus: str | None, +) -> str: + focus_block = ( + f"\nThe user asked the episode to focus on:\n{focus}\n" if focus else "" + ) + return f"""\ +You are a podcast showrunner planning the structure of an episode before any \ +dialogue is written. + +The episode language is {spec.language}. The format is {spec.style.value}. +Speakers (refer to them by these slots later): +{render_speaker_roster(spec)} +{focus_block} +Plan an outline that, when fully drafted, reaches roughly {target_words} words \ +of spoken dialogue (about {suggested_segments} segments). Each segment is one \ +coherent beat of the conversation: an opening, distinct topic areas grounded in \ +the source content, and a closing. + +For each segment provide: +- title: a short label for the beat +- talking_points: 2-5 concrete points to cover, drawn from the source content +- target_words: how many words of dialogue this segment should run (the sum \ +across segments should approximate {target_words}) + +Respond with strict JSON and nothing else: +{{"segments": [{{"title": "...", "talking_points": ["..."], "target_words": 0}}]}} +""" diff --git a/surfsense_backend/app/podcasts/generation/prompts/speakers.py b/surfsense_backend/app/podcasts/generation/prompts/speakers.py new file mode 100644 index 000000000..9df4138df --- /dev/null +++ b/surfsense_backend/app/podcasts/generation/prompts/speakers.py @@ -0,0 +1,18 @@ +"""Render a spec's speaker roster for prompts. 
+ +The drafting prompts must reference speakers by the exact ``slot`` the renderer +expects, so this is the single place that formats that roster — keeping the +slot contract identical across every prompt that mentions speakers. +""" + +from __future__ import annotations + +from app.podcasts.schemas import PodcastSpec + + +def render_speaker_roster(spec: PodcastSpec) -> str: + lines = [ + f"- slot {speaker.slot} — {speaker.name} (role: {speaker.role.value})" + for speaker in spec.speakers + ] + return "\n".join(lines) diff --git a/surfsense_backend/app/podcasts/generation/structured.py b/surfsense_backend/app/podcasts/generation/structured.py new file mode 100644 index 000000000..9e9731c2f --- /dev/null +++ b/surfsense_backend/app/podcasts/generation/structured.py @@ -0,0 +1,49 @@ +"""Parse a model's reply into a Pydantic shape, tolerating chatty output. + +Agent LLMs return JSON wrapped in prose, markdown fences, or reasoning blocks. +This mirrors the legacy podcaster's resilient parsing — strip fences, then fall +back to the outermost ``{...}`` span — so every generation node validates the +reply the same way instead of repeating ad-hoc parsing. 
+""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, TypeVar + +from pydantic import BaseModel, ValidationError + +from app.utils.content_utils import extract_text_content, strip_markdown_fences + +if TYPE_CHECKING: + from langchain_core.messages import BaseMessage + +T = TypeVar("T", bound=BaseModel) + + +class StructuredOutputError(RuntimeError): + """The model reply could not be parsed into the expected shape.""" + + +async def invoke_json(llm, messages: list[BaseMessage], model: type[T]) -> T: + """Invoke ``llm`` and validate its reply as ``model``.""" + response = await llm.ainvoke(messages) + content = strip_markdown_fences(extract_text_content(response.content)) + + try: + return model.model_validate_json(content) + except (ValidationError, ValueError): + pass + + start = content.find("{") + end = content.rfind("}") + 1 + if 0 <= start < end: + try: + return model.model_validate_json(content[start:end]) + except (ValidationError, ValueError) as exc: + raise StructuredOutputError( + f"could not parse {model.__name__} from model reply" + ) from exc + + raise StructuredOutputError( + f"no JSON object found for {model.__name__} in model reply" + ) diff --git a/surfsense_backend/app/podcasts/generation/transcript/__init__.py b/surfsense_backend/app/podcasts/generation/transcript/__init__.py new file mode 100644 index 000000000..5c8f23cd7 --- /dev/null +++ b/surfsense_backend/app/podcasts/generation/transcript/__init__.py @@ -0,0 +1,17 @@ +"""Transcript drafting: outline-first, long-form dialogue generation.""" + +from __future__ import annotations + +from .config import TranscriptConfig +from .graph import build_transcript_graph +from .planning import Outline, OutlineSegment, SegmentDraft +from .state import TranscriptState + +__all__ = [ + "Outline", + "OutlineSegment", + "SegmentDraft", + "TranscriptConfig", + "TranscriptState", + "build_transcript_graph", +] diff --git a/surfsense_backend/app/podcasts/generation/transcript/config.py 
b/surfsense_backend/app/podcasts/generation/transcript/config.py new file mode 100644 index 000000000..f627fc166 --- /dev/null +++ b/surfsense_backend/app/podcasts/generation/transcript/config.py @@ -0,0 +1,26 @@ +"""Configurable inputs for the transcript-drafting graph.""" + +from __future__ import annotations + +from dataclasses import dataclass, fields + +from langchain_core.runnables import RunnableConfig + +from app.podcasts.schemas import PodcastSpec + + +@dataclass(kw_only=True) +class TranscriptConfig: + """The approved spec and user focus that drive drafting.""" + + search_space_id: int + spec: PodcastSpec + focus: str | None = None + + @classmethod + def from_runnable_config( + cls, config: RunnableConfig | None = None + ) -> TranscriptConfig: + configurable = (config.get("configurable") or {}) if config else {} + names = {f.name for f in fields(cls) if f.init} + return cls(**{k: v for k, v in configurable.items() if k in names}) diff --git a/surfsense_backend/app/podcasts/generation/transcript/graph.py b/surfsense_backend/app/podcasts/generation/transcript/graph.py new file mode 100644 index 000000000..2f97db50f --- /dev/null +++ b/surfsense_backend/app/podcasts/generation/transcript/graph.py @@ -0,0 +1,29 @@ +"""The transcript-drafting graph: outline, draft segments, finalize.""" + +from __future__ import annotations + +from langgraph.graph import StateGraph + +from .config import TranscriptConfig +from .nodes import draft_segments, finalize, plan_outline +from .state import TranscriptState + + +def build_transcript_graph(): + workflow = StateGraph(TranscriptState, config_schema=TranscriptConfig) + + workflow.add_node("plan_outline", plan_outline) + workflow.add_node("draft_segments", draft_segments) + workflow.add_node("finalize", finalize) + + workflow.add_edge("__start__", "plan_outline") + workflow.add_edge("plan_outline", "draft_segments") + workflow.add_edge("draft_segments", "finalize") + workflow.add_edge("finalize", "__end__") + + graph = 
workflow.compile() + graph.name = "Surfsense Podcast Transcript" + return graph + + +graph = build_transcript_graph() diff --git a/surfsense_backend/app/podcasts/generation/transcript/nodes.py b/surfsense_backend/app/podcasts/generation/transcript/nodes.py new file mode 100644 index 000000000..b4a3e6541 --- /dev/null +++ b/surfsense_backend/app/podcasts/generation/transcript/nodes.py @@ -0,0 +1,127 @@ +"""Transcript-drafting nodes: plan an outline, draft each beat, then assemble. + +Long-form is produced beat-by-beat: a single call plans the structure, then each +segment is drafted on its own with a recap of what came before so the script +stays coherent without holding the whole episode in one context window. +""" + +from __future__ import annotations + +from typing import Any + +from langchain_core.messages import HumanMessage, SystemMessage +from langchain_core.runnables import RunnableConfig + +from app.podcasts.schemas import PodcastSpec, Transcript, TranscriptTurn +from app.services.llm_service import get_agent_llm + +from ..prompts import draft_segment_prompt, plan_outline_prompt +from ..structured import invoke_json +from .config import TranscriptConfig +from .planning import Outline, OutlineSegment, SegmentDraft +from .state import TranscriptState + +# Average speaking rate; converts target minutes to a target word count. +_WORDS_PER_MINUTE = 150 +# Rough words per outline segment, used to suggest how many segments to plan. +_WORDS_PER_SEGMENT = 250 +# Cap on source text sent per LLM call to bound tokens on large sources. +_SOURCE_BUDGET_CHARS = 12000 +# How much prior dialogue to recap into each segment for continuity. 
+_RECAP_CHARS = 800 + + +async def plan_outline( + state: TranscriptState, config: RunnableConfig +) -> dict[str, Any]: + """Plan the segment structure sized to the spec's target duration.""" + tc = TranscriptConfig.from_runnable_config(config) + llm = await _require_llm(state, tc) + + target_words = round(tc.spec.duration.midpoint_minutes * _WORDS_PER_MINUTE) + suggested_segments = max(1, round(target_words / _WORDS_PER_SEGMENT)) + + messages = [ + SystemMessage( + content=plan_outline_prompt( + spec=tc.spec, + target_words=target_words, + suggested_segments=suggested_segments, + focus=tc.focus, + ) + ), + HumanMessage(content=_source_block(state.source_content)), + ] + outline = await invoke_json(llm, messages, Outline) + return {"outline": outline} + + +async def draft_segments( + state: TranscriptState, config: RunnableConfig +) -> dict[str, Any]: + """Draft each outline segment in order, carrying a running recap.""" + tc = TranscriptConfig.from_runnable_config(config) + llm = await _require_llm(state, tc) + outline = state.outline + if outline is None: + raise RuntimeError("draft_segments requires an outline") + + source_block = _source_block(state.source_content) + turns: list[TranscriptTurn] = [] + total = len(outline.segments) + + for index, segment in enumerate(outline.segments): + messages = [ + SystemMessage( + content=draft_segment_prompt( + spec=tc.spec, + segment=segment, + position=index + 1, + total=total, + recap=_recap(turns, tc.spec), + ) + ), + HumanMessage(content=source_block), + ] + draft = await invoke_json(llm, messages, SegmentDraft) + turns.extend(_valid_turns(draft, tc.spec)) + + return {"drafted_turns": turns} + + +def finalize(state: TranscriptState, config: RunnableConfig) -> dict[str, Any]: + """Assemble drafted turns into a validated transcript.""" + if not state.drafted_turns: + raise RuntimeError("drafting produced no usable dialogue") + return {"transcript": Transcript(turns=state.drafted_turns)} + + +async def 
_require_llm(state: TranscriptState, tc: TranscriptConfig): + llm = await get_agent_llm(state.db_session, tc.search_space_id) + if llm is None: + raise RuntimeError( + f"no agent LLM configured for search space {tc.search_space_id}" + ) + return llm + + +def _source_block(source_content: str) -> str: + sample = (source_content or "")[:_SOURCE_BUDGET_CHARS] + return f"{sample}" + + +def _valid_turns(draft: SegmentDraft, spec: PodcastSpec) -> list[TranscriptTurn]: + # Drop any turn the model attributed to a slot the spec doesn't define, so a + # stray attribution can't break rendering downstream. + valid_slots = {speaker.slot for speaker in spec.speakers} + return [turn for turn in draft.turns if turn.speaker in valid_slots] + + +def _recap(turns: list[TranscriptTurn], spec: PodcastSpec) -> str | None: + if not turns: + return None + names = {speaker.slot: speaker.name for speaker in spec.speakers} + rendered = "\n".join( + f"{names.get(turn.speaker, turn.speaker)}: {turn.text}" for turn in turns + ) + return rendered[-_RECAP_CHARS:] diff --git a/surfsense_backend/app/podcasts/generation/transcript/planning.py b/surfsense_backend/app/podcasts/generation/transcript/planning.py new file mode 100644 index 000000000..3f6aeac9b --- /dev/null +++ b/surfsense_backend/app/podcasts/generation/transcript/planning.py @@ -0,0 +1,32 @@ +"""Internal shapes the transcript graph passes between its nodes. + +These are generation-time artifacts (the outline and per-segment drafts), not +persisted or API-facing. Segment drafts reuse :class:`TranscriptTurn` so the +speaker-slot contract and turn validation are identical to the final transcript. 
+""" + +from __future__ import annotations + +from pydantic import BaseModel, Field + +from app.podcasts.schemas import TranscriptTurn + + +class OutlineSegment(BaseModel): + """One planned beat of the conversation, drafted independently.""" + + title: str = Field(..., min_length=1) + talking_points: list[str] = Field(default_factory=list) + target_words: int = Field(..., ge=1) + + +class Outline(BaseModel): + """The full plan: ordered segments sized to the target duration.""" + + segments: list[OutlineSegment] = Field(..., min_length=1) + + +class SegmentDraft(BaseModel): + """The dialogue a single segment produced.""" + + turns: list[TranscriptTurn] = Field(default_factory=list) diff --git a/surfsense_backend/app/podcasts/generation/transcript/state.py b/surfsense_backend/app/podcasts/generation/transcript/state.py new file mode 100644 index 000000000..f11337471 --- /dev/null +++ b/surfsense_backend/app/podcasts/generation/transcript/state.py @@ -0,0 +1,22 @@ +"""Mutable state threaded through the transcript-drafting graph.""" + +from __future__ import annotations + +from dataclasses import dataclass, field + +from sqlalchemy.ext.asyncio import AsyncSession + +from app.podcasts.schemas import Transcript, TranscriptTurn + +from .planning import Outline + + +@dataclass +class TranscriptState: + """Source content plus the intermediate and final drafting artifacts.""" + + db_session: AsyncSession + source_content: str + outline: Outline | None = None + drafted_turns: list[TranscriptTurn] = field(default_factory=list) + transcript: Transcript | None = None From 470af28688ffb2a4d2d9de355832f8080d5f748d Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 10 Jun 2026 18:44:03 +0200 Subject: [PATCH 09/50] feat(podcasts): add lifecycle service --- surfsense_backend/app/podcasts/service.py | 204 ++++++++++++++++++++++ 1 file changed, 204 insertions(+) create mode 100644 surfsense_backend/app/podcasts/service.py diff --git a/surfsense_backend/app/podcasts/service.py 
b/surfsense_backend/app/podcasts/service.py new file mode 100644 index 000000000..6df8c315f --- /dev/null +++ b/surfsense_backend/app/podcasts/service.py @@ -0,0 +1,204 @@ +"""The podcast lifecycle authority: every status change goes through here. + +The service owns the state machine. Each method names a real lifecycle step, +validates it against the allowed-transition table, and (de)serializes the brief +and transcript to/from their JSONB columns. It deliberately does not enqueue +Celery work — callers transition the row here, then schedule the next task — so +the rules stay testable and free of task-queue coupling. +""" + +from __future__ import annotations + +from sqlalchemy.ext.asyncio import AsyncSession + +from app.podcasts.persistence import Podcast, PodcastRepository, PodcastStatus +from app.podcasts.schemas import PodcastSpec, Transcript + +_MAX_ERROR_CHARS = 2000 + +# The only status changes the machine permits. Terminal states have no exits. +_ALLOWED: dict[PodcastStatus, frozenset[PodcastStatus]] = { + PodcastStatus.PENDING: frozenset( + {PodcastStatus.AWAITING_BRIEF, PodcastStatus.FAILED, PodcastStatus.CANCELLED} + ), + PodcastStatus.AWAITING_BRIEF: frozenset( + {PodcastStatus.DRAFTING, PodcastStatus.FAILED, PodcastStatus.CANCELLED} + ), + PodcastStatus.DRAFTING: frozenset( + {PodcastStatus.AWAITING_REVIEW, PodcastStatus.FAILED, PodcastStatus.CANCELLED} + ), + PodcastStatus.AWAITING_REVIEW: frozenset( + { + PodcastStatus.RENDERING, # approve + PodcastStatus.DRAFTING, # regenerate + PodcastStatus.FAILED, + PodcastStatus.CANCELLED, + } + ), + PodcastStatus.RENDERING: frozenset( + {PodcastStatus.READY, PodcastStatus.FAILED, PodcastStatus.CANCELLED} + ), + PodcastStatus.READY: frozenset(), + PodcastStatus.FAILED: frozenset(), + PodcastStatus.CANCELLED: frozenset(), +} + + +class PodcastError(RuntimeError): + """Base class for lifecycle errors.""" + + +class InvalidTransition(PodcastError): + """A requested status change is not permitted from the current 
state.""" + + +class SpecConflict(PodcastError): + """A spec edit raced another: the expected version is stale.""" + + def __init__(self, expected: int, actual: int) -> None: + super().__init__( + f"spec version conflict: expected {expected}, current is {actual}" + ) + self.expected = expected + self.actual = actual + + +class PreconditionFailed(PodcastError): + """A transition's data precondition (brief/transcript present) is unmet.""" + + +class PodcastService: + """Drives one podcast through its lifecycle within a single session.""" + + def __init__(self, session: AsyncSession) -> None: + self._session = session + self._repo = PodcastRepository(session) + + async def create( + self, *, title: str, search_space_id: int, thread_id: int | None = None + ) -> Podcast: + """Create a fresh podcast in ``PENDING`` awaiting its brief.""" + podcast = Podcast( + title=title, + search_space_id=search_space_id, + thread_id=thread_id, + status=PodcastStatus.PENDING, + spec_version=1, + ) + return await self._repo.add(podcast) + + async def attach_brief(self, podcast: Podcast, spec: PodcastSpec) -> Podcast: + """Record the proposed brief and open the review gate.""" + self._transition(podcast, PodcastStatus.AWAITING_BRIEF) + podcast.spec = spec.model_dump(mode="json") + await self._session.flush() + return podcast + + async def update_spec( + self, podcast: Podcast, spec: PodcastSpec, expected_version: int + ) -> Podcast: + """Edit the brief at the gate, guarded by optimistic concurrency.""" + if _status(podcast) is not PodcastStatus.AWAITING_BRIEF: + raise InvalidTransition( + f"the brief can only be edited while awaiting_brief, " + f"not {_status(podcast).value}" + ) + if expected_version != podcast.spec_version: + raise SpecConflict(expected_version, podcast.spec_version) + podcast.spec = spec.model_dump(mode="json") + podcast.spec_version += 1 + await self._session.flush() + return podcast + + async def begin_drafting(self, podcast: Podcast) -> Podcast: + """Approve the 
brief and start transcript drafting.""" + if podcast.spec is None: + raise PreconditionFailed("cannot draft without a brief") + self._transition(podcast, PodcastStatus.DRAFTING) + await self._session.flush() + return podcast + + async def attach_transcript( + self, podcast: Podcast, transcript: Transcript + ) -> Podcast: + """Record the drafted transcript and open the go/no-go gate.""" + self._transition(podcast, PodcastStatus.AWAITING_REVIEW) + podcast.podcast_transcript = transcript.model_dump(mode="json") + await self._session.flush() + return podcast + + async def approve(self, podcast: Podcast) -> Podcast: + """Accept the transcript and start rendering.""" + if not podcast.podcast_transcript: + raise PreconditionFailed("cannot render without a transcript") + self._transition(podcast, PodcastStatus.RENDERING) + await self._session.flush() + return podcast + + async def regenerate(self, podcast: Podcast) -> Podcast: + """Reject the transcript and draft a new one.""" + self._transition(podcast, PodcastStatus.DRAFTING) + await self._session.flush() + return podcast + + async def attach_audio( + self, + podcast: Podcast, + *, + storage_backend: str, + storage_key: str, + duration_seconds: int | None = None, + ) -> Podcast: + """Record rendered audio and mark the podcast ready.""" + self._transition(podcast, PodcastStatus.READY) + podcast.storage_backend = storage_backend + podcast.storage_key = storage_key + podcast.duration_seconds = duration_seconds + podcast.error = None + await self._session.flush() + return podcast + + async def fail(self, podcast: Podcast, error: str) -> Podcast: + """Move a non-terminal podcast to ``FAILED`` with a reason.""" + self._transition(podcast, PodcastStatus.FAILED) + podcast.error = (error or "")[:_MAX_ERROR_CHARS] or None + await self._session.flush() + return podcast + + async def cancel(self, podcast: Podcast) -> Podcast: + """Cancel a non-terminal podcast at the user's request.""" + self._transition(podcast, 
PodcastStatus.CANCELLED) + await self._session.flush() + return podcast + + def _transition(self, podcast: Podcast, target: PodcastStatus) -> None: + current = _status(podcast) + if target not in _ALLOWED[current]: + raise InvalidTransition( + f"{current.value} -> {target.value} is not allowed" + ) + podcast.status = target + + +def _status(podcast: Podcast) -> PodcastStatus: + return PodcastStatus(podcast.status) + + +def read_spec(podcast: Podcast) -> PodcastSpec | None: + """Deserialize the stored brief, or ``None`` if not yet proposed.""" + return PodcastSpec.model_validate(podcast.spec) if podcast.spec else None + + +def read_transcript(podcast: Podcast) -> Transcript | None: + """Deserialize the stored transcript, or ``None`` if not yet drafted.""" + if not podcast.podcast_transcript: + return None + return Transcript.model_validate(podcast.podcast_transcript) + + +def preferences_from(podcast: Podcast | None) -> tuple[str | None, list[str]]: + """Extract reusable (language, voice_ids) defaults from a prior podcast.""" + spec = read_spec(podcast) if podcast is not None else None + if spec is None: + return None, [] + return spec.language, [speaker.voice_id for speaker in spec.speakers] From 4271048dcf496052261432bd3dd06b04ab40157f Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 10 Jun 2026 18:44:03 +0200 Subject: [PATCH 10/50] feat(podcasts): add audio storage --- surfsense_backend/app/podcasts/storage.py | 48 +++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 surfsense_backend/app/podcasts/storage.py diff --git a/surfsense_backend/app/podcasts/storage.py b/surfsense_backend/app/podcasts/storage.py new file mode 100644 index 000000000..30179adae --- /dev/null +++ b/surfsense_backend/app/podcasts/storage.py @@ -0,0 +1,48 @@ +"""Durable storage for rendered podcast audio. + +Wraps the shared :class:`StorageBackend` so the rest of the module never deals +with object keys directly. 
Audio is stored under a per-podcast key, streamed for +download, and purged when a podcast is deleted. +""" + +from __future__ import annotations + +import uuid +from collections.abc import AsyncIterator + +from app.file_storage.factory import get_storage_backend +from app.podcasts.persistence import Podcast + +_AUDIO_CONTENT_TYPE = "audio/mpeg" + + +def build_audio_key(*, search_space_id: int, podcast_id: int) -> str: + """Object key for a podcast's audio. + + Shape: ``podcasts/{search_space_id}/{podcast_id}/{uuid}.mp3``. The uuid lets + a re-render write a fresh object before the old one is purged. + """ + return f"podcasts/{search_space_id}/{podcast_id}/{uuid.uuid4().hex}.mp3" + + +async def store_audio( + *, search_space_id: int, podcast_id: int, data: bytes +) -> tuple[str, str]: + """Persist audio bytes and return ``(backend_name, storage_key)``.""" + backend = get_storage_backend() + key = build_audio_key(search_space_id=search_space_id, podcast_id=podcast_id) + await backend.put(key, data, content_type=_AUDIO_CONTENT_TYPE) + return backend.backend_name, key + + +def open_audio_stream(podcast: Podcast) -> AsyncIterator[bytes]: + """Stream a ready podcast's audio bytes. 
Raises if it has none.""" + if not podcast.storage_key: + raise FileNotFoundError(f"podcast {podcast.id} has no stored audio") + return get_storage_backend().open_stream(podcast.storage_key) + + +async def purge_audio(podcast: Podcast) -> None: + """Delete a podcast's stored audio if present; a missing object is fine.""" + if podcast.storage_key: + await get_storage_backend().delete(podcast.storage_key) From b2970ba37eecbfc496ca073e148e745cd34e29e6 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 10 Jun 2026 18:44:03 +0200 Subject: [PATCH 11/50] feat(podcasts): add celery tasks --- .../app/podcasts/tasks/__init__.py | 18 ++++ surfsense_backend/app/podcasts/tasks/brief.py | 60 ++++++++++++ surfsense_backend/app/podcasts/tasks/draft.py | 94 +++++++++++++++++++ .../app/podcasts/tasks/render.py | 70 ++++++++++++++ .../app/podcasts/tasks/runtime.py | 40 ++++++++ 5 files changed, 282 insertions(+) create mode 100644 surfsense_backend/app/podcasts/tasks/__init__.py create mode 100644 surfsense_backend/app/podcasts/tasks/brief.py create mode 100644 surfsense_backend/app/podcasts/tasks/draft.py create mode 100644 surfsense_backend/app/podcasts/tasks/render.py create mode 100644 surfsense_backend/app/podcasts/tasks/runtime.py diff --git a/surfsense_backend/app/podcasts/tasks/__init__.py b/surfsense_backend/app/podcasts/tasks/__init__.py new file mode 100644 index 000000000..32f7c3a72 --- /dev/null +++ b/surfsense_backend/app/podcasts/tasks/__init__.py @@ -0,0 +1,18 @@ +"""Celery tasks driving the podcast lifecycle across its user gates. + +One task per async phase: propose the brief, draft the transcript, render the +audio. Each is enqueued by the API after it performs the guarded status +transition, and each pushes its result onto the row for the frontend to observe. 
+""" + +from __future__ import annotations + +from .brief import propose_brief_task +from .draft import draft_transcript_task +from .render import render_audio_task + +__all__ = [ + "draft_transcript_task", + "propose_brief_task", + "render_audio_task", +] diff --git a/surfsense_backend/app/podcasts/tasks/brief.py b/surfsense_backend/app/podcasts/tasks/brief.py new file mode 100644 index 000000000..2f1e3f240 --- /dev/null +++ b/surfsense_backend/app/podcasts/tasks/brief.py @@ -0,0 +1,60 @@ +"""Brief-proposal task: PENDING -> AWAITING_BRIEF. + +Runs the (cheap, token-light) brief graph to detect language and propose a spec, +seeded with the user's last-used language/voice preferences. Pushes the result +straight onto the row so the frontend sees the brief gate open via Zero. +""" + +from __future__ import annotations + +import logging + +from app.celery_app import celery_app +from app.podcasts.generation.brief.graph import graph as brief_graph +from app.podcasts.generation.brief.state import BriefState +from app.podcasts.persistence import PodcastRepository +from app.podcasts.service import PodcastService, preferences_from +from app.tasks.celery_tasks import get_celery_session_maker, run_async_celery_task + +from .runtime import mark_failed + +logger = logging.getLogger(__name__) + + +@celery_app.task(name="podcast.propose_brief", bind=True) +def propose_brief_task(self, podcast_id: int, search_space_id: int) -> dict: + try: + return run_async_celery_task( + lambda: _propose_brief(podcast_id, search_space_id) + ) + except Exception as exc: # noqa: BLE001 - record and report, never crash worker + logger.error("Podcast %s brief proposal failed: %s", podcast_id, exc) + run_async_celery_task(lambda: mark_failed(podcast_id, str(exc))) + return {"status": "failed", "podcast_id": podcast_id} + + +async def _propose_brief(podcast_id: int, search_space_id: int) -> dict: + async with get_celery_session_maker()() as session: + repo = PodcastRepository(session) + podcast = 
await repo.get(podcast_id) + if podcast is None: + raise ValueError(f"podcast {podcast_id} not found") + + last_language, last_voices = preferences_from( + await repo.latest_with_spec(search_space_id) + ) + state = BriefState( + db_session=session, source_content=podcast.source_content or "" + ) + config = { + "configurable": { + "search_space_id": search_space_id, + "last_used_language": last_language, + "last_used_voices": last_voices, + } + } + result = await brief_graph.ainvoke(state, config=config) + + await PodcastService(session).attach_brief(podcast, result["spec"]) + await session.commit() + return {"status": "awaiting_brief", "podcast_id": podcast_id} diff --git a/surfsense_backend/app/podcasts/tasks/draft.py b/surfsense_backend/app/podcasts/tasks/draft.py new file mode 100644 index 000000000..8d461bf9b --- /dev/null +++ b/surfsense_backend/app/podcasts/tasks/draft.py @@ -0,0 +1,94 @@ +"""Transcript-drafting task: DRAFTING -> AWAITING_REVIEW. + +The expensive, LLM-heavy step, so it runs under ``billable_call`` exactly like +the legacy generator. The API has already moved the row to DRAFTING and stored +the approved brief; this task drafts the long-form transcript and opens the +go/no-go gate. 
+""" + +from __future__ import annotations + +import logging + +from app.celery_app import celery_app +from app.config import config as app_config +from app.podcasts.generation.transcript.graph import graph as transcript_graph +from app.podcasts.generation.transcript.state import TranscriptState +from app.podcasts.persistence import PodcastRepository +from app.podcasts.service import PodcastService, read_spec +from app.services.billable_calls import ( + BillingSettlementError, + QuotaInsufficientError, + _resolve_agent_billing_for_search_space, + billable_call, +) +from app.tasks.celery_tasks import get_celery_session_maker, run_async_celery_task + +from .runtime import billable_session, mark_failed + +logger = logging.getLogger(__name__) + + +@celery_app.task(name="podcast.draft_transcript", bind=True) +def draft_transcript_task(self, podcast_id: int, search_space_id: int) -> dict: + try: + return run_async_celery_task( + lambda: _draft_transcript(podcast_id, search_space_id) + ) + except Exception as exc: # noqa: BLE001 - record and report, never crash worker + logger.exception("Podcast %s drafting failed: %s", podcast_id, exc) + run_async_celery_task(lambda: mark_failed(podcast_id, str(exc))) + return {"status": "failed", "podcast_id": podcast_id} + + +async def _draft_transcript(podcast_id: int, search_space_id: int) -> dict: + async with get_celery_session_maker()() as session: + repo = PodcastRepository(session) + service = PodcastService(session) + podcast = await repo.get(podcast_id) + if podcast is None: + raise ValueError(f"podcast {podcast_id} not found") + + spec = read_spec(podcast) + if spec is None: + raise ValueError(f"podcast {podcast_id} has no approved brief") + + owner_id, tier, base_model = await _resolve_agent_billing_for_search_space( + session, search_space_id, thread_id=podcast.thread_id + ) + + state = TranscriptState( + db_session=session, source_content=podcast.source_content or "" + ) + config = { + "configurable": { + "search_space_id": 
search_space_id, + "spec": spec, + "focus": spec.focus, + } + } + + try: + async with billable_call( + user_id=owner_id, + search_space_id=search_space_id, + billing_tier=tier, + base_model=base_model, + quota_reserve_micros_override=app_config.QUOTA_DEFAULT_PODCAST_RESERVE_MICROS, + usage_type="podcast_generation", + call_details={"podcast_id": podcast_id, "title": podcast.title}, + billable_session_factory=billable_session, + ): + result = await transcript_graph.ainvoke(state, config=config) + except QuotaInsufficientError: + await service.fail(podcast, "premium quota exhausted") + await session.commit() + return {"status": "failed", "podcast_id": podcast_id, "reason": "quota"} + except BillingSettlementError: + await service.fail(podcast, "billing settlement failed") + await session.commit() + return {"status": "failed", "podcast_id": podcast_id, "reason": "billing"} + + await service.attach_transcript(podcast, result["transcript"]) + await session.commit() + return {"status": "awaiting_review", "podcast_id": podcast_id} diff --git a/surfsense_backend/app/podcasts/tasks/render.py b/surfsense_backend/app/podcasts/tasks/render.py new file mode 100644 index 000000000..04fb9ab9d --- /dev/null +++ b/surfsense_backend/app/podcasts/tasks/render.py @@ -0,0 +1,70 @@ +"""Audio-rendering task: RENDERING -> READY. + +Synthesises and merges the approved transcript, stores the MP3 in the object +store, and marks the podcast ready. The working directory is stable per podcast +so a re-render (e.g. after a voice change) reuses the segment cache. 
+""" + +from __future__ import annotations + +import logging +import tempfile +from pathlib import Path + +from app.celery_app import celery_app +from app.podcasts.persistence import PodcastRepository +from app.podcasts.rendering import PodcastRenderer +from app.podcasts.service import PodcastService, read_spec, read_transcript +from app.podcasts.storage import store_audio +from app.podcasts.tts import get_text_to_speech +from app.podcasts.voices import get_voice_catalog +from app.tasks.celery_tasks import get_celery_session_maker, run_async_celery_task + +from .runtime import mark_failed + +logger = logging.getLogger(__name__) + +_WORKDIR_BASE = Path(tempfile.gettempdir()) / "surfsense_podcasts" + + +@celery_app.task(name="podcast.render_audio", bind=True) +def render_audio_task(self, podcast_id: int) -> dict: + try: + return run_async_celery_task(lambda: _render_audio(podcast_id)) + except Exception as exc: # noqa: BLE001 - record and report, never crash worker + logger.exception("Podcast %s render failed: %s", podcast_id, exc) + run_async_celery_task(lambda: mark_failed(podcast_id, str(exc))) + return {"status": "failed", "podcast_id": podcast_id} + + +async def _render_audio(podcast_id: int) -> dict: + async with get_celery_session_maker()() as session: + repo = PodcastRepository(session) + podcast = await repo.get(podcast_id) + if podcast is None: + raise ValueError(f"podcast {podcast_id} not found") + + spec = read_spec(podcast) + transcript = read_transcript(podcast) + if spec is None or transcript is None: + raise ValueError(f"podcast {podcast_id} is missing brief or transcript") + + renderer = PodcastRenderer( + tts=get_text_to_speech(), catalog=get_voice_catalog() + ) + workdir = _WORKDIR_BASE / str(podcast_id) + workdir.mkdir(parents=True, exist_ok=True) + rendered = await renderer.render( + spec=spec, transcript=transcript, workdir=workdir + ) + + backend_name, key = await store_audio( + search_space_id=podcast.search_space_id, + podcast_id=podcast_id, + 
data=rendered.data, + ) + await PodcastService(session).attach_audio( + podcast, storage_backend=backend_name, storage_key=key + ) + await session.commit() + return {"status": "ready", "podcast_id": podcast_id} diff --git a/surfsense_backend/app/podcasts/tasks/runtime.py b/surfsense_backend/app/podcasts/tasks/runtime.py new file mode 100644 index 000000000..349aeffb2 --- /dev/null +++ b/surfsense_backend/app/podcasts/tasks/runtime.py @@ -0,0 +1,40 @@ +"""Shared plumbing for the podcast Celery tasks. + +Each task runs its async body via :func:`run_async_celery_task` and, on any +failure, records the reason on the row through the lifecycle service. Marking +failed is best-effort: a podcast that already reached a terminal state is left +untouched rather than forced. +""" + +from __future__ import annotations + +import logging +from contextlib import asynccontextmanager + +from app.podcasts.persistence import PodcastRepository +from app.podcasts.service import PodcastError, PodcastService +from app.tasks.celery_tasks import get_celery_session_maker + +logger = logging.getLogger(__name__) + + +@asynccontextmanager +async def billable_session(): + """Session factory for ``billable_call`` inside the worker loop.""" + async with get_celery_session_maker()() as session: + yield session + + +async def mark_failed(podcast_id: int, error: str) -> None: + """Best-effort: move a non-terminal podcast to FAILED with ``error``.""" + async with get_celery_session_maker()() as session: + repo = PodcastRepository(session) + podcast = await repo.get(podcast_id) + if podcast is None: + return + try: + await PodcastService(session).fail(podcast, error) + await session.commit() + except PodcastError: + # Already terminal (e.g. cancelled): nothing to record. 
+ logger.info("Podcast %s already terminal; not marking failed", podcast_id) From 7087f7866d2d0d1425be4cd442145eefeaf67754 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 10 Jun 2026 18:44:03 +0200 Subject: [PATCH 12/50] feat(podcasts): add http api --- .../app/podcasts/api/__init__.py | 12 + surfsense_backend/app/podcasts/api/routes.py | 310 ++++++++++++++++++ surfsense_backend/app/podcasts/api/schemas.py | 97 ++++++ 3 files changed, 419 insertions(+) create mode 100644 surfsense_backend/app/podcasts/api/__init__.py create mode 100644 surfsense_backend/app/podcasts/api/routes.py create mode 100644 surfsense_backend/app/podcasts/api/schemas.py diff --git a/surfsense_backend/app/podcasts/api/__init__.py b/surfsense_backend/app/podcasts/api/__init__.py new file mode 100644 index 000000000..f943faeeb --- /dev/null +++ b/surfsense_backend/app/podcasts/api/__init__.py @@ -0,0 +1,12 @@ +"""HTTP API for the podcast lifecycle. + +The router is mounted at cutover (replacing the legacy podcast routes); it is +kept separate here so it can be wired in one step without colliding with the old +routes during parallel development. +""" + +from __future__ import annotations + +from .routes import router + +__all__ = ["router"] diff --git a/surfsense_backend/app/podcasts/api/routes.py b/surfsense_backend/app/podcasts/api/routes.py new file mode 100644 index 000000000..0d5de2463 --- /dev/null +++ b/surfsense_backend/app/podcasts/api/routes.py @@ -0,0 +1,310 @@ +"""HTTP surface for the podcast lifecycle. + +Status is observed by the frontend through Zero, so these routes are about +actions (create, edit the brief, approve/regenerate, cancel) and audio delivery. +Each mutating route performs the guarded transition via the service, commits, +then enqueues the matching Celery task; lifecycle errors map to 409/422. 
+""" + +from __future__ import annotations + +import os +from pathlib import Path + +from fastapi import APIRouter, Depends, HTTPException +from fastapi.responses import StreamingResponse +from sqlalchemy import select +from sqlalchemy.ext.asyncio import AsyncSession + +from app.db import ( + Permission, + SearchSpace, + SearchSpaceMembership, + User, + get_async_session, +) +from app.podcasts.persistence import Podcast, PodcastRepository +from app.podcasts.service import ( + InvalidTransition, + PodcastService, + PreconditionFailed, + SpecConflict, +) +from app.podcasts.storage import open_audio_stream, purge_audio +from app.podcasts.tasks import ( + draft_transcript_task, + propose_brief_task, + render_audio_task, +) +from app.podcasts.voices import get_voice_catalog, provider_from_service +from app.config import config as app_config +from app.users import current_active_user +from app.utils.rbac import check_permission + +from .schemas import ( + CreatePodcastRequest, + PodcastDetail, + PodcastSummary, + UpdateSpecRequest, + VoiceOption, +) + +router = APIRouter() + + +@router.get("/podcasts", response_model=list[PodcastSummary]) +async def list_podcasts( + search_space_id: int | None = None, + skip: int = 0, + limit: int = 100, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + if skip < 0 or limit < 1: + raise HTTPException(status_code=400, detail="Invalid pagination parameters") + + if search_space_id is not None: + await _require(session, user, search_space_id, Permission.PODCASTS_READ) + query = ( + select(Podcast) + .where(Podcast.search_space_id == search_space_id) + .order_by(Podcast.created_at.desc()) + .offset(skip) + .limit(limit) + ) + else: + query = ( + select(Podcast) + .join(SearchSpace) + .join(SearchSpaceMembership) + .where(SearchSpaceMembership.user_id == user.id) + .order_by(Podcast.created_at.desc()) + .offset(skip) + .limit(limit) + ) + result = await session.execute(query) + return 
list(result.scalars().all()) + + +@router.get("/podcasts/voices", response_model=list[VoiceOption]) +async def list_voices(language: str | None = None): + """Voices the active TTS provider offers, optionally filtered by language.""" + if not app_config.TTS_SERVICE: + raise HTTPException(status_code=503, detail="No TTS provider configured") + + provider = provider_from_service(app_config.TTS_SERVICE) + catalog = get_voice_catalog() + voices = ( + catalog.for_language(provider, language) + if language + else catalog.for_provider(provider) + ) + return [ + VoiceOption( + voice_id=v.voice_id, + display_name=v.display_name, + language=v.language, + gender=v.gender.value, + ) + for v in voices + ] + + +@router.post("/podcasts", response_model=PodcastDetail, status_code=201) +async def create_podcast( + body: CreatePodcastRequest, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + await _require(session, user, body.search_space_id, Permission.PODCASTS_CREATE) + + podcast = await PodcastService(session).create( + title=body.title, + search_space_id=body.search_space_id, + thread_id=body.thread_id, + ) + podcast.source_content = body.source_content + await session.commit() + + propose_brief_task.delay(podcast.id, body.search_space_id) + return PodcastDetail.of(podcast) + + +@router.get("/podcasts/{podcast_id}", response_model=PodcastDetail) +async def get_podcast( + podcast_id: int, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + podcast = await _load(session, user, podcast_id, Permission.PODCASTS_READ) + return PodcastDetail.of(podcast) + + +@router.patch("/podcasts/{podcast_id}/spec", response_model=PodcastDetail) +async def update_spec( + podcast_id: int, + body: UpdateSpecRequest, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + podcast = await _load(session, user, podcast_id, Permission.PODCASTS_UPDATE) + 
async with _lifecycle_errors(): + await PodcastService(session).update_spec( + podcast, body.spec, body.expected_version + ) + await session.commit() + return PodcastDetail.of(podcast) + + +@router.post("/podcasts/{podcast_id}/brief/approve", response_model=PodcastDetail) +async def approve_brief( + podcast_id: int, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + """Approve the brief and start drafting the transcript.""" + podcast = await _load(session, user, podcast_id, Permission.PODCASTS_UPDATE) + async with _lifecycle_errors(): + await PodcastService(session).begin_drafting(podcast) + await session.commit() + draft_transcript_task.delay(podcast.id, podcast.search_space_id) + return PodcastDetail.of(podcast) + + +@router.post("/podcasts/{podcast_id}/transcript/approve", response_model=PodcastDetail) +async def approve_transcript( + podcast_id: int, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + """Approve the transcript and start rendering audio.""" + podcast = await _load(session, user, podcast_id, Permission.PODCASTS_UPDATE) + async with _lifecycle_errors(): + await PodcastService(session).approve(podcast) + await session.commit() + render_audio_task.delay(podcast.id) + return PodcastDetail.of(podcast) + + +@router.post( + "/podcasts/{podcast_id}/transcript/regenerate", response_model=PodcastDetail +) +async def regenerate_transcript( + podcast_id: int, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + """Reject the transcript and draft a fresh one.""" + podcast = await _load(session, user, podcast_id, Permission.PODCASTS_UPDATE) + async with _lifecycle_errors(): + await PodcastService(session).regenerate(podcast) + await session.commit() + draft_transcript_task.delay(podcast.id, podcast.search_space_id) + return PodcastDetail.of(podcast) + + +@router.post("/podcasts/{podcast_id}/cancel", 
response_model=PodcastDetail) +async def cancel_podcast( + podcast_id: int, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + podcast = await _load(session, user, podcast_id, Permission.PODCASTS_UPDATE) + async with _lifecycle_errors(): + await PodcastService(session).cancel(podcast) + await session.commit() + return PodcastDetail.of(podcast) + + +@router.delete("/podcasts/{podcast_id}", response_model=dict) +async def delete_podcast( + podcast_id: int, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + podcast = await _load(session, user, podcast_id, Permission.PODCASTS_DELETE) + await purge_audio(podcast) + await session.delete(podcast) + await session.commit() + return {"message": "Podcast deleted successfully"} + + +@router.get("/podcasts/{podcast_id}/stream") +async def stream_podcast( + podcast_id: int, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + podcast = await _load(session, user, podcast_id, Permission.PODCASTS_READ) + + if podcast.storage_key: + return StreamingResponse( + open_audio_stream(podcast), + media_type="audio/mpeg", + headers={"Accept-Ranges": "bytes"}, + ) + + # Back-compat: rows rendered before the storage migration kept a local path. 
+ if podcast.file_location and os.path.isfile(podcast.file_location): + path = podcast.file_location + + def iterfile(): + with open(path, mode="rb") as handle: + yield from iter(lambda: handle.read(65536), b"") + + return StreamingResponse( + iterfile(), + media_type="audio/mpeg", + headers={ + "Accept-Ranges": "bytes", + "Content-Disposition": f'inline; filename="{Path(path).name}"', + }, + ) + + raise HTTPException(status_code=404, detail="Podcast audio not found") + + +async def _require( + session: AsyncSession, + user: User, + search_space_id: int, + permission: Permission, +) -> None: + await check_permission( + session, + user, + search_space_id, + permission.value, + "You don't have permission for podcasts in this search space", + ) + + +async def _load( + session: AsyncSession, + user: User, + podcast_id: int, + permission: Permission, +) -> Podcast: + podcast = await PodcastRepository(session).get(podcast_id) + if podcast is None: + raise HTTPException(status_code=404, detail="Podcast not found") + await _require(session, user, podcast.search_space_id, permission) + return podcast + + +class _lifecycle_errors: + """Map service lifecycle errors onto HTTP responses.""" + + async def __aenter__(self) -> None: + return None + + async def __aexit__(self, exc_type, exc, tb) -> bool: + if exc is None: + return False + if isinstance(exc, SpecConflict): + raise HTTPException(status_code=409, detail=str(exc)) from exc + if isinstance(exc, InvalidTransition): + raise HTTPException(status_code=409, detail=str(exc)) from exc + if isinstance(exc, PreconditionFailed): + raise HTTPException(status_code=422, detail=str(exc)) from exc + return False diff --git a/surfsense_backend/app/podcasts/api/schemas.py b/surfsense_backend/app/podcasts/api/schemas.py new file mode 100644 index 000000000..d5aba0e14 --- /dev/null +++ b/surfsense_backend/app/podcasts/api/schemas.py @@ -0,0 +1,97 @@ +"""Request and response shapes for the podcast API. 
+ +Read models surface the lifecycle state the frontend can't derive from Zero (the +deserialized brief and transcript); the action requests carry just what each +guarded transition needs. +""" + +from __future__ import annotations + +from datetime import datetime + +from pydantic import BaseModel, ConfigDict, Field + +from app.podcasts.persistence import Podcast, PodcastStatus +from app.podcasts.schemas import PodcastSpec, Transcript +from app.podcasts.service import read_spec, read_transcript + +# Defaults applied when a create request omits brief sizing; the brief gate lets +# the user adjust before any cost is incurred. +DEFAULT_SPEAKER_COUNT = 2 +DEFAULT_MIN_MINUTES = 10 +DEFAULT_MAX_MINUTES = 20 + + +class CreatePodcastRequest(BaseModel): + """Create a podcast and kick off brief proposal.""" + + title: str = Field(..., min_length=1, max_length=500) + search_space_id: int + source_content: str = Field(..., min_length=1) + thread_id: int | None = None + speaker_count: int = Field(default=DEFAULT_SPEAKER_COUNT, ge=1, le=6) + min_minutes: int = Field(default=DEFAULT_MIN_MINUTES, ge=1) + max_minutes: int = Field(default=DEFAULT_MAX_MINUTES, ge=1) + focus: str | None = Field(default=None, max_length=2000) + + +class UpdateSpecRequest(BaseModel): + """Replace the brief at the gate, guarded by the expected version.""" + + spec: PodcastSpec + expected_version: int = Field(..., ge=1) + + +class VoiceOption(BaseModel): + """One selectable voice surfaced to the brief editor.""" + + voice_id: str + display_name: str + language: str + gender: str + + +class PodcastSummary(BaseModel): + """Lightweight list item.""" + + model_config = ConfigDict(from_attributes=True) + + id: int + title: str + status: PodcastStatus + created_at: datetime + search_space_id: int + + +class PodcastDetail(BaseModel): + """Full podcast state for the detail view and action responses.""" + + id: int + title: str + status: PodcastStatus + spec_version: int + spec: PodcastSpec | None + transcript: 
Transcript | None + has_audio: bool + duration_seconds: int | None + error: str | None + created_at: datetime + search_space_id: int + thread_id: int | None + + @classmethod + def of(cls, podcast: Podcast) -> PodcastDetail: + return cls( + id=podcast.id, + title=podcast.title, + status=PodcastStatus(podcast.status), + spec_version=podcast.spec_version, + spec=read_spec(podcast), + transcript=read_transcript(podcast), + has_audio=bool(podcast.storage_key or podcast.file_location), + duration_seconds=podcast.duration_seconds, + error=podcast.error, + created_at=podcast.created_at, + search_space_id=podcast.search_space_id, + thread_id=podcast.thread_id, + ) From 89ceae8bab3f5e4573c2f518e63b661dea870536 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 10 Jun 2026 18:44:12 +0200 Subject: [PATCH 13/50] refactor(podcasts): re-export podcast model from module --- surfsense_backend/app/db.py | 46 ++++--------------------------------- 1 file changed, 4 insertions(+), 42 deletions(-) diff --git a/surfsense_backend/app/db.py b/surfsense_backend/app/db.py index 6117caecb..a2cb3b61f 100644 --- a/surfsense_backend/app/db.py +++ b/surfsense_backend/app/db.py @@ -114,13 +114,6 @@ class SearchSourceConnectorType(StrEnum): COMPOSIO_GOOGLE_CALENDAR_CONNECTOR = "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR" -class PodcastStatus(StrEnum): - PENDING = "pending" - GENERATING = "generating" - READY = "ready" - FAILED = "failed" - - class VideoPresentationStatus(StrEnum): PENDING = "pending" GENERATING = "generating" @@ -1536,41 +1529,6 @@ class Chunk(BaseModel, TimestampMixin): document = relationship("Document", back_populates="chunks") -class Podcast(BaseModel, TimestampMixin): - """Podcast model for storing generated podcasts.""" - - __tablename__ = "podcasts" - - title = Column(String(500), nullable=False) - podcast_transcript = Column(JSONB, nullable=True) - file_location = Column(Text, nullable=True) - status = Column( - SQLAlchemyEnum( - PodcastStatus, - name="podcast_status", - create_type=False, - 
values_callable=lambda x: [e.value for e in x], - ), - nullable=False, - default=PodcastStatus.READY, - server_default="ready", - index=True, - ) - - search_space_id = Column( - Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False - ) - search_space = relationship("SearchSpace", back_populates="podcasts") - - thread_id = Column( - Integer, - ForeignKey("new_chat_threads.id", ondelete="SET NULL"), - nullable=True, - index=True, - ) - thread = relationship("NewChatThread") - - class VideoPresentation(BaseModel, TimestampMixin): """Video presentation model for storing AI-generated video presentations. @@ -2889,6 +2847,10 @@ from app.automations.persistence import ( # noqa: E402, F401 ) from app.file_storage.persistence import DocumentFile # noqa: E402, F401 from app.notifications.persistence import Notification # noqa: E402, F401 +from app.podcasts.persistence import ( # noqa: E402, F401 + Podcast, + PodcastStatus, +) engine = create_async_engine( DATABASE_URL, From 5d956e8d0322ae1b97cd0cedea1aa10a14dcc1b9 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 10 Jun 2026 18:44:12 +0200 Subject: [PATCH 14/50] feat(podcasts): register podcast tasks --- surfsense_backend/app/celery_app.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/surfsense_backend/app/celery_app.py b/surfsense_backend/app/celery_app.py index 0e852b801..9622647f3 100644 --- a/surfsense_backend/app/celery_app.py +++ b/surfsense_backend/app/celery_app.py @@ -182,6 +182,9 @@ celery_app = Celery( include=[ "app.tasks.celery_tasks.document_tasks", "app.tasks.celery_tasks.podcast_tasks", + "app.podcasts.tasks.brief", + "app.podcasts.tasks.draft", + "app.podcasts.tasks.render", "app.tasks.celery_tasks.video_presentation_tasks", "app.tasks.celery_tasks.connector_tasks", "app.tasks.celery_tasks.obsidian_tasks", From 1ebb57e1dff1edd6dd1fb95d3e85dfdcede5eb4d Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 10 Jun 2026 18:44:12 +0200 Subject: [PATCH 15/50] feat(podcasts): publish 
podcasts to zero --- surfsense_backend/app/zero_publication.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/surfsense_backend/app/zero_publication.py b/surfsense_backend/app/zero_publication.py index d2755d0a1..a97a4d27c 100644 --- a/surfsense_backend/app/zero_publication.py +++ b/surfsense_backend/app/zero_publication.py @@ -55,6 +55,22 @@ AUTOMATION_RUN_COLS = [ "created_at", ] +# Enough to drive the lifecycle UI by push: status, the reviewable brief, and +# its version. The bulky source_content and transcript are deliberately excluded +# and fetched over REST when a gate opens. +PODCAST_COLS = [ + "id", + "title", + "status", + "spec", + "spec_version", + "duration_seconds", + "error", + "search_space_id", + "thread_id", + "created_at", +] + ZERO_PUBLICATION: Mapping[str, Sequence[str] | None] = { "notifications": None, "documents": DOCUMENT_COLS, @@ -65,6 +81,7 @@ ZERO_PUBLICATION: Mapping[str, Sequence[str] | None] = { "chat_session_state": None, "user": USER_COLS, "automation_runs": AUTOMATION_RUN_COLS, + "podcasts": PODCAST_COLS, } @@ -92,7 +109,9 @@ def _expected_columns(conn: Connection, table: str) -> list[str] | None: return None expected = list(columns) - if table in {"documents", "user"} and _column_exists(conn, table, "_0_version"): + if table in {"documents", "user", "podcasts"} and _column_exists( + conn, table, "_0_version" + ): expected.append("_0_version") return expected From 63f5f1283489830458bb93c7ee8c9c74fbb98949 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 10 Jun 2026 18:44:12 +0200 Subject: [PATCH 16/50] feat(podcasts): add lifecycle migration --- .../versions/156_evolve_podcasts_lifecycle.py | 92 +++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 surfsense_backend/alembic/versions/156_evolve_podcasts_lifecycle.py diff --git a/surfsense_backend/alembic/versions/156_evolve_podcasts_lifecycle.py b/surfsense_backend/alembic/versions/156_evolve_podcasts_lifecycle.py new file 
mode 100644 index 000000000..f10bacb56 --- /dev/null +++ b/surfsense_backend/alembic/versions/156_evolve_podcasts_lifecycle.py @@ -0,0 +1,92 @@ +"""evolve podcasts: expand status lifecycle and add brief/transcript/storage columns + +Revision ID: 156 +Revises: 155 +""" + +from collections.abc import Sequence + +from alembic import op + +revision: str = "156" +down_revision: str | None = "155" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + # Retype the status enum by swapping in a fresh type and casting existing + # rows. The legacy transient value 'generating' maps onto 'rendering'. + op.execute("ALTER TYPE podcast_status RENAME TO podcast_status_old;") + op.execute( + """ + CREATE TYPE podcast_status AS ENUM ( + 'pending', 'awaiting_brief', 'drafting', 'awaiting_review', + 'rendering', 'ready', 'failed', 'cancelled' + ); + """ + ) + op.execute("ALTER TABLE podcasts ALTER COLUMN status DROP DEFAULT;") + op.execute( + """ + ALTER TABLE podcasts + ALTER COLUMN status TYPE podcast_status + USING ( + CASE status::text + WHEN 'generating' THEN 'rendering' + ELSE status::text + END + )::podcast_status; + """ + ) + op.execute("ALTER TABLE podcasts ALTER COLUMN status SET DEFAULT 'pending';") + op.execute("DROP TYPE podcast_status_old;") + + op.execute("ALTER TABLE podcasts ADD COLUMN IF NOT EXISTS source_content TEXT;") + op.execute("ALTER TABLE podcasts ADD COLUMN IF NOT EXISTS spec JSONB;") + op.execute( + "ALTER TABLE podcasts ADD COLUMN IF NOT EXISTS spec_version " + "INTEGER NOT NULL DEFAULT 1;" + ) + op.execute( + "ALTER TABLE podcasts ADD COLUMN IF NOT EXISTS storage_backend VARCHAR(32);" + ) + op.execute("ALTER TABLE podcasts ADD COLUMN IF NOT EXISTS storage_key TEXT;") + op.execute("ALTER TABLE podcasts ADD COLUMN IF NOT EXISTS duration_seconds INTEGER;") + op.execute("ALTER TABLE podcasts ADD COLUMN IF NOT EXISTS error TEXT;") + + +def downgrade() -> None: + op.execute("ALTER TABLE 
podcasts DROP COLUMN IF EXISTS error;") + op.execute("ALTER TABLE podcasts DROP COLUMN IF EXISTS duration_seconds;") + op.execute("ALTER TABLE podcasts DROP COLUMN IF EXISTS storage_key;") + op.execute("ALTER TABLE podcasts DROP COLUMN IF EXISTS storage_backend;") + op.execute("ALTER TABLE podcasts DROP COLUMN IF EXISTS spec_version;") + op.execute("ALTER TABLE podcasts DROP COLUMN IF EXISTS spec;") + op.execute("ALTER TABLE podcasts DROP COLUMN IF EXISTS source_content;") + + # Collapse the expanded lifecycle back onto the original four values. + op.execute("ALTER TYPE podcast_status RENAME TO podcast_status_new;") + op.execute( + "CREATE TYPE podcast_status AS ENUM " + "('pending', 'generating', 'ready', 'failed');" + ) + op.execute("ALTER TABLE podcasts ALTER COLUMN status DROP DEFAULT;") + op.execute( + """ + ALTER TABLE podcasts + ALTER COLUMN status TYPE podcast_status + USING ( + CASE status::text + WHEN 'awaiting_brief' THEN 'pending' + WHEN 'drafting' THEN 'generating' + WHEN 'awaiting_review' THEN 'generating' + WHEN 'rendering' THEN 'generating' + WHEN 'cancelled' THEN 'failed' + ELSE status::text + END + )::podcast_status; + """ + ) + op.execute("ALTER TABLE podcasts ALTER COLUMN status SET DEFAULT 'ready';") + op.execute("DROP TYPE podcast_status_new;") From 467bcd4f7b54207f685b65cf10f584db6b86b65c Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 10 Jun 2026 18:44:12 +0200 Subject: [PATCH 17/50] feat(podcasts): add zero publication migration --- .../versions/157_publish_podcasts_to_zero.py | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 surfsense_backend/alembic/versions/157_publish_podcasts_to_zero.py diff --git a/surfsense_backend/alembic/versions/157_publish_podcasts_to_zero.py b/surfsense_backend/alembic/versions/157_publish_podcasts_to_zero.py new file mode 100644 index 000000000..d495796fb --- /dev/null +++ b/surfsense_backend/alembic/versions/157_publish_podcasts_to_zero.py @@ -0,0 +1,27 @@ +"""publish podcasts to 
zero_publication + +Reconciles ``zero_publication`` after migration 156 added the lifecycle columns, +so the frontend observes podcast status and the reviewable brief by push. + +Revision ID: 157 +Revises: 156 +""" + +from collections.abc import Sequence + +from alembic import op + +from app.zero_publication import apply_publication + +revision: str = "157" +down_revision: str | None = "156" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + apply_publication(op.get_bind()) + + +def downgrade() -> None: + """No-op. Historical publication shapes are immutable.""" From eaaeebc1bb12249fefc757cfbfe957bff0ad674f Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 10 Jun 2026 18:44:12 +0200 Subject: [PATCH 18/50] fix(podcasts): anchor podcasts ignore to blob dir --- surfsense_backend/.gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/surfsense_backend/.gitignore b/surfsense_backend/.gitignore index efc6c90d7..bd233e459 100644 --- a/surfsense_backend/.gitignore +++ b/surfsense_backend/.gitignore @@ -6,7 +6,7 @@ data/ __pycache__/ .flashrank_cache surf_new_backend.egg-info/ -podcasts/ +/podcasts/ video_presentation_audio/ sandbox_files/ temp_audio/ From f61e8af8c0bc3a7d8541140e3a3b9a5a7b05acf8 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 10 Jun 2026 18:44:25 +0200 Subject: [PATCH 19/50] test(podcasts): add shared test fixtures --- .../tests/unit/podcasts/conftest.py | 142 ++++++++++++++++++ 1 file changed, 142 insertions(+) create mode 100644 surfsense_backend/tests/unit/podcasts/conftest.py diff --git a/surfsense_backend/tests/unit/podcasts/conftest.py b/surfsense_backend/tests/unit/podcasts/conftest.py new file mode 100644 index 000000000..446982904 --- /dev/null +++ b/surfsense_backend/tests/unit/podcasts/conftest.py @@ -0,0 +1,142 @@ +"""Shared builders for podcast unit tests. + +These tests exercise the podcast domain through its public interfaces. 
The only +test double is a minimal stand-in for the SQLAlchemy ``AsyncSession`` — a real +system boundary — so the service's own repository and state machine run for +real. Briefs and transcripts are built with valid factories so each test states +just the fields it cares about. +""" + +from __future__ import annotations + +import pytest + +from app.podcasts.schemas import ( + DurationTarget, + PodcastSpec, + PodcastStyle, + SpeakerRole, + SpeakerSpec, + Transcript, + TranscriptTurn, +) + + +class FakeAsyncSession: + """A no-op stand-in for ``AsyncSession`` at the persistence boundary. + + The service flushes to assign state within a unit of work; in a unit test + there is no database, so ``add``/``flush`` simply do nothing. Behavior is + observed through the returned aggregate, never through this double. + """ + + def add(self, _obj: object) -> None: + return None + + async def flush(self) -> None: + return None + + +class FakeCeleryDbSession(FakeAsyncSession): + """An async-context session double for Celery task bodies. + + Task bodies open ``get_celery_session_maker()()`` as an async context, + ``get`` the row, then ``commit``. This holds one preloaded podcast and + records whether the body committed, so tests assert on the row's final + state — not on the calls made to get there. 
+ """ + + def __init__(self, podcast: object | None = None) -> None: + self._podcast = podcast + self.committed = False + + async def get(self, _model: object, _id: object) -> object | None: + return self._podcast + + async def commit(self) -> None: + self.committed = True + + async def __aenter__(self) -> FakeCeleryDbSession: + return self + + async def __aexit__(self, *_exc: object) -> None: + return None + + +@pytest.fixture +def fake_session() -> FakeAsyncSession: + return FakeAsyncSession() + + +@pytest.fixture +def make_celery_session(): + """Factory for a Celery-style session double holding one podcast.""" + + def _make(podcast: object | None = None) -> FakeCeleryDbSession: + return FakeCeleryDbSession(podcast) + + return _make + + +@pytest.fixture +def session_maker_for(): + """Build a ``get_celery_session_maker`` replacement bound to one session. + + ``get_celery_session_maker()()`` must yield the session, so the replacement + is a zero-arg callable returning a maker that returns the session. 
+ """ + + def _make(session: object): + return lambda: (lambda: session) + + return _make + + +@pytest.fixture +def make_spec(): + """Factory for a valid :class:`PodcastSpec`; override only what matters.""" + + def _make( + *, + language: str = "en", + style: PodcastStyle = PodcastStyle.CONVERSATIONAL, + speakers: list[SpeakerSpec] | None = None, + min_minutes: int = 10, + max_minutes: int = 20, + focus: str | None = None, + ) -> PodcastSpec: + if speakers is None: + speakers = [ + SpeakerSpec( + slot=0, name="Host", role=SpeakerRole.HOST, voice_id="kokoro:am_adam" + ), + SpeakerSpec( + slot=1, + name="Guest", + role=SpeakerRole.GUEST, + voice_id="kokoro:af_bella", + ), + ] + return PodcastSpec( + language=language, + style=style, + speakers=speakers, + duration=DurationTarget(min_minutes=min_minutes, max_minutes=max_minutes), + focus=focus, + ) + + return _make + + +@pytest.fixture +def make_transcript(): + """Factory for a valid :class:`Transcript`.""" + + def _make(turns: list[tuple[int, str]] | None = None) -> Transcript: + if turns is None: + turns = [(0, "Welcome to the show."), (1, "Glad to be here.")] + return Transcript( + turns=[TranscriptTurn(speaker=slot, text=text) for slot, text in turns] + ) + + return _make From 9d8e4e4f9dd3c441619408828f2561783fbc2b1e Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 10 Jun 2026 18:44:25 +0200 Subject: [PATCH 20/50] test(podcasts): cover lifecycle state machine --- .../tests/unit/podcasts/test_lifecycle.py | 163 ++++++++++++++++++ 1 file changed, 163 insertions(+) create mode 100644 surfsense_backend/tests/unit/podcasts/test_lifecycle.py diff --git a/surfsense_backend/tests/unit/podcasts/test_lifecycle.py b/surfsense_backend/tests/unit/podcasts/test_lifecycle.py new file mode 100644 index 000000000..5f61c7562 --- /dev/null +++ b/surfsense_backend/tests/unit/podcasts/test_lifecycle.py @@ -0,0 +1,163 @@ +"""The podcast lifecycle: the guarantees the rest of the system relies on. 
+ +These tests drive the aggregate through :class:`PodcastService`'s public +methods and observe the resulting status and stored brief/transcript — the +domain's contract. They say nothing about how the service stores or flushes, +so they survive any refactor that preserves the lifecycle. +""" + +from __future__ import annotations + +import pytest + +from app.podcasts.persistence import PodcastStatus +from app.podcasts.service import ( + InvalidTransition, + PodcastService, + PreconditionFailed, + SpecConflict, + read_spec, + read_transcript, +) + +pytestmark = pytest.mark.unit + + +async def test_a_podcast_progresses_from_creation_to_ready( + fake_session, make_spec, make_transcript +): + """The full happy path: create → brief → draft → review → render → ready.""" + service = PodcastService(fake_session) + + podcast = await service.create(title="Episode 1", search_space_id=7) + assert podcast.status == PodcastStatus.PENDING + + spec = make_spec() + await service.attach_brief(podcast, spec) + assert podcast.status == PodcastStatus.AWAITING_BRIEF + assert read_spec(podcast) == spec + + await service.begin_drafting(podcast) + assert podcast.status == PodcastStatus.DRAFTING + + transcript = make_transcript() + await service.attach_transcript(podcast, transcript) + assert podcast.status == PodcastStatus.AWAITING_REVIEW + assert read_transcript(podcast) == transcript + + await service.approve(podcast) + assert podcast.status == PodcastStatus.RENDERING + + await service.attach_audio( + podcast, storage_backend="local", storage_key="k", duration_seconds=42 + ) + assert podcast.status == PodcastStatus.READY + assert podcast.duration_seconds == 42 + + +async def test_drafting_requires_an_approved_brief(fake_session): + """A brief must exist before drafting can begin.""" + service = PodcastService(fake_session) + podcast = await service.create(title="No brief", search_space_id=1) + + with pytest.raises(PreconditionFailed): + await service.begin_drafting(podcast) + + +async 
def test_rendering_requires_a_transcript(fake_session, make_spec): + """Approval to render is refused when no transcript has been drafted.""" + service = PodcastService(fake_session) + podcast = await service.create(title="No transcript", search_space_id=1) + await service.attach_brief(podcast, make_spec()) + await service.begin_drafting(podcast) + + with pytest.raises(PreconditionFailed): + await service.approve(podcast) + + +async def test_regenerate_returns_a_reviewed_transcript_to_drafting( + fake_session, make_spec, make_transcript +): + """At the go/no-go gate, rejecting sends the podcast back to drafting.""" + service = PodcastService(fake_session) + podcast = await service.create(title="Redo", search_space_id=1) + await service.attach_brief(podcast, make_spec()) + await service.begin_drafting(podcast) + await service.attach_transcript(podcast, make_transcript()) + + await service.regenerate(podcast) + + assert podcast.status == PodcastStatus.DRAFTING + + +async def test_brief_can_be_edited_at_the_gate_and_bumps_its_version( + fake_session, make_spec +): + """Editing the brief while it awaits approval records it and advances version.""" + service = PodcastService(fake_session) + podcast = await service.create(title="Editable", search_space_id=1) + await service.attach_brief(podcast, make_spec(language="en")) + starting_version = podcast.spec_version + + await service.update_spec(podcast, make_spec(language="fr"), starting_version) + + assert read_spec(podcast).language == "fr" + assert podcast.spec_version == starting_version + 1 + + +async def test_editing_a_brief_with_a_stale_version_conflicts( + fake_session, make_spec +): + """A concurrent edit racing on a stale version is rejected, not silently lost.""" + service = PodcastService(fake_session) + podcast = await service.create(title="Raced", search_space_id=1) + await service.attach_brief(podcast, make_spec()) + current = podcast.spec_version + + with pytest.raises(SpecConflict): + await 
service.update_spec(podcast, make_spec(language="es"), current - 1) + + +async def test_brief_cannot_be_edited_after_the_gate_closes( + fake_session, make_spec +): + """Once drafting starts, the brief is settled and edits are refused.""" + service = PodcastService(fake_session) + podcast = await service.create(title="Locked", search_space_id=1) + await service.attach_brief(podcast, make_spec()) + await service.begin_drafting(podcast) + + with pytest.raises(InvalidTransition): + await service.update_spec(podcast, make_spec(language="es"), podcast.spec_version) + + +async def test_a_podcast_can_be_cancelled_while_in_flight(fake_session, make_spec): + """Cancellation is available from a non-terminal state.""" + service = PodcastService(fake_session) + podcast = await service.create(title="Abort", search_space_id=1) + await service.attach_brief(podcast, make_spec()) + + await service.cancel(podcast) + + assert podcast.status == PodcastStatus.CANCELLED + + +async def test_failure_records_a_reason(fake_session): + """Failing a podcast captures a human-readable reason.""" + service = PodcastService(fake_session) + podcast = await service.create(title="Boom", search_space_id=1) + + await service.fail(podcast, "tts provider unavailable") + + assert podcast.status == PodcastStatus.FAILED + assert podcast.error == "tts provider unavailable" + + +async def test_terminal_podcasts_reject_further_transitions(fake_session): + """A finished podcast cannot be cancelled or otherwise moved.""" + service = PodcastService(fake_session) + podcast = await service.create(title="Done", search_space_id=1) + await service.cancel(podcast) + + with pytest.raises(InvalidTransition): + await service.fail(podcast, "too late") From aaa9f01087721787580391a4d052afd10f1f4db6 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 10 Jun 2026 18:44:25 +0200 Subject: [PATCH 21/50] test(podcasts): cover brief and transcript contracts --- .../tests/unit/podcasts/test_spec.py | 141 ++++++++++++++++++ 1 file 
changed, 141 insertions(+) create mode 100644 surfsense_backend/tests/unit/podcasts/test_spec.py diff --git a/surfsense_backend/tests/unit/podcasts/test_spec.py b/surfsense_backend/tests/unit/podcasts/test_spec.py new file mode 100644 index 000000000..938515988 --- /dev/null +++ b/surfsense_backend/tests/unit/podcasts/test_spec.py @@ -0,0 +1,141 @@ +"""The brief and transcript contracts. + +A brief is what a user approves before any tokens or audio are spent, so its +validation rules are real behavior: they are the guardrails that keep a +nonsensical or ambiguous brief from ever reaching the expensive stages. These +tests pin those rules through construction of the public Pydantic models. +""" + +from __future__ import annotations + +import pytest +from pydantic import ValidationError + +from app.podcasts.schemas import ( + DurationTarget, + PodcastSpec, + SpeakerRole, + SpeakerSpec, + Transcript, + TranscriptTurn, + normalize_language_tag, +) + +pytestmark = pytest.mark.unit + + +def _speaker(slot: int, voice_id: str = "kokoro:am_adam") -> SpeakerSpec: + return SpeakerSpec( + slot=slot, name=f"Speaker {slot}", role=SpeakerRole.HOST, voice_id=voice_id + ) + + +@pytest.mark.parametrize( + ("raw", "expected"), + [ + ("EN", "en"), + ("en-US", "en-US"), + ("PT-BR", "pt-BR"), + (" fr ", "fr"), + ], +) +def test_language_is_normalized_to_canonical_form(raw, expected): + """The primary subtag is lowercased and surrounding space trimmed.""" + assert normalize_language_tag(raw) == expected + + +@pytest.mark.parametrize("invalid", ["", "e", "english!", "123", "en_US"]) +def test_invalid_language_tags_are_rejected(invalid): + """Tags that are not BCP-47-shaped never reach a brief.""" + with pytest.raises(ValueError): + normalize_language_tag(invalid) + + +def test_spec_normalizes_its_language_on_construction(): + """A brief stores a canonical language regardless of how it was entered.""" + spec = PodcastSpec( + language="EN-us", + speakers=[_speaker(0)], + 
duration=DurationTarget(min_minutes=5, max_minutes=10), + ) + assert spec.language == "en-us" + + +def test_speakers_must_have_unique_slots(): + """Slots are the join key to transcript turns, so duplicates are invalid.""" + with pytest.raises(ValidationError): + PodcastSpec( + language="en", + speakers=[_speaker(0), _speaker(0, voice_id="kokoro:af_bella")], + duration=DurationTarget(min_minutes=5, max_minutes=10), + ) + + +def test_a_brief_needs_at_least_one_speaker(): + with pytest.raises(ValidationError): + PodcastSpec( + language="en", + speakers=[], + duration=DurationTarget(min_minutes=5, max_minutes=10), + ) + + +def test_duration_rejects_an_inverted_range(): + """A max below the min is a user error caught at the brief gate.""" + with pytest.raises(ValidationError): + DurationTarget(min_minutes=20, max_minutes=10) + + +def test_duration_midpoint_is_where_drafting_aims(): + assert DurationTarget(min_minutes=10, max_minutes=20).midpoint_minutes == 15 + + +def test_blank_focus_becomes_absent(): + """Whitespace-only steer is treated as no steer.""" + spec = PodcastSpec( + language="en", + speakers=[_speaker(0)], + duration=DurationTarget(min_minutes=5, max_minutes=10), + focus=" ", + ) + assert spec.focus is None + + +def test_speaker_for_returns_the_speaker_bound_to_a_slot(): + spec = PodcastSpec( + language="en", + speakers=[_speaker(0), _speaker(1, voice_id="kokoro:af_bella")], + duration=DurationTarget(min_minutes=5, max_minutes=10), + ) + assert spec.speaker_for(1).voice_id == "kokoro:af_bella" + + +def test_speaker_for_raises_when_no_speaker_matches(): + spec = PodcastSpec( + language="en", + speakers=[_speaker(0)], + duration=DurationTarget(min_minutes=5, max_minutes=10), + ) + with pytest.raises(KeyError): + spec.speaker_for(99) + + +def test_transcript_word_count_sums_spoken_words(): + """Word count is what drafting checks runtime against, so it must be exact.""" + transcript = Transcript( + turns=[ + TranscriptTurn(speaker=0, text="hello there world"), 
+ TranscriptTurn(speaker=1, text="one two"), + ] + ) + assert transcript.word_count == 5 + + +def test_blank_transcript_turns_are_rejected(): + with pytest.raises(ValidationError): + TranscriptTurn(speaker=0, text=" ") + + +def test_a_transcript_needs_at_least_one_turn(): + with pytest.raises(ValidationError): + Transcript(turns=[]) From e926990d8eb3a3a0ffff6206e86615f2f44dcdae Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 10 Jun 2026 18:44:25 +0200 Subject: [PATCH 22/50] test(podcasts): cover language and voice resolution --- .../tests/unit/podcasts/test_resolution.py | 100 ++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 surfsense_backend/tests/unit/podcasts/test_resolution.py diff --git a/surfsense_backend/tests/unit/podcasts/test_resolution.py b/surfsense_backend/tests/unit/podcasts/test_resolution.py new file mode 100644 index 000000000..4fe3df5cd --- /dev/null +++ b/surfsense_backend/tests/unit/podcasts/test_resolution.py @@ -0,0 +1,100 @@ +"""Default language and voice selection for a fresh brief. + +Resolution is what lets most briefs need no edits: it proposes a sensible +language and a distinct voice per speaker. These tests state the policy +("detected wins, else last-used, else English"; "two speakers should sound +like two people") through the public resolver functions and the real catalog. 
+""" + +from __future__ import annotations + +import pytest + +from app.podcasts.resolution import ( + DEFAULT_LANGUAGE, + LanguageContext, + VoiceResolutionError, + resolve_language, + resolve_voices, +) +from app.podcasts.voices import TtsProvider, get_voice_catalog + +pytestmark = pytest.mark.unit + + +def test_detected_language_is_preferred_over_everything(): + context = LanguageContext(detected="es", last_used="fr") + assert resolve_language(context) == "es" + + +def test_falls_back_to_last_used_when_nothing_detected(): + context = LanguageContext(detected=None, last_used="fr") + assert resolve_language(context) == "fr" + + +def test_first_time_user_with_no_signal_gets_the_default(): + assert resolve_language(LanguageContext()) == DEFAULT_LANGUAGE + + +def test_two_speakers_get_distinct_voices(): + """A two-speaker episode should not voice both with the same person.""" + catalog = get_voice_catalog() + voices = resolve_voices( + catalog=catalog, provider=TtsProvider.KOKORO, language="en", speaker_count=2 + ) + assert len(voices) == 2 + assert voices[0].voice_id != voices[1].voice_id + + +def test_a_users_preferred_voice_is_reused_when_still_valid(): + catalog = get_voice_catalog() + voices = resolve_voices( + catalog=catalog, + provider=TtsProvider.KOKORO, + language="en", + speaker_count=2, + preferred=["kokoro:af_bella"], + ) + assert voices[0].voice_id == "kokoro:af_bella" + + +def test_a_preferred_voice_invalid_for_the_language_is_replaced(): + """A stale preference (wrong provider/language) is silently dropped.""" + catalog = get_voice_catalog() + voices = resolve_voices( + catalog=catalog, + provider=TtsProvider.KOKORO, + language="en", + speaker_count=1, + preferred=["kokoro:does-not-exist"], + ) + assert voices[0].voice_id in {v.voice_id for v in catalog.for_provider(TtsProvider.KOKORO)} + + +def test_resolution_fails_when_no_voice_speaks_the_language(): + """If a provider can't speak the language at all, that is surfaced loudly.""" + catalog = 
get_voice_catalog() + with pytest.raises(VoiceResolutionError): + resolve_voices( + catalog=catalog, + provider=TtsProvider.KOKORO, + language="xx", + speaker_count=1, + ) + + +def test_every_speaker_is_assigned_even_when_voices_run_out(): + """With one available voice, both speakers still get one rather than failing.""" + catalog = get_voice_catalog() + voices = resolve_voices( + catalog=catalog, provider=TtsProvider.KOKORO, language="fr", speaker_count=2 + ) + assert len(voices) == 2 + + +def test_speaker_count_must_be_positive(): + catalog = get_voice_catalog() + with pytest.raises(ValueError): + resolve_voices( + catalog=catalog, provider=TtsProvider.KOKORO, language="en", speaker_count=0 + ) From 0c92ee963e7140088710aea5938b67eb1248bd2e Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 10 Jun 2026 18:44:25 +0200 Subject: [PATCH 23/50] test(podcasts): cover voice catalog --- .../tests/unit/podcasts/test_voice_catalog.py | 103 ++++++++++++++++++ 1 file changed, 103 insertions(+) create mode 100644 surfsense_backend/tests/unit/podcasts/test_voice_catalog.py diff --git a/surfsense_backend/tests/unit/podcasts/test_voice_catalog.py b/surfsense_backend/tests/unit/podcasts/test_voice_catalog.py new file mode 100644 index 000000000..d94c85922 --- /dev/null +++ b/surfsense_backend/tests/unit/podcasts/test_voice_catalog.py @@ -0,0 +1,103 @@ +"""The voice catalog and provider identification. + +The catalog is the single source of truth for which voices exist; resolution, +the API picker, and the renderer all depend on its lookups behaving correctly. +These tests build a small catalog of their own so they assert on the lookup +behavior, not on which specific voices ship. 
+""" + +from __future__ import annotations + +import pytest + +from app.podcasts.voices import ( + ANY_LANGUAGE, + CatalogVoice, + TtsProvider, + VoiceCatalog, + VoiceGender, + provider_from_service, +) + +pytestmark = pytest.mark.unit + + +def _voice( + voice_id: str, + *, + provider: TtsProvider = TtsProvider.KOKORO, + language: str = "en-US", + gender: VoiceGender = VoiceGender.MALE, +) -> CatalogVoice: + return CatalogVoice( + voice_id=voice_id, + provider=provider, + language=language, + display_name=voice_id, + gender=gender, + native_ref=voice_id, + ) + + +def test_for_provider_returns_only_that_providers_voices(): + catalog = VoiceCatalog( + [ + _voice("k1", provider=TtsProvider.KOKORO), + _voice("o1", provider=TtsProvider.OPENAI), + ] + ) + assert [v.voice_id for v in catalog.for_provider(TtsProvider.KOKORO)] == ["k1"] + + +def test_for_language_matches_on_the_primary_subtag(): + """A request for 'en' should match an 'en-US' voice (region-insensitive).""" + catalog = VoiceCatalog([_voice("k1", language="en-US")]) + assert [v.voice_id for v in catalog.for_language(TtsProvider.KOKORO, "en")] == ["k1"] + + +def test_for_language_excludes_other_languages(): + catalog = VoiceCatalog([_voice("k1", language="en-US")]) + assert catalog.for_language(TtsProvider.KOKORO, "fr") == [] + + +def test_an_any_language_voice_speaks_every_language(): + """Language-agnostic voices (e.g. 
OpenAI) match whatever the text is in.""" + voice = _voice("o1", provider=TtsProvider.OPENAI, language=ANY_LANGUAGE) + assert voice.speaks("ja") + assert voice.speaks("pt-BR") + + +def test_supports_language_reports_availability(): + catalog = VoiceCatalog([_voice("k1", language="en-US")]) + assert catalog.supports_language(TtsProvider.KOKORO, "en") + assert not catalog.supports_language(TtsProvider.KOKORO, "de") + + +def test_get_raises_for_an_unknown_voice(): + catalog = VoiceCatalog([_voice("k1")]) + with pytest.raises(KeyError): + catalog.get("nope") + + +def test_a_catalog_rejects_duplicate_voice_ids(): + """Stored ids must be unique so a brief's voice_id resolves unambiguously.""" + with pytest.raises(ValueError): + VoiceCatalog([_voice("dup"), _voice("dup")]) + + +@pytest.mark.parametrize( + ("service", "expected"), + [ + ("openai/tts-1", TtsProvider.OPENAI), + ("azure/neural", TtsProvider.AZURE), + ("vertex_ai/some-model", TtsProvider.VERTEX_AI), + ("local/kokoro", TtsProvider.KOKORO), + ], +) +def test_provider_is_identified_from_the_config_string(service, expected): + assert provider_from_service(service) == expected + + +def test_unknown_provider_prefix_is_rejected(): + with pytest.raises(ValueError): + provider_from_service("madeup/model") From 36c201f9e23616b47ceb4ed71824b39a97822591 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 10 Jun 2026 18:44:25 +0200 Subject: [PATCH 24/50] test(podcasts): cover structured json parsing --- .../tests/unit/podcasts/test_structured.py | 68 +++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 surfsense_backend/tests/unit/podcasts/test_structured.py diff --git a/surfsense_backend/tests/unit/podcasts/test_structured.py b/surfsense_backend/tests/unit/podcasts/test_structured.py new file mode 100644 index 000000000..8d7b2226a --- /dev/null +++ b/surfsense_backend/tests/unit/podcasts/test_structured.py @@ -0,0 +1,68 @@ +"""Parsing a model's reply into a structured shape. 
+ +Agent LLMs wrap JSON in prose and markdown fences. ``invoke_json`` exists so +every generation node tolerates that the same way. The LLM is an external +boundary, so it is faked with a canned reply; the behavior under test is the +parsing, not the model. +""" + +from __future__ import annotations + +import pytest +from pydantic import BaseModel + +from app.podcasts.generation.structured import StructuredOutputError, invoke_json + +pytestmark = pytest.mark.unit + + +class _Shape(BaseModel): + name: str + count: int + + +class _CannedLLM: + """A canned stand-in for the chat model: replies with one fixed string.""" + + def __init__(self, reply: str) -> None: + self._reply = reply + + async def ainvoke(self, _messages): + return SimpleReply(self._reply) + + +class SimpleReply: + def __init__(self, content: str) -> None: + self.content = content + + +async def _parse(reply: str) -> _Shape: + return await invoke_json(_CannedLLM(reply), [], _Shape) + + +async def test_parses_a_clean_json_reply(): + shape = await _parse('{"name": "alpha", "count": 3}') + assert shape == _Shape(name="alpha", count=3) + + +async def test_parses_json_wrapped_in_a_markdown_fence(): + reply = '```json\n{"name": "beta", "count": 7}\n```' + shape = await _parse(reply) + assert shape == _Shape(name="beta", count=7) + + +async def test_extracts_json_embedded_in_prose(): + """Reasoning models prepend/append chatter around the object.""" + reply = 'Sure, here you go: {"name": "gamma", "count": 1} — hope that helps!' 
+ shape = await _parse(reply) + assert shape == _Shape(name="gamma", count=1) + + +async def test_raises_when_there_is_no_json_object(): + with pytest.raises(StructuredOutputError): + await _parse("I could not produce that.") + + +async def test_raises_when_the_json_does_not_match_the_shape(): + with pytest.raises(StructuredOutputError): + await _parse('{"name": "delta"}') From fa7ab8a06da2e3ad7fb0417fbc8fce5d9b5b6bfd Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 10 Jun 2026 18:44:25 +0200 Subject: [PATCH 25/50] test(podcasts): cover renderer validation --- .../tests/unit/podcasts/test_renderer.py | 90 +++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 surfsense_backend/tests/unit/podcasts/test_renderer.py diff --git a/surfsense_backend/tests/unit/podcasts/test_renderer.py b/surfsense_backend/tests/unit/podcasts/test_renderer.py new file mode 100644 index 000000000..f80e2a4c4 --- /dev/null +++ b/surfsense_backend/tests/unit/podcasts/test_renderer.py @@ -0,0 +1,90 @@ +"""The renderer refuses an inconsistent spec/transcript before spending work. + +Full synthesis-and-merge needs FFmpeg and a real provider, so it belongs to an +integration test. What is pure and worth securing here is the renderer's +contract that it validates the transcript against the brief up front: a turn +naming an unknown speaker, or a speaker naming an unknown voice, fails loudly +rather than producing silent or wrong audio. The TTS provider is an external +port, faked here and never expected to be called on these paths. 
+""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + +from app.podcasts.rendering import PodcastRenderer, RenderError +from app.podcasts.schemas import ( + DurationTarget, + PodcastSpec, + SpeakerRole, + SpeakerSpec, + Transcript, + TranscriptTurn, +) +from app.podcasts.tts import SynthesizedAudio +from app.podcasts.voices import CatalogVoice, TtsProvider, VoiceCatalog, VoiceGender + +pytestmark = pytest.mark.unit + + +class _UnusedTTS: + """A TTS port double that fails the test if it is ever asked to speak. + + These behaviors must short-circuit before synthesis, so any call here is a + regression. + """ + + @property + def container(self) -> str: + return "mp3" + + async def synthesize(self, _request): # pragma: no cover - must not run + raise AssertionError("synthesis should not be attempted") + return SynthesizedAudio(data=b"", container="mp3") + + +def _catalog_with(voice_id: str) -> VoiceCatalog: + return VoiceCatalog( + [ + CatalogVoice( + voice_id=voice_id, + provider=TtsProvider.KOKORO, + language="en-US", + display_name=voice_id, + gender=VoiceGender.MALE, + native_ref="am_adam", + ) + ] + ) + + +def _spec(voice_id: str) -> PodcastSpec: + return PodcastSpec( + language="en", + speakers=[ + SpeakerSpec(slot=0, name="Host", role=SpeakerRole.HOST, voice_id=voice_id) + ], + duration=DurationTarget(min_minutes=5, max_minutes=10), + ) + + +async def test_render_rejects_a_turn_for_an_unknown_speaker(tmp_path): + renderer = PodcastRenderer(tts=_UnusedTTS(), catalog=_catalog_with("kokoro:am_adam")) + transcript = Transcript(turns=[TranscriptTurn(speaker=5, text="Who am I?")]) + + with pytest.raises(RenderError): + await renderer.render( + spec=_spec("kokoro:am_adam"), transcript=transcript, workdir=Path(tmp_path) + ) + + +async def test_render_rejects_a_speaker_whose_voice_is_not_in_the_catalog(tmp_path): + renderer = PodcastRenderer(tts=_UnusedTTS(), catalog=_catalog_with("kokoro:am_adam")) + transcript = 
Transcript(turns=[TranscriptTurn(speaker=0, text="Hello.")]) + + with pytest.raises(RenderError): + await renderer.render( + spec=_spec("kokoro:ghost"), transcript=transcript, workdir=Path(tmp_path) + ) From 0c7987cd9e45099c8f0c9ad3f5aec4b3adb787c0 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 10 Jun 2026 18:44:25 +0200 Subject: [PATCH 26/50] test(podcasts): cover api read model --- .../tests/unit/podcasts/test_api_schemas.py | 78 +++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 surfsense_backend/tests/unit/podcasts/test_api_schemas.py diff --git a/surfsense_backend/tests/unit/podcasts/test_api_schemas.py b/surfsense_backend/tests/unit/podcasts/test_api_schemas.py new file mode 100644 index 000000000..8203d7fdb --- /dev/null +++ b/surfsense_backend/tests/unit/podcasts/test_api_schemas.py @@ -0,0 +1,78 @@ +"""The API read model the frontend renders from. + +``PodcastDetail.of`` is the contract the detail view and action responses +depend on: it exposes the deserialized brief and transcript and a simple +``has_audio`` flag the client can't derive from the published Zero columns. +These tests drive real podcasts through the service, then assert the read model +reflects their state. +""" + +from __future__ import annotations + +from datetime import UTC, datetime + +import pytest + +from app.podcasts.api.schemas import PodcastDetail +from app.podcasts.persistence import PodcastStatus +from app.podcasts.service import PodcastService + +pytestmark = pytest.mark.unit + + +def _stamp(podcast): + """Give a transient row the id and created_at a persisted one would have. + + A detail response is only ever built from a saved podcast; without a real + database, we stand in the primary key and timestamp the DB would assign. 
+ """ + podcast.id = 1 + podcast.created_at = datetime.now(UTC) + return podcast + + +async def test_a_fresh_podcast_exposes_no_brief_transcript_or_audio(fake_session): + service = PodcastService(fake_session) + podcast = _stamp(await service.create(title="New", search_space_id=3)) + + detail = PodcastDetail.of(podcast) + + assert detail.status == PodcastStatus.PENDING + assert detail.spec is None + assert detail.transcript is None + assert detail.has_audio is False + + +async def test_an_awaiting_brief_podcast_exposes_the_deserialized_brief( + fake_session, make_spec +): + service = PodcastService(fake_session) + podcast = _stamp(await service.create(title="Brief", search_space_id=3)) + await service.attach_brief(podcast, make_spec(language="fr")) + + detail = PodcastDetail.of(podcast) + + assert detail.spec is not None + assert detail.spec.language == "fr" + + +async def test_a_ready_podcast_reports_available_audio( + fake_session, make_spec, make_transcript +): + service = PodcastService(fake_session) + podcast = _stamp(await service.create(title="Done", search_space_id=3)) + await service.attach_brief(podcast, make_spec()) + await service.begin_drafting(podcast) + await service.attach_transcript(podcast, make_transcript()) + await service.approve(podcast) + await service.attach_audio( + podcast, storage_backend="local", storage_key="k", duration_seconds=120 + ) + + detail = PodcastDetail.of(podcast) + + assert detail.status == PodcastStatus.READY + assert detail.has_audio is True + assert detail.duration_seconds == 120 + assert detail.transcript is not None + assert detail.error is None From 0bed4a0d3828df0753adb57803fb93da24224566 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 10 Jun 2026 18:44:25 +0200 Subject: [PATCH 27/50] test(podcasts): cover failure recording --- .../tests/unit/podcasts/test_runtime.py | 57 +++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 surfsense_backend/tests/unit/podcasts/test_runtime.py diff --git 
a/surfsense_backend/tests/unit/podcasts/test_runtime.py b/surfsense_backend/tests/unit/podcasts/test_runtime.py new file mode 100644 index 000000000..91c6ada77 --- /dev/null +++ b/surfsense_backend/tests/unit/podcasts/test_runtime.py @@ -0,0 +1,57 @@ +"""Failure recording shared by the podcast tasks. + +When a task body raises, ``mark_failed`` is the safety net that records the +reason on the row. Its contract has two halves worth securing: a still-running +podcast is moved to FAILED with the reason, and a podcast that already reached a +terminal state is left exactly as it was rather than forced. Only the database +(a real boundary) is doubled; the lifecycle service runs for real. +""" + +from __future__ import annotations + +import pytest + +from app.podcasts.persistence import Podcast, PodcastStatus +from app.podcasts.tasks import runtime + +pytestmark = pytest.mark.unit + + +def _podcast(status: PodcastStatus) -> Podcast: + podcast = Podcast(title="Episode", search_space_id=1, status=status, spec_version=1) + podcast.id = 1 + return podcast + + +async def test_marking_failed_records_the_reason_on_a_running_podcast( + monkeypatch, session_maker_for, make_celery_session +): + podcast = _podcast(PodcastStatus.DRAFTING) + session = make_celery_session(podcast) + monkeypatch.setattr(runtime, "get_celery_session_maker", session_maker_for(session)) + + await runtime.mark_failed(1, "tts provider unavailable") + + assert podcast.status == PodcastStatus.FAILED + assert podcast.error == "tts provider unavailable" + + +async def test_marking_failed_leaves_an_already_terminal_podcast_untouched( + monkeypatch, session_maker_for, make_celery_session +): + podcast = _podcast(PodcastStatus.CANCELLED) + session = make_celery_session(podcast) + monkeypatch.setattr(runtime, "get_celery_session_maker", session_maker_for(session)) + + await runtime.mark_failed(1, "too late") + + assert podcast.status == PodcastStatus.CANCELLED + + +async def 
test_marking_a_missing_podcast_failed_is_a_no_op( + monkeypatch, session_maker_for, make_celery_session +): + session = make_celery_session(None) + monkeypatch.setattr(runtime, "get_celery_session_maker", session_maker_for(session)) + + await runtime.mark_failed(999, "gone") # must not raise From 15e44616f35f631592a194c6e8192f93fb4115e4 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 10 Jun 2026 18:44:26 +0200 Subject: [PATCH 28/50] test(podcasts): cover drafting billing gate --- .../tests/unit/podcasts/test_draft_task.py | 135 ++++++++++++++++++ 1 file changed, 135 insertions(+) create mode 100644 surfsense_backend/tests/unit/podcasts/test_draft_task.py diff --git a/surfsense_backend/tests/unit/podcasts/test_draft_task.py b/surfsense_backend/tests/unit/podcasts/test_draft_task.py new file mode 100644 index 000000000..d04692ae4 --- /dev/null +++ b/surfsense_backend/tests/unit/podcasts/test_draft_task.py @@ -0,0 +1,135 @@ +"""The transcript-drafting task's billing gate. + +Drafting is the expensive LLM step, so it runs under ``billable_call``. The +behavior that protects users' money: if billing denies the reservation the +podcast must end FAILED with no transcript, and only when billing succeeds does +a drafted transcript open the review gate. These tests fake the true +boundaries — the database, the billing system, and the generation graph — and +assert the podcast's resulting state, never how those boundaries were called. 
+""" + +from __future__ import annotations + +from contextlib import asynccontextmanager +from types import SimpleNamespace +from uuid import uuid4 + +import pytest + +from app.podcasts.persistence import Podcast, PodcastStatus +from app.podcasts.service import read_transcript +from app.podcasts.tasks import draft +from app.services.billable_calls import ( + BillingSettlementError, + QuotaInsufficientError, +) + +pytestmark = pytest.mark.unit + + +def _drafting_podcast(make_spec) -> Podcast: + """A podcast already at DRAFTING with an approved brief, as the API leaves it.""" + podcast = Podcast( + title="Episode", + search_space_id=42, + status=PodcastStatus.DRAFTING, + spec_version=1, + ) + podcast.id = 1 + podcast.thread_id = None + podcast.spec = make_spec().model_dump(mode="json") + podcast.source_content = "Some source material to discuss." + return podcast + + +def _wire_boundaries(monkeypatch, *, session, billable_call, transcript=None): + """Replace every external dependency the task body reaches for.""" + monkeypatch.setattr(draft, "get_celery_session_maker", lambda: (lambda: session)) + + async def _resolver(_session, _search_space_id, *, thread_id=None): + return uuid4(), "free", "openrouter/model" + + monkeypatch.setattr( + draft, "_resolve_agent_billing_for_search_space", _resolver + ) + monkeypatch.setattr(draft, "billable_call", billable_call) + + async def _ainvoke(_state, config=None): + return {"transcript": transcript} + + monkeypatch.setattr(draft, "transcript_graph", SimpleNamespace(ainvoke=_ainvoke)) + + +async def test_successful_billing_opens_the_review_gate_with_a_transcript( + monkeypatch, make_celery_session, make_spec, make_transcript +): + podcast = _drafting_podcast(make_spec) + session = make_celery_session(podcast) + + @asynccontextmanager + async def _ok(**_kwargs): + yield SimpleNamespace() + + _wire_boundaries( + monkeypatch, session=session, billable_call=_ok, transcript=make_transcript() + ) + + result = await 
draft._draft_transcript(podcast_id=1, search_space_id=42) + + assert podcast.status == PodcastStatus.AWAITING_REVIEW + assert read_transcript(podcast) is not None + assert result["status"] == "awaiting_review" + + +async def test_quota_denial_fails_the_podcast_without_a_transcript( + monkeypatch, make_celery_session, make_spec +): + """A denied reservation must not leave a half-drafted, billable mess.""" + podcast = _drafting_podcast(make_spec) + session = make_celery_session(podcast) + + @asynccontextmanager + async def _deny(**_kwargs): + raise QuotaInsufficientError( + usage_type="podcast_generation", + used_micros=5_000_000, + limit_micros=5_000_000, + remaining_micros=0, + ) + yield # pragma: no cover - unreachable, satisfies the CM protocol + + _wire_boundaries(monkeypatch, session=session, billable_call=_deny) + + result = await draft._draft_transcript(podcast_id=1, search_space_id=42) + + assert podcast.status == PodcastStatus.FAILED + assert read_transcript(podcast) is None + assert result["reason"] == "quota" + + +async def test_billing_settlement_failure_fails_the_podcast( + monkeypatch, make_celery_session, make_spec, make_transcript +): + podcast = _drafting_podcast(make_spec) + session = make_celery_session(podcast) + + @asynccontextmanager + async def _settlement_fails(**_kwargs): + yield SimpleNamespace() + raise BillingSettlementError( + usage_type="podcast_generation", + user_id=uuid4(), + cause=RuntimeError("finalize failed"), + ) + + _wire_boundaries( + monkeypatch, + session=session, + billable_call=_settlement_fails, + transcript=make_transcript(), + ) + + result = await draft._draft_transcript(podcast_id=1, search_space_id=42) + + assert podcast.status == PodcastStatus.FAILED + assert result["reason"] == "billing" From e61308387cc8275d04ad83ff1128ea1a8ea97129 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 10 Jun 2026 18:44:35 +0200 Subject: [PATCH 29/50] feat(podcasts): add zero schema and queries --- surfsense_web/zero/queries/index.ts | 
2 ++ surfsense_web/zero/queries/podcasts.ts | 12 ++++++++++++ surfsense_web/zero/schema/index.ts | 2 ++ surfsense_web/zero/schema/podcasts.ts | 19 +++++++++++++++++++ 4 files changed, 35 insertions(+) create mode 100644 surfsense_web/zero/queries/podcasts.ts create mode 100644 surfsense_web/zero/schema/podcasts.ts diff --git a/surfsense_web/zero/queries/index.ts b/surfsense_web/zero/queries/index.ts index fe711f5d3..45df8fa98 100644 --- a/surfsense_web/zero/queries/index.ts +++ b/surfsense_web/zero/queries/index.ts @@ -4,6 +4,7 @@ import { chatSessionQueries, commentQueries, messageQueries } from "./chat"; import { connectorQueries, documentQueries } from "./documents"; import { folderQueries } from "./folders"; import { notificationQueries } from "./inbox"; +import { podcastQueries } from "./podcasts"; import { userQueries } from "./user"; export const queries = defineQueries({ @@ -16,4 +17,5 @@ export const queries = defineQueries({ chatSession: chatSessionQueries, user: userQueries, automationRuns: automationRunQueries, + podcasts: podcastQueries, }); diff --git a/surfsense_web/zero/queries/podcasts.ts b/surfsense_web/zero/queries/podcasts.ts new file mode 100644 index 000000000..5298534dd --- /dev/null +++ b/surfsense_web/zero/queries/podcasts.ts @@ -0,0 +1,12 @@ +import { defineQuery } from "@rocicorp/zero"; +import { z } from "zod"; +import { zql } from "../schema/index"; + +export const podcastQueries = { + bySpace: defineQuery(z.object({ searchSpaceId: z.number() }), ({ args: { searchSpaceId } }) => + zql.podcasts.where("searchSpaceId", searchSpaceId).orderBy("createdAt", "desc") + ), + byId: defineQuery(z.object({ podcastId: z.number() }), ({ args: { podcastId } }) => + zql.podcasts.where("id", podcastId).one() + ), +}; diff --git a/surfsense_web/zero/schema/index.ts b/surfsense_web/zero/schema/index.ts index d6731e371..d1187ddab 100644 --- a/surfsense_web/zero/schema/index.ts +++ b/surfsense_web/zero/schema/index.ts @@ -4,6 +4,7 @@ import { 
chatCommentTable, chatSessionStateTable, newChatMessageTable } from "./ import { documentTable, searchSourceConnectorTable } from "./documents"; import { folderTable } from "./folders"; import { notificationTable } from "./inbox"; +import { podcastTable } from "./podcasts"; import { userTable } from "./user"; const chatCommentRelationships = relationships(chatCommentTable, ({ one }) => ({ @@ -38,6 +39,7 @@ export const schema = createSchema({ chatSessionStateTable, userTable, automationRunTable, + podcastTable, ], relationships: [chatCommentRelationships, newChatMessageRelationships], }); diff --git a/surfsense_web/zero/schema/podcasts.ts b/surfsense_web/zero/schema/podcasts.ts new file mode 100644 index 000000000..d473d776f --- /dev/null +++ b/surfsense_web/zero/schema/podcasts.ts @@ -0,0 +1,19 @@ +import { json, number, string, table } from "@rocicorp/zero"; + +// Mirrors PODCAST_COLS in the backend zero_publication. status drives the +// lifecycle UI by push; spec is the reviewable brief. The bulky source_content +// and transcript are intentionally not published and are fetched over REST. 
+export const podcastTable = table("podcasts") + .columns({ + id: number(), + title: string(), + status: string(), + spec: json().optional(), + specVersion: number().from("spec_version"), + durationSeconds: number().optional().from("duration_seconds"), + error: string().optional(), + searchSpaceId: number().from("search_space_id"), + threadId: number().optional().from("thread_id"), + createdAt: number().from("created_at"), + }) + .primaryKey("id"); From aa7aa81c1677bc410fd658fccd0370e49242e712 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 10 Jun 2026 20:51:38 +0200 Subject: [PATCH 30/50] refactor(podcasts): drop language detection from brief --- .../app/podcasts/generation/brief/config.py | 1 - .../podcasts/generation/brief/detection.py | 28 ----------- .../app/podcasts/generation/brief/graph.py | 8 ++-- .../app/podcasts/generation/brief/nodes.py | 46 +++---------------- .../app/podcasts/generation/brief/state.py | 7 +-- .../podcasts/generation/prompts/__init__.py | 2 - .../generation/prompts/detect_language.py | 22 --------- .../app/podcasts/resolution/__init__.py | 6 +-- .../app/podcasts/resolution/language.py | 18 ++------ .../app/podcasts/schemas/spec.py | 2 +- .../tests/unit/podcasts/test_resolution.py | 12 ++--- 11 files changed, 22 insertions(+), 130 deletions(-) delete mode 100644 surfsense_backend/app/podcasts/generation/brief/detection.py delete mode 100644 surfsense_backend/app/podcasts/generation/prompts/detect_language.py diff --git a/surfsense_backend/app/podcasts/generation/brief/config.py b/surfsense_backend/app/podcasts/generation/brief/config.py index a9f2f9dec..4f92585ae 100644 --- a/surfsense_backend/app/podcasts/generation/brief/config.py +++ b/surfsense_backend/app/podcasts/generation/brief/config.py @@ -16,7 +16,6 @@ DEFAULT_MAX_MINUTES = 20 class BriefConfig: """Signals used to propose a brief; everything here is non-LLM context.""" - search_space_id: int speaker_count: int = DEFAULT_SPEAKER_COUNT min_minutes: int = DEFAULT_MIN_MINUTES 
max_minutes: int = DEFAULT_MAX_MINUTES diff --git a/surfsense_backend/app/podcasts/generation/brief/detection.py b/surfsense_backend/app/podcasts/generation/brief/detection.py deleted file mode 100644 index d505d4993..000000000 --- a/surfsense_backend/app/podcasts/generation/brief/detection.py +++ /dev/null @@ -1,28 +0,0 @@ -"""The language-detection reply shape, normalised to a safe tag or ``None``.""" - -from __future__ import annotations - -from pydantic import BaseModel, field_validator - -from app.podcasts.schemas import normalize_language_tag - - -class DetectedLanguage(BaseModel): - """What the detector returns: a usable BCP-47 tag, or ``None`` when unsure. - - A malformed or non-language reply is coerced to ``None`` so a bad detection - quietly defers to the rest of the resolution chain rather than poisoning the - spec with an invalid tag. - """ - - language: str | None = None - - @field_validator("language") - @classmethod - def _normalise(cls, value: str | None) -> str | None: - if value is None: - return None - try: - return normalize_language_tag(value) - except ValueError: - return None diff --git a/surfsense_backend/app/podcasts/generation/brief/graph.py b/surfsense_backend/app/podcasts/generation/brief/graph.py index 328529e59..a643bdbb4 100644 --- a/surfsense_backend/app/podcasts/generation/brief/graph.py +++ b/surfsense_backend/app/podcasts/generation/brief/graph.py @@ -1,22 +1,20 @@ -"""The brief-planning graph: detect language, then propose a spec.""" +"""The brief-planning graph: propose a reviewable spec from defaults.""" from __future__ import annotations from langgraph.graph import StateGraph from .config import BriefConfig -from .nodes import detect_language, propose_spec +from .nodes import propose_spec from .state import BriefState def build_brief_graph(): workflow = StateGraph(BriefState, config_schema=BriefConfig) - workflow.add_node("detect_language", detect_language) workflow.add_node("propose_spec", propose_spec) - 
workflow.add_edge("__start__", "detect_language") - workflow.add_edge("detect_language", "propose_spec") + workflow.add_edge("__start__", "propose_spec") workflow.add_edge("propose_spec", "__end__") graph = workflow.compile() diff --git a/surfsense_backend/app/podcasts/generation/brief/nodes.py b/surfsense_backend/app/podcasts/generation/brief/nodes.py index e0477940c..c0a6f1ae1 100644 --- a/surfsense_backend/app/podcasts/generation/brief/nodes.py +++ b/surfsense_backend/app/podcasts/generation/brief/nodes.py @@ -1,15 +1,14 @@ -"""Brief-planning nodes: detect the language, then propose a full spec. +"""Brief-planning node: propose a full spec from deterministic defaults. -Only ``detect_language`` spends tokens, and only a small sample of source text; -``propose_spec`` is pure resolution. Together they open the brief gate pre-filled -so the common case needs no edits. +``propose_spec`` is pure resolution — it never spends tokens. It reuses the +user's last-used language/voices when available and otherwise falls back to +English, so the brief gate opens pre-filled and the common case needs no edits. """ from __future__ import annotations from typing import Any -from langchain_core.messages import HumanMessage, SystemMessage from langchain_core.runnables import RunnableConfig from app.config import config as app_config @@ -28,22 +27,15 @@ from app.podcasts.schemas import ( normalize_language_tag, ) from app.podcasts.voices import ( - VoiceCatalog, TtsProvider, + VoiceCatalog, get_voice_catalog, provider_from_service, ) -from app.services.llm_service import get_agent_llm -from ..prompts import detect_language_prompt -from ..structured import StructuredOutputError, invoke_json from .config import BriefConfig -from .detection import DetectedLanguage from .state import BriefState -# Only the head of the source is needed to judge language; this caps tokens. -_DETECTION_SAMPLE_CHARS = 4000 - # Default role per speaker slot; extra speakers beyond the list fall back to guest. 
_ROLE_BY_SLOT = ( SpeakerRole.HOST, @@ -54,30 +46,6 @@ _ROLE_BY_SLOT = ( ) -async def detect_language( - state: BriefState, config: RunnableConfig -) -> dict[str, Any]: - """Detect the source language; defer (``None``) on any uncertainty.""" - brief = BriefConfig.from_runnable_config(config) - llm = await get_agent_llm(state.db_session, brief.search_space_id) - if llm is None: - return {"detected_language": None} - - sample = (state.source_content or "")[:_DETECTION_SAMPLE_CHARS].strip() - if not sample: - return {"detected_language": None} - - messages = [ - SystemMessage(content=detect_language_prompt()), - HumanMessage(content=f"{sample}"), - ] - try: - detected = await invoke_json(llm, messages, DetectedLanguage) - except StructuredOutputError: - return {"detected_language": None} - return {"detected_language": detected.language} - - def propose_spec(state: BriefState, config: RunnableConfig) -> dict[str, Any]: """Build a complete :class:`PodcastSpec` from the resolved defaults.""" brief = BriefConfig.from_runnable_config(config) @@ -85,7 +53,6 @@ def propose_spec(state: BriefState, config: RunnableConfig) -> dict[str, Any]: catalog = get_voice_catalog() language = _supported_language( - detected=state.detected_language, last_used=brief.last_used_language, provider=provider, catalog=catalog, @@ -128,12 +95,11 @@ def _active_provider() -> TtsProvider: def _supported_language( *, - detected: str | None, last_used: str | None, provider: TtsProvider, catalog: VoiceCatalog, ) -> str: - raw = resolve_language(LanguageContext(detected=detected, last_used=last_used)) + raw = resolve_language(LanguageContext(last_used=last_used)) try: language = normalize_language_tag(raw) except ValueError: diff --git a/surfsense_backend/app/podcasts/generation/brief/state.py b/surfsense_backend/app/podcasts/generation/brief/state.py index 976a72df5..418fb6fa9 100644 --- a/surfsense_backend/app/podcasts/generation/brief/state.py +++ 
b/surfsense_backend/app/podcasts/generation/brief/state.py @@ -4,16 +4,11 @@ from __future__ import annotations from dataclasses import dataclass -from sqlalchemy.ext.asyncio import AsyncSession - from app.podcasts.schemas import PodcastSpec @dataclass class BriefState: - """Runtime inputs and the proposed spec the graph produces.""" + """The proposed spec the graph produces; inputs arrive via the config.""" - db_session: AsyncSession - source_content: str - detected_language: str | None = None spec: PodcastSpec | None = None diff --git a/surfsense_backend/app/podcasts/generation/prompts/__init__.py b/surfsense_backend/app/podcasts/generation/prompts/__init__.py index 1f6d3993b..041dd4e6d 100644 --- a/surfsense_backend/app/podcasts/generation/prompts/__init__.py +++ b/surfsense_backend/app/podcasts/generation/prompts/__init__.py @@ -2,13 +2,11 @@ from __future__ import annotations -from .detect_language import detect_language_prompt from .draft_segment import draft_segment_prompt from .plan_outline import plan_outline_prompt from .speakers import render_speaker_roster __all__ = [ - "detect_language_prompt", "draft_segment_prompt", "plan_outline_prompt", "render_speaker_roster", diff --git a/surfsense_backend/app/podcasts/generation/prompts/detect_language.py b/surfsense_backend/app/podcasts/generation/prompts/detect_language.py deleted file mode 100644 index a5ab4da5c..000000000 --- a/surfsense_backend/app/podcasts/generation/prompts/detect_language.py +++ /dev/null @@ -1,22 +0,0 @@ -"""Prompt for detecting the dominant natural language of source content.""" - -from __future__ import annotations - -_SYSTEM = """\ -You identify the dominant natural language of a piece of source content for a \ -podcast that will be generated from it. - -Rules: -- Report the language the listener-facing podcast should be spoken in, i.e. the \ -language most of the meaningful prose is written in. -- Ignore code, markup, URLs, numbers, and proper nouns when judging. 
-- If the content is too short, ambiguous, mixed without a clear majority, or not \ -natural-language prose, return null rather than guessing. - -Respond with strict JSON and nothing else: -{"language": ""} or {"language": null} -""" - - -def detect_language_prompt() -> str: - return _SYSTEM diff --git a/surfsense_backend/app/podcasts/resolution/__init__.py b/surfsense_backend/app/podcasts/resolution/__init__.py index ebfd3153a..19a7edfb3 100644 --- a/surfsense_backend/app/podcasts/resolution/__init__.py +++ b/surfsense_backend/app/podcasts/resolution/__init__.py @@ -1,8 +1,8 @@ """Resolution: deterministic default chains for a fresh brief. -Turns weak signals (detected language, last-used preferences) into concrete -language and voice defaults, so the brief gate opens pre-filled and most users -approve without editing. +Turns the user's last-used preferences into concrete language and voice +defaults, so the brief gate opens pre-filled and most users approve without +editing. """ from __future__ import annotations diff --git a/surfsense_backend/app/podcasts/resolution/language.py b/surfsense_backend/app/podcasts/resolution/language.py index 2da90ef37..336d9036b 100644 --- a/surfsense_backend/app/podcasts/resolution/language.py +++ b/surfsense_backend/app/podcasts/resolution/language.py @@ -1,10 +1,9 @@ """Resolve the brief's language without spending tokens at the gate. -The chain mirrors the agreed policy: prefer a language detected from the source, -fall back to what the user last chose, and finally default to English (which the -user can still override in the brief). Detection itself is performed upstream -where an LLM is available and passed in as :attr:`LanguageContext.detected`, so -this layer stays pure and deterministic. +The chain mirrors the agreed policy: reuse the language the user last chose, and +otherwise default to English (which the user can still override in the brief). 
We +deliberately never guess the language from the source content — proposing a +language the user did not ask for is worse than a predictable default. """ from __future__ import annotations @@ -20,7 +19,6 @@ DEFAULT_LANGUAGE = "en" class LanguageContext: """Signals available when proposing a language for a fresh podcast.""" - detected: str | None = None last_used: str | None = None @@ -32,13 +30,6 @@ class LanguageResolver(ABC): """Return a language tag, or ``None`` to defer to the next resolver.""" -class DetectedLanguage(LanguageResolver): - """Use the language detected from the source, when confident enough.""" - - def resolve(self, context: LanguageContext) -> str | None: - return context.detected - - class LastUsedLanguage(LanguageResolver): """Reuse the language from the user's previous podcast.""" @@ -55,7 +46,6 @@ class DefaultLanguage(LanguageResolver): # Order encodes the policy; prepend stronger signals here as they appear. DEFAULT_LANGUAGE_CHAIN: tuple[LanguageResolver, ...] = ( - DetectedLanguage(), LastUsedLanguage(), DefaultLanguage(), ) diff --git a/surfsense_backend/app/podcasts/schemas/spec.py b/surfsense_backend/app/podcasts/schemas/spec.py index 2d3b3c74e..973e26167 100644 --- a/surfsense_backend/app/podcasts/schemas/spec.py +++ b/surfsense_backend/app/podcasts/schemas/spec.py @@ -30,7 +30,7 @@ _LANGUAGE_TAG = re.compile(r"^[A-Za-z]{2,3}(-[A-Za-z0-9]{2,8})*$") def normalize_language_tag(value: str) -> str: """Validate and canonicalise a BCP-47 tag (lowercased primary subtag). - Shared with the generation layer so detected and user-entered languages are + Shared with the generation layer so resolved and user-entered languages are normalised identically before they reach a :class:`PodcastSpec`. 
""" cleaned = value.strip() diff --git a/surfsense_backend/tests/unit/podcasts/test_resolution.py b/surfsense_backend/tests/unit/podcasts/test_resolution.py index 4fe3df5cd..48e834096 100644 --- a/surfsense_backend/tests/unit/podcasts/test_resolution.py +++ b/surfsense_backend/tests/unit/podcasts/test_resolution.py @@ -2,8 +2,9 @@ Resolution is what lets most briefs need no edits: it proposes a sensible language and a distinct voice per speaker. These tests state the policy -("detected wins, else last-used, else English"; "two speakers should sound +("reuse what the user last chose, else English"; "two speakers should sound like two people") through the public resolver functions and the real catalog. +We never guess the language from source content. """ from __future__ import annotations @@ -22,13 +23,8 @@ from app.podcasts.voices import TtsProvider, get_voice_catalog pytestmark = pytest.mark.unit -def test_detected_language_is_preferred_over_everything(): - context = LanguageContext(detected="es", last_used="fr") - assert resolve_language(context) == "es" - - -def test_falls_back_to_last_used_when_nothing_detected(): - context = LanguageContext(detected=None, last_used="fr") +def test_last_used_language_is_reused(): + context = LanguageContext(last_used="fr") assert resolve_language(context) == "fr" From bae59140a624526c3d56e9434818ece8d0c2ce93 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 10 Jun 2026 20:51:51 +0200 Subject: [PATCH 31/50] refactor(podcasts): propose brief inline at create --- surfsense_backend/app/celery_app.py | 1 - surfsense_backend/app/podcasts/api/routes.py | 19 ++++-- .../app/podcasts/generation/brief/__init__.py | 5 +- .../app/podcasts/generation/brief/propose.py | 40 +++++++++++++ .../app/podcasts/tasks/__init__.py | 9 ++- surfsense_backend/app/podcasts/tasks/brief.py | 60 ------------------- 6 files changed, 61 insertions(+), 73 deletions(-) create mode 100644 surfsense_backend/app/podcasts/generation/brief/propose.py delete mode 
100644 surfsense_backend/app/podcasts/tasks/brief.py diff --git a/surfsense_backend/app/celery_app.py b/surfsense_backend/app/celery_app.py index 9622647f3..f72d1385a 100644 --- a/surfsense_backend/app/celery_app.py +++ b/surfsense_backend/app/celery_app.py @@ -182,7 +182,6 @@ celery_app = Celery( include=[ "app.tasks.celery_tasks.document_tasks", "app.tasks.celery_tasks.podcast_tasks", - "app.podcasts.tasks.brief", "app.podcasts.tasks.draft", "app.podcasts.tasks.render", "app.tasks.celery_tasks.video_presentation_tasks", diff --git a/surfsense_backend/app/podcasts/api/routes.py b/surfsense_backend/app/podcasts/api/routes.py index 0d5de2463..7a998c85e 100644 --- a/surfsense_backend/app/podcasts/api/routes.py +++ b/surfsense_backend/app/podcasts/api/routes.py @@ -16,6 +16,7 @@ from fastapi.responses import StreamingResponse from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession +from app.config import config as app_config from app.db import ( Permission, SearchSpace, @@ -23,6 +24,7 @@ from app.db import ( User, get_async_session, ) +from app.podcasts.generation.brief import propose_brief from app.podcasts.persistence import Podcast, PodcastRepository from app.podcasts.service import ( InvalidTransition, @@ -33,11 +35,9 @@ from app.podcasts.service import ( from app.podcasts.storage import open_audio_stream, purge_audio from app.podcasts.tasks import ( draft_transcript_task, - propose_brief_task, render_audio_task, ) from app.podcasts.voices import get_voice_catalog, provider_from_service -from app.config import config as app_config from app.users import current_active_user from app.utils.rbac import check_permission @@ -118,15 +118,24 @@ async def create_podcast( ): await _require(session, user, body.search_space_id, Permission.PODCASTS_CREATE) - podcast = await PodcastService(session).create( + service = PodcastService(session) + podcast = await service.create( title=body.title, search_space_id=body.search_space_id, thread_id=body.thread_id, 
) podcast.source_content = body.source_content - await session.commit() - propose_brief_task.delay(podcast.id, body.search_space_id) + spec = await propose_brief( + session, + search_space_id=body.search_space_id, + speaker_count=body.speaker_count, + min_minutes=body.min_minutes, + max_minutes=body.max_minutes, + focus=body.focus, + ) + await service.attach_brief(podcast, spec) + await session.commit() return PodcastDetail.of(podcast) diff --git a/surfsense_backend/app/podcasts/generation/brief/__init__.py b/surfsense_backend/app/podcasts/generation/brief/__init__.py index 0359a513d..5083c4708 100644 --- a/surfsense_backend/app/podcasts/generation/brief/__init__.py +++ b/surfsense_backend/app/podcasts/generation/brief/__init__.py @@ -1,9 +1,10 @@ -"""Brief planning: propose a reviewable spec from weak signals.""" +"""Brief planning: propose a reviewable spec from last-used preferences.""" from __future__ import annotations from .config import BriefConfig from .graph import build_brief_graph +from .propose import propose_brief from .state import BriefState -__all__ = ["BriefConfig", "BriefState", "build_brief_graph"] +__all__ = ["BriefConfig", "BriefState", "build_brief_graph", "propose_brief"] diff --git a/surfsense_backend/app/podcasts/generation/brief/propose.py b/surfsense_backend/app/podcasts/generation/brief/propose.py new file mode 100644 index 000000000..17344702b --- /dev/null +++ b/surfsense_backend/app/podcasts/generation/brief/propose.py @@ -0,0 +1,40 @@ +"""Propose a podcast's initial brief spec.""" + +from __future__ import annotations + +from sqlalchemy.ext.asyncio import AsyncSession + +from app.podcasts.persistence import PodcastRepository +from app.podcasts.schemas import PodcastSpec +from app.podcasts.service import preferences_from + +from .config import DEFAULT_MAX_MINUTES, DEFAULT_MIN_MINUTES, DEFAULT_SPEAKER_COUNT +from .graph import graph as brief_graph +from .state import BriefState + + +async def propose_brief( + session: AsyncSession, + 
*, + search_space_id: int, + speaker_count: int = DEFAULT_SPEAKER_COUNT, + min_minutes: int = DEFAULT_MIN_MINUTES, + max_minutes: int = DEFAULT_MAX_MINUTES, + focus: str | None = None, +) -> PodcastSpec: + """Reuse the last-used language and voices, else English; return the spec.""" + last_language, last_voices = preferences_from( + await PodcastRepository(session).latest_with_spec(search_space_id) + ) + config = { + "configurable": { + "speaker_count": speaker_count, + "min_minutes": min_minutes, + "max_minutes": max_minutes, + "focus": focus, + "last_used_language": last_language, + "last_used_voices": last_voices, + } + } + result = await brief_graph.ainvoke(BriefState(), config=config) + return result["spec"] diff --git a/surfsense_backend/app/podcasts/tasks/__init__.py b/surfsense_backend/app/podcasts/tasks/__init__.py index 32f7c3a72..cd0b7e4c4 100644 --- a/surfsense_backend/app/podcasts/tasks/__init__.py +++ b/surfsense_backend/app/podcasts/tasks/__init__.py @@ -1,18 +1,17 @@ -"""Celery tasks driving the podcast lifecycle across its user gates. +"""Celery tasks driving the podcast lifecycle across its expensive phases. -One task per async phase: propose the brief, draft the transcript, render the -audio. Each is enqueued by the API after it performs the guarded status +One task per heavy async phase: draft the transcript (LLM) and render the audio +(TTS). The brief is deterministic and proposed inline at create time, so it has +no task. Each task is enqueued by the API after it performs the guarded status transition, and each pushes its result onto the row for the frontend to observe. 
""" from __future__ import annotations -from .brief import propose_brief_task from .draft import draft_transcript_task from .render import render_audio_task __all__ = [ "draft_transcript_task", - "propose_brief_task", "render_audio_task", ] diff --git a/surfsense_backend/app/podcasts/tasks/brief.py b/surfsense_backend/app/podcasts/tasks/brief.py deleted file mode 100644 index 2f1e3f240..000000000 --- a/surfsense_backend/app/podcasts/tasks/brief.py +++ /dev/null @@ -1,60 +0,0 @@ -"""Brief-proposal task: PENDING -> AWAITING_BRIEF. - -Runs the (cheap, token-light) brief graph to detect language and propose a spec, -seeded with the user's last-used language/voice preferences. Pushes the result -straight onto the row so the frontend sees the brief gate open via Zero. -""" - -from __future__ import annotations - -import logging - -from app.celery_app import celery_app -from app.podcasts.generation.brief.graph import graph as brief_graph -from app.podcasts.generation.brief.state import BriefState -from app.podcasts.persistence import PodcastRepository -from app.podcasts.service import PodcastService, preferences_from -from app.tasks.celery_tasks import get_celery_session_maker, run_async_celery_task - -from .runtime import mark_failed - -logger = logging.getLogger(__name__) - - -@celery_app.task(name="podcast.propose_brief", bind=True) -def propose_brief_task(self, podcast_id: int, search_space_id: int) -> dict: - try: - return run_async_celery_task( - lambda: _propose_brief(podcast_id, search_space_id) - ) - except Exception as exc: # noqa: BLE001 - record and report, never crash worker - logger.error("Podcast %s brief proposal failed: %s", podcast_id, exc) - run_async_celery_task(lambda: mark_failed(podcast_id, str(exc))) - return {"status": "failed", "podcast_id": podcast_id} - - -async def _propose_brief(podcast_id: int, search_space_id: int) -> dict: - async with get_celery_session_maker()() as session: - repo = PodcastRepository(session) - podcast = await 
repo.get(podcast_id) - if podcast is None: - raise ValueError(f"podcast {podcast_id} not found") - - last_language, last_voices = preferences_from( - await repo.latest_with_spec(search_space_id) - ) - state = BriefState( - db_session=session, source_content=podcast.source_content or "" - ) - config = { - "configurable": { - "search_space_id": search_space_id, - "last_used_language": last_language, - "last_used_voices": last_voices, - } - } - result = await brief_graph.ainvoke(state, config=config) - - await PodcastService(session).attach_brief(podcast, result["spec"]) - await session.commit() - return {"status": "awaiting_brief", "podcast_id": podcast_id} From b7604167d828e2c6039b31328c849e4940039b22 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 10 Jun 2026 20:51:59 +0200 Subject: [PATCH 32/50] docs(podcasts): tighten comments and docstrings --- surfsense_backend/app/podcasts/__init__.py | 7 +++---- surfsense_backend/app/podcasts/api/__init__.py | 7 +------ surfsense_backend/app/podcasts/generation/__init__.py | 7 +++---- surfsense_backend/app/podcasts/generation/structured.py | 7 +++---- surfsense_backend/app/podcasts/tasks/draft.py | 7 +++---- surfsense_backend/app/podcasts/tts/adapters/litellm.py | 4 ++-- surfsense_backend/app/podcasts/voices/__init__.py | 5 ++--- surfsense_backend/app/podcasts/voices/catalog.py | 8 ++++---- 8 files changed, 21 insertions(+), 31 deletions(-) diff --git a/surfsense_backend/app/podcasts/__init__.py b/surfsense_backend/app/podcasts/__init__.py index 058274b4f..6a152af22 100644 --- a/surfsense_backend/app/podcasts/__init__.py +++ b/surfsense_backend/app/podcasts/__init__.py @@ -1,8 +1,7 @@ -"""Podcast generation: brief resolution, transcript drafting, and audio rendering. +"""Podcast feature: brief resolution, transcript drafting, and audio rendering. -The public surface grows as the module is built. 
For now it owns the -``podcasts`` table model, which :mod:`app.db` re-exports so existing -``from app.db import Podcast`` call sites keep working during the migration. +Owns the ``podcasts`` table model, which :mod:`app.db` re-exports so existing +``from app.db import Podcast`` imports keep resolving. """ from __future__ import annotations diff --git a/surfsense_backend/app/podcasts/api/__init__.py b/surfsense_backend/app/podcasts/api/__init__.py index f943faeeb..4b5b12971 100644 --- a/surfsense_backend/app/podcasts/api/__init__.py +++ b/surfsense_backend/app/podcasts/api/__init__.py @@ -1,9 +1,4 @@ -"""HTTP API for the podcast lifecycle. - -The router is mounted at cutover (replacing the legacy podcast routes); it is -kept separate here so it can be wired in one step without colliding with the old -routes during parallel development. -""" +"""HTTP API for the podcast lifecycle.""" from __future__ import annotations diff --git a/surfsense_backend/app/podcasts/generation/__init__.py b/surfsense_backend/app/podcasts/generation/__init__.py index 30a2425b0..a30b8f9af 100644 --- a/surfsense_backend/app/podcasts/generation/__init__.py +++ b/surfsense_backend/app/podcasts/generation/__init__.py @@ -1,8 +1,7 @@ -"""Generation: the LLM-driven brief and transcript controlled graphs. +"""Generation: the controlled graphs that produce a brief and a transcript. -Two small graphs hold all the intelligence: ``brief`` proposes a reviewable spec -(language detection + resolution), and ``transcript`` drafts long-form dialogue -outline-first. Everything else in the podcast pipeline is deterministic. +``brief`` proposes a reviewable spec from deterministic defaults; ``transcript`` +is the LLM-driven step, drafting long-form dialogue outline-first. 
""" from __future__ import annotations diff --git a/surfsense_backend/app/podcasts/generation/structured.py b/surfsense_backend/app/podcasts/generation/structured.py index 9e9731c2f..bcc03a6c7 100644 --- a/surfsense_backend/app/podcasts/generation/structured.py +++ b/surfsense_backend/app/podcasts/generation/structured.py @@ -1,9 +1,8 @@ """Parse a model's reply into a Pydantic shape, tolerating chatty output. -Agent LLMs return JSON wrapped in prose, markdown fences, or reasoning blocks. -This mirrors the legacy podcaster's resilient parsing — strip fences, then fall -back to the outermost ``{...}`` span — so every generation node validates the -reply the same way instead of repeating ad-hoc parsing. +Agent LLMs return JSON wrapped in prose, markdown fences, or reasoning blocks, +so a plain ``model_validate_json`` is unreliable. Centralising the tolerant +parse here keeps every generation node validating replies the same way. """ from __future__ import annotations diff --git a/surfsense_backend/app/podcasts/tasks/draft.py b/surfsense_backend/app/podcasts/tasks/draft.py index 8d461bf9b..575daf2ba 100644 --- a/surfsense_backend/app/podcasts/tasks/draft.py +++ b/surfsense_backend/app/podcasts/tasks/draft.py @@ -1,9 +1,8 @@ """Transcript-drafting task: DRAFTING -> AWAITING_REVIEW. -The expensive, LLM-heavy step, so it runs under ``billable_call`` exactly like -the legacy generator. The API has already moved the row to DRAFTING and stored -the approved brief; this task drafts the long-form transcript and opens the -go/no-go gate. +The expensive, LLM-heavy step, so it runs under ``billable_call``. The API has +already moved the row to DRAFTING and stored the approved brief; this task +drafts the long-form transcript and opens the go/no-go gate. 
""" from __future__ import annotations diff --git a/surfsense_backend/app/podcasts/tts/adapters/litellm.py b/surfsense_backend/app/podcasts/tts/adapters/litellm.py index 55f49bd1e..181973b47 100644 --- a/surfsense_backend/app/podcasts/tts/adapters/litellm.py +++ b/surfsense_backend/app/podcasts/tts/adapters/litellm.py @@ -16,8 +16,8 @@ from ..request import SynthesisRequest # Hosted providers return MP3-encoded bytes from ``aspeech``. _CONTAINER = "mp3" -# Matches the legacy podcaster timeouts; long single segments still finish well -# under this, and retries cover transient upstream failures. +# A long single segment still finishes well under this; retries absorb transient +# upstream failures without failing the whole render. _TIMEOUT_SECONDS = 600 _MAX_RETRIES = 2 diff --git a/surfsense_backend/app/podcasts/voices/__init__.py b/surfsense_backend/app/podcasts/voices/__init__.py index 230b0b540..99560ab35 100644 --- a/surfsense_backend/app/podcasts/voices/__init__.py +++ b/surfsense_backend/app/podcasts/voices/__init__.py @@ -1,8 +1,7 @@ """Voices: the catalog of selectable TTS voices and the active provider. -Replaces the legacy hardcoded speaker-id voice maps. Callers obtain the -catalog via :func:`get_voice_catalog` and identify the configured provider via -:func:`provider_from_service`. +Callers obtain the catalog via :func:`get_voice_catalog` and identify the +configured provider via :func:`provider_from_service`. """ from __future__ import annotations diff --git a/surfsense_backend/app/podcasts/voices/catalog.py b/surfsense_backend/app/podcasts/voices/catalog.py index 591812943..28914e742 100644 --- a/surfsense_backend/app/podcasts/voices/catalog.py +++ b/surfsense_backend/app/podcasts/voices/catalog.py @@ -1,9 +1,9 @@ """The voice catalog: look up and filter selectable voices. -A :class:`VoiceCatalog` is the single source of truth for which voices exist, -replacing the hardcoded speaker-id maps. 
Resolution uses it to pick defaults -for a brief, the API exposes it as picker options, and the renderer uses it to -turn a stored ``voice_id`` back into the provider-native reference. +A :class:`VoiceCatalog` is the single source of truth for which voices exist. +Resolution uses it to pick defaults for a brief, the API exposes it as picker +options, and the renderer uses it to turn a stored ``voice_id`` back into the +provider-native reference. """ from __future__ import annotations From 3eb7cdb2d8588ce75f9c5d932904e3d29b940444 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 10 Jun 2026 21:44:50 +0200 Subject: [PATCH 33/50] refactor(podcasts): gate chat-triggered podcast on brief review --- .../builtins/deliverables/tools/podcast.py | 119 ++++++------------ .../deliverables/generate_podcast/emission.py | 20 +-- .../deliverables/generate_podcast/thinking.py | 24 ++-- 3 files changed, 59 insertions(+), 104 deletions(-) diff --git a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/deliverables/tools/podcast.py b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/deliverables/tools/podcast.py index bfa3cc100..5b70eee81 100644 --- a/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/deliverables/tools/podcast.py +++ b/surfsense_backend/app/agents/chat/multi_agent_chat/subagents/builtins/deliverables/tools/podcast.py @@ -1,11 +1,9 @@ """Factory for a podcast-generation tool. -Dispatches the heavy generation to Celery and then polls the podcast row -until it reaches a terminal status (READY/FAILED). The tool always -returns a real terminal ``Receipt`` — never a pending one. The wait is -bounded by the existing per-invocation safety net -(``SURFSENSE_SUBAGENT_INVOKE_TIMEOUT_SECONDS`` in multi-agent mode, -HTTP / process lifetime in single-agent mode). +Creates the podcast and proposes its brief (language, voices, length) inline, +then returns immediately with the row awaiting review. 
The user approves the +brief and the drafted transcript in the podcast panel before any audio is +rendered, so this tool never blocks on generation. """ import logging @@ -18,13 +16,12 @@ from sqlalchemy.ext.asyncio import AsyncSession from app.agents.chat.multi_agent_chat.shared.receipts.command import with_receipt from app.agents.chat.multi_agent_chat.shared.receipts.receipt import make_receipt -from app.agents.chat.multi_agent_chat.subagents.builtins.deliverables.deliverable_wait import ( - wait_for_deliverable, -) from app.agents.chat.multi_agent_chat.subagents.builtins.deliverables.tools.thread_resolver import ( resolve_root_thread_id, ) -from app.db import Podcast, PodcastStatus, shielded_async_session +from app.db import PodcastStatus, shielded_async_session +from app.podcasts.generation.brief import propose_brief +from app.podcasts.service import PodcastService logger = logging.getLogger(__name__) @@ -45,7 +42,7 @@ def create_generate_podcast_tool( user_prompt: str | None = None, ) -> Command: """ - Generate a podcast from the provided content. + Prepare a podcast from the provided content for the user to review. Use this tool when the user asks to create, generate, or make a podcast. Common triggers include phrases like: @@ -55,100 +52,55 @@ def create_generate_podcast_tool( - "Make a podcast about..." - "Turn this into a podcast" + This sets up the podcast and proposes its brief (language, voices, + length). The user then reviews and approves the brief and transcript in + the podcast panel to produce the audio — generation does not start here. + Args: source_content: The text content to convert into a podcast. podcast_title: Title for the podcast (default: "SurfSense Podcast") - user_prompt: Optional instructions for podcast style, tone, or format. + user_prompt: Optional steer for what the episode should focus on. 
Returns: A dictionary containing: - - status: PodcastStatus value (pending, generating, or failed) - - podcast_id: The podcast ID for polling (when status is pending or generating) - - title: The podcast title - - message: Status message (or "error" field if status is failed) + - status: the podcast lifecycle status (awaiting_brief on success) + - podcast_id: the podcast ID to review in the panel + - title: the podcast title + - message: what the user should do next (or "error" when failed) """ try: # One DB session per tool call so parallel invocations never share an AsyncSession. async with shielded_async_session() as session: - podcast = Podcast( + service = PodcastService(session) + podcast = await service.create( title=podcast_title, - status=PodcastStatus.PENDING, search_space_id=search_space_id, thread_id=resolve_root_thread_id(runtime, thread_id), ) - session.add(podcast) + podcast.source_content = source_content + spec = await propose_brief( + session, + search_space_id=search_space_id, + focus=user_prompt, + ) + await service.attach_brief(podcast, spec) await session.commit() - await session.refresh(podcast) podcast_id = podcast.id - from app.tasks.celery_tasks.podcast_tasks import ( - generate_content_podcast_task, - ) - - task = generate_content_podcast_task.delay( - podcast_id=podcast_id, - source_content=source_content, - search_space_id=search_space_id, - user_prompt=user_prompt, - ) - logger.info( - "[generate_podcast] Created podcast %s, task: %s", + "[generate_podcast] Prepared podcast %s awaiting brief review", podcast_id, - task.id, ) - # Wait until the Celery worker flips the row to a terminal - # state. The wait is bounded only by the subagent invoke - # timeout (multi-agent) or HTTP lifetime (single-agent) — - # see app.agents.chat.multi_agent_chat.subagents.builtins.deliverables.deliverable_wait for details. 
- terminal_status, columns, elapsed = await wait_for_deliverable( - model=Podcast, - row_id=podcast_id, - columns=[Podcast.status, Podcast.file_location], - terminal_statuses={PodcastStatus.READY, PodcastStatus.FAILED}, - ) - - if terminal_status == PodcastStatus.READY: - file_location = columns[1] if columns else None - logger.info( - "[generate_podcast] Podcast %s READY in %.2fs (file=%s)", - podcast_id, - elapsed, - file_location, - ) - payload: dict[str, Any] = { - "status": PodcastStatus.READY.value, - "podcast_id": podcast_id, - "title": podcast_title, - "file_location": file_location, - "message": ("Podcast generated and saved to your podcast panel."), - } - return with_receipt( - payload=payload, - receipt=make_receipt( - route="deliverables", - type="podcast", - operation="generate", - status="success", - external_id=str(podcast_id), - preview=podcast_title, - ), - tool_call_id=runtime.tool_call_id, - ) - - # Only other terminal state is FAILED. - logger.warning( - "[generate_podcast] Podcast %s FAILED in %.2fs", - podcast_id, - elapsed, - ) - err = "Background worker reported FAILED status for this podcast." - payload = { - "status": PodcastStatus.FAILED.value, + payload: dict[str, Any] = { + "status": PodcastStatus.AWAITING_BRIEF.value, "podcast_id": podcast_id, "title": podcast_title, - "error": err, + "message": ( + "I've prepared a podcast brief — review the language, " + "voices, and length in the podcast panel, then approve it " + "to draft and generate the episode." 
+ ), } return with_receipt( payload=payload, @@ -156,10 +108,9 @@ def create_generate_podcast_tool( route="deliverables", type="podcast", operation="generate", - status="failed", + status="success", external_id=str(podcast_id), preview=podcast_title, - error=err, ), tool_call_id=runtime.tool_call_id, ) diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_podcast/emission.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_podcast/emission.py index f1a1e9c37..84f6ac4fc 100644 --- a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_podcast/emission.py +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_podcast/emission.py @@ -15,22 +15,26 @@ def iter_completion_emission_frames( out = ctx.tool_output payload = out if isinstance(out, dict) else {"result": out} yield ctx.emit_tool_output_card(payload) - if isinstance(out, dict) and out.get("status") in ( + status = out.get("status") if isinstance(out, dict) else None + title = out.get("title", "Podcast") if isinstance(out, dict) else "Podcast" + if status in ( + "awaiting_brief", + "awaiting_review", "pending", - "generating", - "processing", + "drafting", + "rendering", ): yield ctx.streaming_service.format_terminal_info( - f"Podcast queued: {out.get('title', 'Podcast')}", + f"Podcast brief ready to review: {title}", "success", ) - elif isinstance(out, dict) and out.get("status") in ("ready", "success"): + elif status in ("ready", "success"): yield ctx.streaming_service.format_terminal_info( - f"Podcast generated successfully: {out.get('title', 'Podcast')}", + f"Podcast generated successfully: {title}", "success", ) - elif isinstance(out, dict) and out.get("status") in ("failed", "error"): - error_msg = out.get("error", "Unknown error") + elif status in ("failed", "error"): + error_msg = out.get("error", "Unknown error") if isinstance(out, dict) else "Unknown error" yield 
ctx.streaming_service.format_terminal_info( f"Podcast generation failed: {error_msg}", "error", diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_podcast/thinking.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_podcast/thinking.py index 5cf78ea72..06dfc656b 100644 --- a/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_podcast/thinking.py +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tools/deliverables/generate_podcast/thinking.py @@ -24,11 +24,11 @@ def resolve_start_thinking(tool_name: str, tool_input: Any) -> ToolStartThinking d.get("source_content", "") if isinstance(tool_input, dict) else "" ) return ToolStartThinking( - title="Generating podcast", + title="Preparing podcast", items=[ f"Title: {podcast_title}", f"Content: {content_len:,} characters", - "Preparing audio generation...", + "Proposing brief (language, voices, length)...", ], ) @@ -50,17 +50,17 @@ def resolve_completed_thinking( if isinstance(tool_output, dict) else "Podcast" ) - if podcast_status in ("pending", "generating", "processing"): + if podcast_status in ( + "awaiting_brief", + "awaiting_review", + "pending", + "drafting", + "rendering", + ): completed = [ f"Title: {podcast_title}", - "Podcast generation started", - "Processing in background...", - ] - elif podcast_status == "already_generating": - completed = [ - f"Title: {podcast_title}", - "Podcast already in progress", - "Please wait for it to complete", + "Brief ready for review", + "Approve it in the podcast panel to generate", ] elif podcast_status in ("failed", "error"): error_msg = ( @@ -79,4 +79,4 @@ def resolve_completed_thinking( ] else: completed = items - return ("Generating podcast", completed) + return ("Preparing podcast", completed) From 8b52cd0ac98ed7dc6f49ff5fe9cd720a30816391 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 10 Jun 2026 21:44:57 +0200 Subject: [PATCH 34/50] refactor(podcasts): mount 
lifecycle router, drop legacy routes --- surfsense_backend/app/routes/__init__.py | 2 +- .../app/routes/podcasts_routes.py | 211 ------------------ 2 files changed, 1 insertion(+), 212 deletions(-) delete mode 100644 surfsense_backend/app/routes/podcasts_routes.py diff --git a/surfsense_backend/app/routes/__init__.py b/surfsense_backend/app/routes/__init__.py index 5cc029884..a050651f6 100644 --- a/surfsense_backend/app/routes/__init__.py +++ b/surfsense_backend/app/routes/__init__.py @@ -4,6 +4,7 @@ from app.automations.api import router as automations_router from app.file_storage.api import router as file_storage_router from app.gateway import require_gateway_enabled from app.notifications.api import router as notifications_router +from app.podcasts.api import router as podcasts_router from .agent_action_log_route import router as agent_action_log_router from .agent_flags_route import router as agent_flags_router @@ -50,7 +51,6 @@ from .notes_routes import router as notes_router from .notion_add_connector_route import router as notion_add_connector_router from .obsidian_plugin_routes import router as obsidian_plugin_router from .onedrive_add_connector_route import router as onedrive_add_connector_router -from .podcasts_routes import router as podcasts_router from .prompts_routes import router as prompts_router from .public_chat_routes import router as public_chat_router from .rbac_routes import router as rbac_router diff --git a/surfsense_backend/app/routes/podcasts_routes.py b/surfsense_backend/app/routes/podcasts_routes.py deleted file mode 100644 index f991f698f..000000000 --- a/surfsense_backend/app/routes/podcasts_routes.py +++ /dev/null @@ -1,211 +0,0 @@ -""" -Podcast routes for CRUD operations and audio streaming. - -These routes support the podcast generation feature in new-chat. -Frontend polls GET /podcasts/{podcast_id} to check status field. 
-""" - -import os -from pathlib import Path - -from fastapi import APIRouter, Depends, HTTPException -from fastapi.responses import StreamingResponse -from sqlalchemy import select -from sqlalchemy.exc import SQLAlchemyError -from sqlalchemy.ext.asyncio import AsyncSession - -from app.db import ( - Permission, - Podcast, - SearchSpace, - SearchSpaceMembership, - User, - get_async_session, -) -from app.schemas import PodcastRead -from app.users import current_active_user -from app.utils.rbac import check_permission - -router = APIRouter() - - -@router.get("/podcasts", response_model=list[PodcastRead]) -async def read_podcasts( - skip: int = 0, - limit: int = 100, - search_space_id: int | None = None, - session: AsyncSession = Depends(get_async_session), - user: User = Depends(current_active_user), -): - """ - List podcasts the user has access to. - Requires PODCASTS_READ permission for the search space(s). - """ - if skip < 0 or limit < 1: - raise HTTPException(status_code=400, detail="Invalid pagination parameters") - try: - if search_space_id is not None: - # Check permission for specific search space - await check_permission( - session, - user, - search_space_id, - Permission.PODCASTS_READ.value, - "You don't have permission to read podcasts in this search space", - ) - result = await session.execute( - select(Podcast) - .filter(Podcast.search_space_id == search_space_id) - .offset(skip) - .limit(limit) - ) - else: - # Get podcasts from all search spaces user has membership in - result = await session.execute( - select(Podcast) - .join(SearchSpace) - .join(SearchSpaceMembership) - .filter(SearchSpaceMembership.user_id == user.id) - .offset(skip) - .limit(limit) - ) - return result.scalars().all() - except HTTPException: - raise - except SQLAlchemyError: - raise HTTPException( - status_code=500, detail="Database error occurred while fetching podcasts" - ) from None - - -@router.get("/podcasts/{podcast_id}", response_model=PodcastRead) -async def read_podcast( - 
podcast_id: int, - session: AsyncSession = Depends(get_async_session), - user: User = Depends(current_active_user), -): - """ - Get a specific podcast by ID. - - Requires authentication with PODCASTS_READ permission. - For public podcast access, use /public/{share_token}/podcasts/{podcast_id}/stream - """ - try: - result = await session.execute(select(Podcast).filter(Podcast.id == podcast_id)) - podcast = result.scalars().first() - - if not podcast: - raise HTTPException( - status_code=404, - detail="Podcast not found", - ) - - await check_permission( - session, - user, - podcast.search_space_id, - Permission.PODCASTS_READ.value, - "You don't have permission to read podcasts in this search space", - ) - - return PodcastRead.from_orm_with_entries(podcast) - except HTTPException as he: - raise he - except SQLAlchemyError: - raise HTTPException( - status_code=500, detail="Database error occurred while fetching podcast" - ) from None - - -@router.delete("/podcasts/{podcast_id}", response_model=dict) -async def delete_podcast( - podcast_id: int, - session: AsyncSession = Depends(get_async_session), - user: User = Depends(current_active_user), -): - """ - Delete a podcast. - Requires PODCASTS_DELETE permission for the search space. 
- """ - try: - result = await session.execute(select(Podcast).filter(Podcast.id == podcast_id)) - db_podcast = result.scalars().first() - - if not db_podcast: - raise HTTPException(status_code=404, detail="Podcast not found") - - # Check permission for the search space - await check_permission( - session, - user, - db_podcast.search_space_id, - Permission.PODCASTS_DELETE.value, - "You don't have permission to delete podcasts in this search space", - ) - - await session.delete(db_podcast) - await session.commit() - return {"message": "Podcast deleted successfully"} - except HTTPException as he: - raise he - except SQLAlchemyError: - await session.rollback() - raise HTTPException( - status_code=500, detail="Database error occurred while deleting podcast" - ) from None - - -@router.get("/podcasts/{podcast_id}/stream") -@router.get("/podcasts/{podcast_id}/audio") -async def stream_podcast( - podcast_id: int, - session: AsyncSession = Depends(get_async_session), - user: User = Depends(current_active_user), -): - """ - Stream a podcast audio file. - - Requires authentication with PODCASTS_READ permission. - For public podcast access, use /public/{share_token}/podcasts/{podcast_id}/stream - - Note: Both /stream and /audio endpoints are supported for compatibility. 
- """ - try: - result = await session.execute(select(Podcast).filter(Podcast.id == podcast_id)) - podcast = result.scalars().first() - - if not podcast: - raise HTTPException(status_code=404, detail="Podcast not found") - - await check_permission( - session, - user, - podcast.search_space_id, - Permission.PODCASTS_READ.value, - "You don't have permission to access podcasts in this search space", - ) - - file_path = podcast.file_location - - if not file_path or not os.path.isfile(file_path): - raise HTTPException(status_code=404, detail="Podcast audio file not found") - - def iterfile(): - with open(file_path, mode="rb") as file_like: - yield from file_like - - return StreamingResponse( - iterfile(), - media_type="audio/mpeg", - headers={ - "Accept-Ranges": "bytes", - "Content-Disposition": f"inline; filename={Path(file_path).name}", - }, - ) - - except HTTPException as he: - raise he - except Exception as e: - raise HTTPException( - status_code=500, detail=f"Error streaming podcast: {e!s}" - ) from e From 003d1d2b95511dd83e82cc1bfe399dcae62a4b87 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 10 Jun 2026 21:44:57 +0200 Subject: [PATCH 35/50] refactor(podcasts): stream public podcast audio via storage backend --- surfsense_backend/app/routes/public_chat_routes.py | 11 +++++++++++ surfsense_backend/app/services/public_chat_service.py | 5 +++++ 2 files changed, 16 insertions(+) diff --git a/surfsense_backend/app/routes/public_chat_routes.py b/surfsense_backend/app/routes/public_chat_routes.py index 3181e117c..53f4c2651 100644 --- a/surfsense_backend/app/routes/public_chat_routes.py +++ b/surfsense_backend/app/routes/public_chat_routes.py @@ -99,6 +99,17 @@ async def stream_public_podcast( if not podcast_info: raise HTTPException(status_code=404, detail="Podcast not found") + storage_key = podcast_info.get("storage_key") + if storage_key: + from app.file_storage.factory import get_storage_backend + + return StreamingResponse( + 
get_storage_backend().open_stream(storage_key), + media_type="audio/mpeg", + headers={"Accept-Ranges": "bytes"}, + ) + + # Legacy fallback for snapshots taken before the storage migration. file_path = podcast_info.get("file_path") if not file_path or not os.path.isfile(file_path): diff --git a/surfsense_backend/app/services/public_chat_service.py b/surfsense_backend/app/services/public_chat_service.py index e4e0dd33a..d17f411b8 100644 --- a/surfsense_backend/app/services/public_chat_service.py +++ b/surfsense_backend/app/services/public_chat_service.py @@ -337,6 +337,9 @@ async def _get_podcast_for_snapshot( "original_id": podcast.id, "title": podcast.title, "transcript": podcast.podcast_transcript, + "storage_backend": podcast.storage_backend, + "storage_key": podcast.storage_key, + # Legacy fallback for rows rendered before the storage migration. "file_path": podcast.file_location, } @@ -717,6 +720,8 @@ async def clone_from_snapshot( new_podcast = Podcast( title=podcast_info.get("title", "Cloned Podcast"), podcast_transcript=podcast_info.get("transcript"), + storage_backend=podcast_info.get("storage_backend"), + storage_key=podcast_info.get("storage_key"), file_location=podcast_info.get("file_path"), status=PodcastStatus.READY, search_space_id=target_search_space_id, From 97ab7a88fda402a3fa9319ecaf7768e54df59459 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 10 Jun 2026 21:45:04 +0200 Subject: [PATCH 36/50] refactor(podcasts): remove legacy podcaster agent, task, and schema --- .../app/agents/podcaster/__init__.py | 8 - .../app/agents/podcaster/configuration.py | 29 --- .../app/agents/podcaster/graph.py | 29 --- .../app/agents/podcaster/nodes.py | 195 --------------- .../app/agents/podcaster/prompts.py | 122 --------- .../app/agents/podcaster/state.py | 43 ---- .../app/agents/podcaster/utils.py | 84 ------- .../app/agents/video_presentation/__init__.py | 5 +- surfsense_backend/app/celery_app.py | 1 - surfsense_backend/app/schemas/__init__.py | 5 - 
surfsense_backend/app/schemas/podcasts.py | 66 ----- .../app/tasks/celery_tasks/podcast_tasks.py | 236 ------------------ 12 files changed, 2 insertions(+), 821 deletions(-) delete mode 100644 surfsense_backend/app/agents/podcaster/__init__.py delete mode 100644 surfsense_backend/app/agents/podcaster/configuration.py delete mode 100644 surfsense_backend/app/agents/podcaster/graph.py delete mode 100644 surfsense_backend/app/agents/podcaster/nodes.py delete mode 100644 surfsense_backend/app/agents/podcaster/prompts.py delete mode 100644 surfsense_backend/app/agents/podcaster/state.py delete mode 100644 surfsense_backend/app/agents/podcaster/utils.py delete mode 100644 surfsense_backend/app/schemas/podcasts.py delete mode 100644 surfsense_backend/app/tasks/celery_tasks/podcast_tasks.py diff --git a/surfsense_backend/app/agents/podcaster/__init__.py b/surfsense_backend/app/agents/podcaster/__init__.py deleted file mode 100644 index 8459b2977..000000000 --- a/surfsense_backend/app/agents/podcaster/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -"""New LangGraph Agent. - -This module defines a custom graph. -""" - -from .graph import graph - -__all__ = ["graph"] diff --git a/surfsense_backend/app/agents/podcaster/configuration.py b/surfsense_backend/app/agents/podcaster/configuration.py deleted file mode 100644 index 6a903f9df..000000000 --- a/surfsense_backend/app/agents/podcaster/configuration.py +++ /dev/null @@ -1,29 +0,0 @@ -"""Define the configurable parameters for the agent.""" - -from __future__ import annotations - -from dataclasses import dataclass, fields - -from langchain_core.runnables import RunnableConfig - - -@dataclass(kw_only=True) -class Configuration: - """The configuration for the agent.""" - - # Changeme: Add configurable values here! 
- # these values can be pre-set when you - # create assistants (https://langchain-ai.github.io/langgraph/cloud/how-tos/configuration_cloud/) - # and when you invoke the graph - podcast_title: str - search_space_id: int - user_prompt: str | None = None - - @classmethod - def from_runnable_config( - cls, config: RunnableConfig | None = None - ) -> Configuration: - """Create a Configuration instance from a RunnableConfig object.""" - configurable = (config.get("configurable") or {}) if config else {} - _fields = {f.name for f in fields(cls) if f.init} - return cls(**{k: v for k, v in configurable.items() if k in _fields}) diff --git a/surfsense_backend/app/agents/podcaster/graph.py b/surfsense_backend/app/agents/podcaster/graph.py deleted file mode 100644 index 94045566b..000000000 --- a/surfsense_backend/app/agents/podcaster/graph.py +++ /dev/null @@ -1,29 +0,0 @@ -from langgraph.graph import StateGraph - -from .configuration import Configuration -from .nodes import create_merged_podcast_audio, create_podcast_transcript -from .state import State - - -def build_graph(): - # Define a new graph - workflow = StateGraph(State, config_schema=Configuration) - - # Add the node to the graph - workflow.add_node("create_podcast_transcript", create_podcast_transcript) - workflow.add_node("create_merged_podcast_audio", create_merged_podcast_audio) - - # Set the entrypoint as `call_model` - workflow.add_edge("__start__", "create_podcast_transcript") - workflow.add_edge("create_podcast_transcript", "create_merged_podcast_audio") - workflow.add_edge("create_merged_podcast_audio", "__end__") - - # Compile the workflow into an executable graph - graph = workflow.compile() - graph.name = "Surfsense Podcaster" # This defines the custom name in LangSmith - - return graph - - -# Compile the graph once when the module is loaded -graph = build_graph() diff --git a/surfsense_backend/app/agents/podcaster/nodes.py b/surfsense_backend/app/agents/podcaster/nodes.py deleted file mode 100644 index 
d1f140a44..000000000 --- a/surfsense_backend/app/agents/podcaster/nodes.py +++ /dev/null @@ -1,195 +0,0 @@ -import asyncio -import json -import os -import uuid -from pathlib import Path -from typing import Any - -from ffmpeg.asyncio import FFmpeg -from langchain_core.messages import HumanMessage, SystemMessage -from langchain_core.runnables import RunnableConfig -from litellm import aspeech - -from app.config import config as app_config -from app.services.kokoro_tts_service import get_kokoro_tts_service -from app.services.llm_service import get_agent_llm -from app.utils.content_utils import extract_text_content, strip_markdown_fences - -from .configuration import Configuration -from .prompts import get_podcast_generation_prompt -from .state import PodcastTranscriptEntry, PodcastTranscripts, State -from .utils import get_voice_for_provider - - -async def create_podcast_transcript( - state: State, config: RunnableConfig -) -> dict[str, Any]: - """Generate the podcast transcript from the source content.""" - configuration = Configuration.from_runnable_config(config) - search_space_id = configuration.search_space_id - user_prompt = configuration.user_prompt - - llm = await get_agent_llm(state.db_session, search_space_id) - if not llm: - error_message = f"No agent LLM configured for search space {search_space_id}" - print(error_message) - raise RuntimeError(error_message) - - prompt = get_podcast_generation_prompt(user_prompt) - messages = [ - SystemMessage(content=prompt), - HumanMessage( - content=f"{state.source_content}" - ), - ] - llm_response = await llm.ainvoke(messages) - - # Reasoning models may return content as blocks; normalise to a string. 
- content = strip_markdown_fences(extract_text_content(llm_response.content)) - - try: - podcast_transcript = PodcastTranscripts.model_validate(json.loads(content)) - except (json.JSONDecodeError, TypeError, ValueError) as e: - print(f"Direct JSON parsing failed, trying fallback approach: {e!s}") - - try: - json_start = content.find("{") - json_end = content.rfind("}") + 1 - if json_start >= 0 and json_end > json_start: - json_str = content[json_start:json_end] - parsed_data = json.loads(json_str) - podcast_transcript = PodcastTranscripts.model_validate(parsed_data) - print("Successfully parsed podcast transcript using fallback approach") - else: - error_message = f"Could not find valid JSON in LLM response. Raw response: {content}" - print(error_message) - raise ValueError(error_message) - - except (json.JSONDecodeError, TypeError, ValueError) as e2: - error_message = f"Error parsing LLM response (fallback also failed): {e2!s}" - print(f"Error parsing LLM response: {e2!s}") - print(f"Raw response: {content}") - raise - - return {"podcast_transcript": podcast_transcript.podcast_transcripts} - - -async def create_merged_podcast_audio( - state: State, config: RunnableConfig -) -> dict[str, Any]: - """Generate audio for each transcript and merge them into a single podcast file.""" - starting_transcript = PodcastTranscriptEntry( - speaker_id=1, dialog="Welcome to Surfsense Podcast." - ) - - transcript = state.podcast_transcript - - # transcript may be a PodcastTranscripts object or already a list. 
- if hasattr(transcript, "podcast_transcripts"): - transcript_entries = transcript.podcast_transcripts - else: - transcript_entries = transcript - - merged_transcript = [starting_transcript, *transcript_entries] - - temp_dir = Path("temp_audio") - temp_dir.mkdir(exist_ok=True) - - session_id = str(uuid.uuid4()) - output_path = f"podcasts/{session_id}_podcast.mp3" - os.makedirs("podcasts", exist_ok=True) - - audio_files = [] - - async def generate_speech_for_segment(segment, index): - if hasattr(segment, "speaker_id"): - speaker_id = segment.speaker_id - dialog = segment.dialog - else: - speaker_id = segment.get("speaker_id", 0) - dialog = segment.get("dialog", "") - - voice = get_voice_for_provider(app_config.TTS_SERVICE, speaker_id) - - if app_config.TTS_SERVICE == "local/kokoro": - filename = f"{temp_dir}/{session_id}_{index}.wav" - else: - filename = f"{temp_dir}/{session_id}_{index}.mp3" - - try: - if app_config.TTS_SERVICE == "local/kokoro": - kokoro_service = await get_kokoro_tts_service( - lang_code="a" - ) # American English - audio_path = await kokoro_service.generate_speech( - text=dialog, voice=voice, speed=1.0, output_path=filename - ) - return audio_path - else: - if app_config.TTS_SERVICE_API_BASE: - response = await aspeech( - model=app_config.TTS_SERVICE, - api_base=app_config.TTS_SERVICE_API_BASE, - api_key=app_config.TTS_SERVICE_API_KEY, - voice=voice, - input=dialog, - max_retries=2, - timeout=600, - ) - else: - response = await aspeech( - model=app_config.TTS_SERVICE, - api_key=app_config.TTS_SERVICE_API_KEY, - voice=voice, - input=dialog, - max_retries=2, - timeout=600, - ) - - with open(filename, "wb") as f: - f.write(response.content) - - return filename - except Exception as e: - print(f"Error generating speech for segment {index}: {e!s}") - raise - - tasks = [ - generate_speech_for_segment(segment, i) - for i, segment in enumerate(merged_transcript) - ] - audio_files = await asyncio.gather(*tasks) - - try: - ffmpeg = FFmpeg().option("y") - 
for audio_file in audio_files: - ffmpeg = ffmpeg.input(audio_file) - - filter_complex = [] - for i in range(len(audio_files)): - filter_complex.append(f"[{i}:0]") - - filter_complex_str = ( - "".join(filter_complex) + f"concat=n={len(audio_files)}:v=0:a=1[outa]" - ) - ffmpeg = ffmpeg.option("filter_complex", filter_complex_str) - ffmpeg = ffmpeg.output(output_path, map="[outa]") - await ffmpeg.execute() - - print(f"Successfully created podcast audio: {output_path}") - - except Exception as e: - print(f"Error merging audio files: {e!s}") - raise - finally: - for audio_file in audio_files: - try: - os.remove(audio_file) - except Exception as e: - print(f"Error removing audio file {audio_file}: {e!s}") - pass - - return { - "podcast_transcript": merged_transcript, - "final_podcast_file_path": output_path, - } diff --git a/surfsense_backend/app/agents/podcaster/prompts.py b/surfsense_backend/app/agents/podcaster/prompts.py deleted file mode 100644 index efaa79788..000000000 --- a/surfsense_backend/app/agents/podcaster/prompts.py +++ /dev/null @@ -1,122 +0,0 @@ -import datetime - - -def get_podcast_generation_prompt(user_prompt: str | None = None): - return f""" -Today's date: {datetime.datetime.now().strftime("%Y-%m-%d")} - -You are a master podcast scriptwriter, adept at transforming diverse input content into a lively, engaging, and natural-sounding conversation between two distinct podcast hosts. Your primary objective is to craft authentic, flowing dialogue that captures the spontaneity and chemistry of a real podcast discussion, completely avoiding any hint of robotic scripting or stiff formality. Think dynamic interplay, not just information delivery. - -{ - f''' -You **MUST** strictly adhere to the following user instruction while generating the podcast script: - -{user_prompt} - -''' - if user_prompt - else "" - } - - -- '': A block of text containing the information to be discussed in the podcast. 
This could be research findings, an article summary, a detailed outline, user chat history related to the topic, or any other relevant raw information. The content might be unstructured but serves as the factual basis for the podcast dialogue. - - - -A JSON object containing the podcast transcript with alternating speakers: -{{ - "podcast_transcripts": [ - {{ - "speaker_id": 0, - "dialog": "Speaker 0 dialog here" - }}, - {{ - "speaker_id": 1, - "dialog": "Speaker 1 dialog here" - }}, - {{ - "speaker_id": 0, - "dialog": "Speaker 0 dialog here" - }}, - {{ - "speaker_id": 1, - "dialog": "Speaker 1 dialog here" - }} - ] -}} - - - -1. **Establish Distinct & Consistent Host Personas:** - * **Speaker 0 (Lead Host):** Drives the conversation forward, introduces segments, poses key questions derived from the source content, and often summarizes takeaways. Maintain a guiding, clear, and engaging tone. - * **Speaker 1 (Co-Host/Expert):** Offers deeper insights, provides alternative viewpoints or elaborations on the source content, asks clarifying or challenging questions, and shares relevant anecdotes or examples. Adopt a complementary tone (e.g., analytical, enthusiastic, reflective, slightly skeptical). - * **Consistency is Key:** Ensure each speaker maintains their distinct voice, vocabulary choice, sentence structure, and perspective throughout the entire script. Avoid having them sound interchangeable. Their interaction should feel like a genuine partnership. - -2. **Craft Natural & Dynamic Dialogue:** - * **Emulate Real Conversation:** Use contractions (e.g., "don't", "it's"), interjections ("Oh!", "Wow!", "Hmm"), discourse markers ("you know", "right?", "well"), and occasional natural pauses or filler words. Avoid overly formal language or complex sentence structures typical of written text. - * **Foster Interaction & Chemistry:** Write dialogue where speakers genuinely react *to each other*. 
They should build on points ("Exactly, and that reminds me..."), ask follow-up questions ("Could you expand on that?"), express agreement/disagreement respectfully ("That's a fair point, but have you considered...?"), and show active listening. - * **Vary Rhythm & Pace:** Mix short, punchy lines with longer, more explanatory ones. Vary sentence beginnings. Use questions to break up exposition. The rhythm should feel spontaneous, not monotonous. - * **Inject Personality & Relatability:** Allow for appropriate humor, moments of surprise or curiosity, brief personal reflections ("I actually experienced something similar..."), or relatable asides that fit the hosts' personas and the topic. Lightly reference past discussions if it enhances context ("Remember last week when we touched on...?"). - -3. **Structure for Flow and Listener Engagement:** - * **Natural Beginning:** Start with dialogue that flows naturally after an introduction (which will be added manually). Avoid redundant greetings or podcast name mentions since these will be added separately. - * **Logical Progression & Signposting:** Guide the listener through the information smoothly. Use clear transitions to link different ideas or segments ("So, now that we've covered X, let's dive into Y...", "That actually brings me to another key finding..."). Ensure topics flow logically from one to the next. - * **Meaningful Conclusion:** Summarize the key takeaways or main points discussed, reinforcing the core message derived from the source content. End with a final thought, a lingering question for the audience, or a brief teaser for what's next, providing a sense of closure. Avoid abrupt endings. - -4. **Integrate Source Content Seamlessly & Accurately:** - * **Translate, Don't Recite:** Rephrase information from the `` into conversational language suitable for each host's persona. Avoid directly copying dense sentences or technical jargon without explanation. The goal is discussion, not narration. 
- * **Explain & Contextualize:** Use analogies, simple examples, storytelling, or have one host ask clarifying questions (acting as a listener surrogate) to break down complex ideas from the source. - * **Weave Information Naturally:** Integrate facts, data, or key points from the source *within* the dialogue, not as standalone, undigested blocks. Attribute information conversationally where appropriate ("The research mentioned...", "Apparently, the key factor is..."). - * **Balance Depth & Accessibility:** Ensure the conversation is informative and factually accurate based on the source content, but prioritize clear communication and engaging delivery over exhaustive technical detail. Make it understandable and interesting for a general audience. - -5. **Length & Pacing:** - * **Six-Minute Duration:** Create a transcript that, when read at a natural speaking pace, would result in approximately 6 minutes of audio. Typically, this means around 1000 words total (based on average speaking rate of 150 words per minute). - * **Concise Speaking Turns:** Keep most speaking turns relatively brief and focused. Aim for a natural back-and-forth rhythm rather than extended monologues. - * **Essential Content Only:** Prioritize the most important information from the source content. Focus on quality over quantity, ensuring every line contributes meaningfully to the topic. - - - -Input: "Quantum computing uses quantum bits or qubits which can exist in multiple states simultaneously due to superposition." - -Output: -{{ - "podcast_transcripts": [ - {{ - "speaker_id": 0, - "dialog": "Today we're diving into the mind-bending world of quantum computing. You know, this is a topic I've been excited to cover for weeks." - }}, - {{ - "speaker_id": 1, - "dialog": "Same here! And I know our listeners have been asking for it. But I have to admit, the concept of quantum computing makes my head spin a little. Can we start with the basics?" 
- }}, - {{ - "speaker_id": 0, - "dialog": "Absolutely. So regular computers use bits, right? Little on-off switches that are either 1 or 0. But quantum computers use something called qubits, and this is where it gets fascinating." - }}, - {{ - "speaker_id": 1, - "dialog": "Wait, what makes qubits so special compared to regular bits?" - }}, - {{ - "speaker_id": 0, - "dialog": "The magic is in something called superposition. These qubits can exist in multiple states at the same time, not just 1 or 0." - }}, - {{ - "speaker_id": 1, - "dialog": "That sounds impossible! How would you even picture that?" - }}, - {{ - "speaker_id": 0, - "dialog": "Think of it like a coin spinning in the air. Before it lands, is it heads or tails?" - }}, - {{ - "speaker_id": 1, - "dialog": "Well, it's... neither? Or I guess both, until it lands? Oh, I think I see where you're going with this." - }} - ] -}} - - -Transform the source material into a lively and engaging podcast conversation. Craft dialogue that showcases authentic host chemistry and natural interaction (including occasional disagreement, building on points, or asking follow-up questions). Use varied speech patterns reflecting real human conversation, ensuring the final script effectively educates *and* entertains the listener while keeping within a 5-minute audio duration. - -""" diff --git a/surfsense_backend/app/agents/podcaster/state.py b/surfsense_backend/app/agents/podcaster/state.py deleted file mode 100644 index 62eb0537b..000000000 --- a/surfsense_backend/app/agents/podcaster/state.py +++ /dev/null @@ -1,43 +0,0 @@ -"""Define the state structures for the agent.""" - -from __future__ import annotations - -from dataclasses import dataclass - -from pydantic import BaseModel, Field -from sqlalchemy.ext.asyncio import AsyncSession - - -class PodcastTranscriptEntry(BaseModel): - """ - Represents a single entry in a podcast transcript. 
- """ - - speaker_id: int = Field(..., description="The ID of the speaker (0 or 1)") - dialog: str = Field(..., description="The dialog text spoken by the speaker") - - -class PodcastTranscripts(BaseModel): - """ - Represents the full podcast transcript structure. - """ - - podcast_transcripts: list[PodcastTranscriptEntry] = Field( - ..., description="List of transcript entries with alternating speakers" - ) - - -@dataclass -class State: - """Defines the input state for the agent, representing a narrower interface to the outside world. - - This class is used to define the initial state and structure of incoming data. - See: https://langchain-ai.github.io/langgraph/concepts/low_level/#state - for more information. - """ - - # Runtime context - db_session: AsyncSession - source_content: str - podcast_transcript: list[PodcastTranscriptEntry] | None = None - final_podcast_file_path: str | None = None diff --git a/surfsense_backend/app/agents/podcaster/utils.py b/surfsense_backend/app/agents/podcaster/utils.py deleted file mode 100644 index 96ea1d51e..000000000 --- a/surfsense_backend/app/agents/podcaster/utils.py +++ /dev/null @@ -1,84 +0,0 @@ -def get_voice_for_provider(provider: str, speaker_id: int) -> dict | str: - """ - Get the appropriate voice configuration based on the TTS provider and speaker ID. 
- - Args: - provider: The TTS provider (e.g., "openai/tts-1", "vertex_ai/test") - speaker_id: The ID of the speaker (0-5) - - Returns: - Voice configuration - string for OpenAI, dict for Vertex AI - """ - if provider == "local/kokoro": - # Kokoro voice mapping - https://huggingface.co/hexgrad/Kokoro-82M/tree/main/voices - kokoro_voices = { - 0: "am_adam", # Default/intro voice - 1: "af_bella", # First speaker - } - return kokoro_voices.get(speaker_id, "af_heart") - - # Extract provider type from the model string - provider_type = ( - provider.split("/")[0].lower() if "/" in provider else provider.lower() - ) - - if provider_type == "openai": - # OpenAI voice mapping - simple string values - openai_voices = { - 0: "alloy", # Default/intro voice - 1: "echo", # First speaker - 2: "fable", # Second speaker - 3: "onyx", # Third speaker - 4: "nova", # Fourth speaker - 5: "shimmer", # Fifth speaker - } - return openai_voices.get(speaker_id, "alloy") - - elif provider_type == "vertex_ai": - # Vertex AI voice mapping - dict with languageCode and name - vertex_voices = { - 0: { - "languageCode": "en-US", - "name": "en-US-Studio-O", - }, - 1: { - "languageCode": "en-US", - "name": "en-US-Studio-M", - }, - 2: { - "languageCode": "en-UK", - "name": "en-UK-Studio-A", - }, - 3: { - "languageCode": "en-UK", - "name": "en-UK-Studio-B", - }, - 4: { - "languageCode": "en-AU", - "name": "en-AU-Studio-A", - }, - 5: { - "languageCode": "en-AU", - "name": "en-AU-Studio-B", - }, - } - return vertex_voices.get(speaker_id, vertex_voices[0]) - elif provider_type == "azure": - # OpenAI voice mapping - simple string values - azure_voices = { - 0: "alloy", # Default/intro voice - 1: "echo", # First speaker - 2: "fable", # Second speaker - 3: "onyx", # Third speaker - 4: "nova", # Fourth speaker - 5: "shimmer", # Fifth speaker - } - return azure_voices.get(speaker_id, "alloy") - - else: - # Default fallback to OpenAI format for unknown providers - default_voices = { - 0: {}, - 1: {}, - } - 
return default_voices.get(speaker_id, default_voices[0]) diff --git a/surfsense_backend/app/agents/video_presentation/__init__.py b/surfsense_backend/app/agents/video_presentation/__init__.py index caf885218..8a51eb0ef 100644 --- a/surfsense_backend/app/agents/video_presentation/__init__.py +++ b/surfsense_backend/app/agents/video_presentation/__init__.py @@ -1,8 +1,7 @@ """Video Presentation LangGraph Agent. -This module defines a graph for generating video presentations -from source content, similar to the podcaster agent but producing -slide-based video presentations with TTS narration. +This module defines a graph for generating slide-based video presentations +from source content, with TTS narration per slide. """ from .graph import graph diff --git a/surfsense_backend/app/celery_app.py b/surfsense_backend/app/celery_app.py index f72d1385a..093c65450 100644 --- a/surfsense_backend/app/celery_app.py +++ b/surfsense_backend/app/celery_app.py @@ -181,7 +181,6 @@ celery_app = Celery( backend=CELERY_RESULT_BACKEND, include=[ "app.tasks.celery_tasks.document_tasks", - "app.tasks.celery_tasks.podcast_tasks", "app.podcasts.tasks.draft", "app.podcasts.tasks.render", "app.tasks.celery_tasks.video_presentation_tasks", diff --git a/surfsense_backend/app/schemas/__init__.py b/surfsense_backend/app/schemas/__init__.py index fdf34672b..cbd7e3f92 100644 --- a/surfsense_backend/app/schemas/__init__.py +++ b/surfsense_backend/app/schemas/__init__.py @@ -68,7 +68,6 @@ from .new_llm_config import ( NewLLMConfigRead, NewLLMConfigUpdate, ) -from .podcasts import PodcastBase, PodcastCreate, PodcastRead, PodcastUpdate from .rbac_schemas import ( InviteAcceptRequest, InviteAcceptResponse, @@ -232,10 +231,6 @@ __all__ = [ "PermissionInfo", "PermissionsListResponse", # Podcast schemas - "PodcastBase", - "PodcastCreate", - "PodcastRead", - "PodcastUpdate", "RefreshTokenRequest", "RefreshTokenResponse", # Report schemas diff --git a/surfsense_backend/app/schemas/podcasts.py 
b/surfsense_backend/app/schemas/podcasts.py deleted file mode 100644 index d41f1ca36..000000000 --- a/surfsense_backend/app/schemas/podcasts.py +++ /dev/null @@ -1,66 +0,0 @@ -"""Podcast schemas for API responses.""" - -from datetime import datetime -from enum import StrEnum -from typing import Any - -from pydantic import BaseModel - - -class PodcastStatusEnum(StrEnum): - PENDING = "pending" - GENERATING = "generating" - READY = "ready" - FAILED = "failed" - - -class PodcastBase(BaseModel): - """Base podcast schema.""" - - title: str - podcast_transcript: list[dict[str, Any]] | None = None - file_location: str | None = None - search_space_id: int - - -class PodcastCreate(PodcastBase): - """Schema for creating a podcast.""" - - pass - - -class PodcastUpdate(BaseModel): - """Schema for updating a podcast.""" - - title: str | None = None - podcast_transcript: list[dict[str, Any]] | None = None - file_location: str | None = None - - -class PodcastRead(PodcastBase): - """Schema for reading a podcast.""" - - id: int - status: PodcastStatusEnum = PodcastStatusEnum.READY - created_at: datetime - transcript_entries: int | None = None - - class Config: - from_attributes = True - - @classmethod - def from_orm_with_entries(cls, obj): - """Create PodcastRead with transcript_entries computed.""" - data = { - "id": obj.id, - "title": obj.title, - "podcast_transcript": obj.podcast_transcript, - "file_location": obj.file_location, - "search_space_id": obj.search_space_id, - "status": obj.status, - "created_at": obj.created_at, - "transcript_entries": len(obj.podcast_transcript) - if obj.podcast_transcript - else None, - } - return cls(**data) diff --git a/surfsense_backend/app/tasks/celery_tasks/podcast_tasks.py b/surfsense_backend/app/tasks/celery_tasks/podcast_tasks.py deleted file mode 100644 index 8b311576e..000000000 --- a/surfsense_backend/app/tasks/celery_tasks/podcast_tasks.py +++ /dev/null @@ -1,236 +0,0 @@ -"""Celery tasks for podcast generation.""" - -import asyncio 
-import logging -import sys -from contextlib import asynccontextmanager - -from sqlalchemy import select - -from app.agents.podcaster.graph import graph as podcaster_graph -from app.agents.podcaster.state import State as PodcasterState -from app.celery_app import celery_app -from app.config import config as app_config -from app.db import Podcast, PodcastStatus -from app.services.billable_calls import ( - BillingSettlementError, - QuotaInsufficientError, - _resolve_agent_billing_for_search_space, - billable_call, -) -from app.tasks.celery_tasks import get_celery_session_maker, run_async_celery_task - -logger = logging.getLogger(__name__) - -if sys.platform.startswith("win"): - try: - asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy()) - except AttributeError: - logger.warning( - "WindowsProactorEventLoopPolicy is unavailable; async subprocess support may fail." - ) - - -# ============================================================================= -# Content-based podcast generation (for new-chat) -# ============================================================================= - - -@asynccontextmanager -async def _celery_billable_session(): - """Session factory used by billable_call inside the Celery worker loop.""" - async with get_celery_session_maker()() as session: - yield session - - -@celery_app.task(name="generate_content_podcast", bind=True) -def generate_content_podcast_task( - self, - podcast_id: int, - source_content: str, - search_space_id: int, - user_prompt: str | None = None, -) -> dict: - """ - Celery task to generate podcast from source content. - Updates existing podcast record created by the tool. 
- """ - try: - return run_async_celery_task( - lambda: _generate_content_podcast( - podcast_id, - source_content, - search_space_id, - user_prompt, - ) - ) - except Exception as e: - logger.error(f"Error generating content podcast: {e!s}") - try: - run_async_celery_task(lambda: _mark_podcast_failed(podcast_id)) - except Exception: - logger.exception("Failed to mark podcast %s as failed", podcast_id) - return {"status": "failed", "podcast_id": podcast_id} - - -async def _mark_podcast_failed(podcast_id: int) -> None: - """Mark a podcast as failed in the database.""" - async with get_celery_session_maker()() as session: - try: - result = await session.execute( - select(Podcast).filter(Podcast.id == podcast_id) - ) - podcast = result.scalars().first() - if podcast: - podcast.status = PodcastStatus.FAILED - await session.commit() - except Exception as e: - logger.error(f"Failed to mark podcast as failed: {e}") - - -async def _generate_content_podcast( - podcast_id: int, - source_content: str, - search_space_id: int, - user_prompt: str | None = None, -) -> dict: - """Generate content-based podcast and update existing record.""" - async with get_celery_session_maker()() as session: - result = await session.execute(select(Podcast).filter(Podcast.id == podcast_id)) - podcast = result.scalars().first() - - if not podcast: - raise ValueError(f"Podcast {podcast_id} not found") - - try: - podcast.status = PodcastStatus.GENERATING - await session.commit() - - try: - ( - owner_user_id, - billing_tier, - base_model, - ) = await _resolve_agent_billing_for_search_space( - session, - search_space_id, - thread_id=podcast.thread_id, - ) - except ValueError as resolve_err: - logger.error( - "Podcast %s: cannot resolve billing for search_space=%s: %s", - podcast.id, - search_space_id, - resolve_err, - ) - podcast.status = PodcastStatus.FAILED - await session.commit() - return { - "status": "failed", - "podcast_id": podcast.id, - "reason": "billing_resolution_failed", - } - - graph_config 
= { - "configurable": { - "podcast_title": podcast.title, - "search_space_id": search_space_id, - "user_prompt": user_prompt, - } - } - - initial_state = PodcasterState( - source_content=source_content, - db_session=session, - ) - - try: - async with billable_call( - user_id=owner_user_id, - search_space_id=search_space_id, - billing_tier=billing_tier, - base_model=base_model, - quota_reserve_micros_override=app_config.QUOTA_DEFAULT_PODCAST_RESERVE_MICROS, - usage_type="podcast_generation", - call_details={ - "podcast_id": podcast.id, - "title": podcast.title, - "thread_id": podcast.thread_id, - }, - billable_session_factory=_celery_billable_session, - ): - graph_result = await podcaster_graph.ainvoke( - initial_state, config=graph_config - ) - except QuotaInsufficientError as exc: - logger.info( - "Podcast %s denied: out of premium credits " - "(used=%d/%d remaining=%d)", - podcast.id, - exc.used_micros, - exc.limit_micros, - exc.remaining_micros, - ) - podcast.status = PodcastStatus.FAILED - await session.commit() - return { - "status": "failed", - "podcast_id": podcast.id, - "reason": "premium_quota_exhausted", - } - except BillingSettlementError: - logger.exception( - "Podcast %s: premium billing settlement failed", - podcast.id, - ) - podcast.status = PodcastStatus.FAILED - await session.commit() - return { - "status": "failed", - "podcast_id": podcast.id, - "reason": "billing_settlement_failed", - } - - podcast_transcript = graph_result.get("podcast_transcript", []) - file_path = graph_result.get("final_podcast_file_path", "") - - serializable_transcript = [] - for entry in podcast_transcript: - if hasattr(entry, "speaker_id"): - serializable_transcript.append( - {"speaker_id": entry.speaker_id, "dialog": entry.dialog} - ) - else: - serializable_transcript.append( - { - "speaker_id": entry.get("speaker_id", 0), - "dialog": entry.get("dialog", ""), - } - ) - - podcast.podcast_transcript = serializable_transcript - podcast.file_location = file_path - 
podcast.status = PodcastStatus.READY - logger.info( - "Podcast %s: committing READY transcript_entries=%d file=%s", - podcast.id, - len(serializable_transcript), - file_path, - ) - await session.commit() - logger.info("Podcast %s: READY commit complete", podcast.id) - - logger.info(f"Successfully generated podcast: {podcast.id}") - - return { - "status": "ready", - "podcast_id": podcast.id, - "title": podcast.title, - "transcript_entries": len(serializable_transcript), - } - - except Exception as e: - logger.error(f"Error in _generate_content_podcast: {e!s}") - podcast.status = PodcastStatus.FAILED - await session.commit() - raise From 8f38737ad98a27052820a90696b28abbdbb762ba Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 10 Jun 2026 21:45:04 +0200 Subject: [PATCH 37/50] test(podcasts): retarget celery and observability tests to new tasks --- .../tests/unit/observability/test_helpers.py | 3 +- .../unit/tasks/test_celery_async_runner.py | 13 +- .../tests/unit/tasks/test_podcast_billing.py | 388 ------------------ 3 files changed, 9 insertions(+), 395 deletions(-) delete mode 100644 surfsense_backend/tests/unit/tasks/test_podcast_billing.py diff --git a/surfsense_backend/tests/unit/observability/test_helpers.py b/surfsense_backend/tests/unit/observability/test_helpers.py index ae60c1939..61cf3d591 100644 --- a/surfsense_backend/tests/unit/observability/test_helpers.py +++ b/surfsense_backend/tests/unit/observability/test_helpers.py @@ -31,7 +31,8 @@ def _disable_otel(monkeypatch: pytest.MonkeyPatch): ("process_file_upload_with_document", "process"), ("process_circleback_meeting", "process"), ("generate_video_presentation", "generate"), - ("generate_content_podcast", "generate"), + ("podcast.draft_transcript", "podcast.draft"), + ("podcast.render_audio", "podcast.render"), ("cleanup_stale_indexing_notifications", "cleanup"), ("reconcile_pending_stripe_page_purchases", "reconcile"), ("reconcile_pending_stripe_token_purchases", "reconcile"), diff --git 
a/surfsense_backend/tests/unit/tasks/test_celery_async_runner.py b/surfsense_backend/tests/unit/tasks/test_celery_async_runner.py index a5bb3f58a..2342dd8da 100644 --- a/surfsense_backend/tests/unit/tasks/test_celery_async_runner.py +++ b/surfsense_backend/tests/unit/tasks/test_celery_async_runner.py @@ -239,17 +239,18 @@ def test_video_presentation_task_uses_runner_helper() -> None: ) -def test_podcast_task_uses_runner_helper() -> None: - """Symmetric assertion for the podcast task — same root cause, same +def test_podcast_tasks_use_runner_helper() -> None: + """Symmetric assertion for the podcast tasks — same root cause, same fix, same regression risk. """ import inspect - from app.tasks.celery_tasks import podcast_tasks + from app.podcasts.tasks import draft, render - src = inspect.getsource(podcast_tasks) - assert "run_async_celery_task" in src - assert "asyncio.new_event_loop" not in src + for module in (draft, render): + src = inspect.getsource(module) + assert "run_async_celery_task" in src + assert "asyncio.new_event_loop" not in src def test_runner_runs_shutdown_asyncgens_before_close() -> None: diff --git a/surfsense_backend/tests/unit/tasks/test_podcast_billing.py b/surfsense_backend/tests/unit/tasks/test_podcast_billing.py deleted file mode 100644 index 699297df1..000000000 --- a/surfsense_backend/tests/unit/tasks/test_podcast_billing.py +++ /dev/null @@ -1,388 +0,0 @@ -"""Unit tests for podcast Celery task billing integration. - -Validates ``_generate_content_podcast`` correctly wraps -``podcaster_graph.ainvoke`` in a ``billable_call`` envelope, propagates the -search-space owner's billing decision, and degrades cleanly when the -resolver fails or premium credit is exhausted. - -Coverage: - -* Happy-path free config: resolver → ``billable_call`` enters with - ``usage_type='podcast_generation'`` and the configured reserve override, - graph runs, podcast row flips to ``READY``. -* Happy-path premium config: same wiring with ``billing_tier='premium'``. 
-* Quota denial: ``billable_call`` raises ``QuotaInsufficientError`` → - graph is *not* invoked, podcast row flips to ``FAILED``, return dict - carries ``reason='premium_quota_exhausted'``. -* Resolver failure: ``ValueError`` from the resolver → podcast row flips - to ``FAILED``, return dict carries ``reason='billing_resolution_failed'``. -""" - -from __future__ import annotations - -import contextlib -from types import SimpleNamespace -from typing import Any -from uuid import uuid4 - -import pytest - -pytestmark = pytest.mark.unit - - -# --------------------------------------------------------------------------- -# Fakes -# --------------------------------------------------------------------------- - - -class _FakeExecResult: - def __init__(self, obj): - self._obj = obj - - def scalars(self): - return self - - def first(self): - return self._obj - - def filter(self, *_args, **_kwargs): - return self - - -class _FakeSession: - def __init__(self, podcast): - self._podcast = podcast - self.commit_count = 0 - - async def execute(self, _stmt): - return _FakeExecResult(self._podcast) - - async def commit(self): - self.commit_count += 1 - - async def __aenter__(self): - return self - - async def __aexit__(self, *args): - return None - - -class _FakeSessionMaker: - def __init__(self, session: _FakeSession): - self._session = session - - def __call__(self): - return self._session - - -def _make_podcast(podcast_id: int = 7, thread_id: int = 99) -> SimpleNamespace: - """Stand-in for a ``Podcast`` row. 
Importing ``PodcastStatus`` lazily - inside helpers keeps this fixture cheap.""" - return SimpleNamespace( - id=podcast_id, - title="Test Podcast", - thread_id=thread_id, - status=None, - podcast_transcript=None, - file_location=None, - ) - - -@contextlib.asynccontextmanager -async def _ok_billable_call(**kwargs): - """Stand-in for ``billable_call`` that records its kwargs and yields a - no-op accumulator-shaped object.""" - _CALL_LOG.append(kwargs) - yield SimpleNamespace() - - -_CALL_LOG: list[dict[str, Any]] = [] - - -@contextlib.asynccontextmanager -async def _denying_billable_call(**kwargs): - from app.services.billable_calls import QuotaInsufficientError - - _CALL_LOG.append(kwargs) - raise QuotaInsufficientError( - usage_type=kwargs.get("usage_type", "?"), - used_micros=5_000_000, - limit_micros=5_000_000, - remaining_micros=0, - ) - yield SimpleNamespace() # pragma: no cover — for grammar only - - -@contextlib.asynccontextmanager -async def _settlement_failing_billable_call(**kwargs): - from app.services.billable_calls import BillingSettlementError - - _CALL_LOG.append(kwargs) - yield SimpleNamespace() - raise BillingSettlementError( - usage_type=kwargs.get("usage_type", "?"), - user_id=kwargs["user_id"], - cause=RuntimeError("finalize failed"), - ) - - -# --------------------------------------------------------------------------- -# Tests -# --------------------------------------------------------------------------- - - -@pytest.fixture(autouse=True) -def _reset_call_log(): - _CALL_LOG.clear() - yield - _CALL_LOG.clear() - - -@pytest.mark.asyncio -async def test_billable_call_invoked_with_correct_kwargs_for_free_config(monkeypatch): - """Happy path: free billing tier still wraps the graph call so the - audit row is recorded. 
Verifies kwargs threading.""" - from app.config import config as app_config - from app.db import PodcastStatus - from app.tasks.celery_tasks import podcast_tasks - - podcast = _make_podcast(podcast_id=7, thread_id=99) - session = _FakeSession(podcast) - monkeypatch.setattr( - podcast_tasks, - "get_celery_session_maker", - lambda: _FakeSessionMaker(session), - ) - - user_id = uuid4() - - async def _fake_resolver(sess, search_space_id, *, thread_id=None): - assert search_space_id == 555 - assert thread_id == 99 - return user_id, "free", "openrouter/some-free-model" - - monkeypatch.setattr( - podcast_tasks, "_resolve_agent_billing_for_search_space", _fake_resolver - ) - monkeypatch.setattr(podcast_tasks, "billable_call", _ok_billable_call) - - async def _fake_graph_invoke(state, config): - return { - "podcast_transcript": [ - SimpleNamespace(speaker_id=0, dialog="Hi"), - SimpleNamespace(speaker_id=1, dialog="Hello"), - ], - "final_podcast_file_path": "/tmp/podcast.wav", - } - - monkeypatch.setattr(podcast_tasks.podcaster_graph, "ainvoke", _fake_graph_invoke) - - result = await podcast_tasks._generate_content_podcast( - podcast_id=7, - source_content="hello world", - search_space_id=555, - user_prompt="make it short", - ) - - assert result["status"] == "ready" - assert result["podcast_id"] == 7 - assert podcast.status == PodcastStatus.READY - assert podcast.file_location == "/tmp/podcast.wav" - - assert len(_CALL_LOG) == 1 - call = _CALL_LOG[0] - assert call["user_id"] == user_id - assert call["search_space_id"] == 555 - assert call["billing_tier"] == "free" - assert call["base_model"] == "openrouter/some-free-model" - assert call["usage_type"] == "podcast_generation" - assert ( - call["quota_reserve_micros_override"] - == app_config.QUOTA_DEFAULT_PODCAST_RESERVE_MICROS - ) - # Background artifact audit rows intentionally omit the TokenUsage.thread_id - # FK to avoid coupling Celery audit commits to an active chat transaction. 
- assert "thread_id" not in call - assert call["call_details"] == { - "podcast_id": 7, - "title": "Test Podcast", - "thread_id": 99, - } - assert callable(call["billable_session_factory"]) - - -@pytest.mark.asyncio -async def test_billable_call_invoked_with_premium_tier(monkeypatch): - """Premium resolution flows through to ``billable_call`` so the - reserve/finalize path triggers.""" - from app.tasks.celery_tasks import podcast_tasks - - podcast = _make_podcast() - session = _FakeSession(podcast) - monkeypatch.setattr( - podcast_tasks, - "get_celery_session_maker", - lambda: _FakeSessionMaker(session), - ) - - user_id = uuid4() - - async def _fake_resolver(sess, search_space_id, *, thread_id=None): - return user_id, "premium", "gpt-5.4" - - monkeypatch.setattr( - podcast_tasks, "_resolve_agent_billing_for_search_space", _fake_resolver - ) - monkeypatch.setattr(podcast_tasks, "billable_call", _ok_billable_call) - - async def _fake_graph_invoke(state, config): - return {"podcast_transcript": [], "final_podcast_file_path": "x.wav"} - - monkeypatch.setattr(podcast_tasks.podcaster_graph, "ainvoke", _fake_graph_invoke) - - await podcast_tasks._generate_content_podcast( - podcast_id=7, - source_content="hi", - search_space_id=555, - user_prompt=None, - ) - - assert _CALL_LOG[0]["billing_tier"] == "premium" - assert _CALL_LOG[0]["base_model"] == "gpt-5.4" - - -@pytest.mark.asyncio -async def test_quota_insufficient_marks_podcast_failed_and_skips_graph(monkeypatch): - """When ``billable_call`` denies the reservation, the graph never - runs and the podcast row flips to FAILED with the documented reason - code.""" - from app.db import PodcastStatus - from app.tasks.celery_tasks import podcast_tasks - - podcast = _make_podcast(podcast_id=8) - session = _FakeSession(podcast) - monkeypatch.setattr( - podcast_tasks, - "get_celery_session_maker", - lambda: _FakeSessionMaker(session), - ) - - async def _fake_resolver(sess, search_space_id, *, thread_id=None): - return uuid4(), 
"premium", "gpt-5.4" - - monkeypatch.setattr( - podcast_tasks, "_resolve_agent_billing_for_search_space", _fake_resolver - ) - monkeypatch.setattr(podcast_tasks, "billable_call", _denying_billable_call) - - graph_invoked = [] - - async def _fake_graph_invoke(state, config): - graph_invoked.append(True) - return {} - - monkeypatch.setattr(podcast_tasks.podcaster_graph, "ainvoke", _fake_graph_invoke) - - result = await podcast_tasks._generate_content_podcast( - podcast_id=8, - source_content="hi", - search_space_id=555, - user_prompt=None, - ) - - assert result == { - "status": "failed", - "podcast_id": 8, - "reason": "premium_quota_exhausted", - } - assert podcast.status == PodcastStatus.FAILED - assert graph_invoked == [] # Graph never ran on denied reservation. - - -@pytest.mark.asyncio -async def test_billing_settlement_failure_marks_podcast_failed(monkeypatch): - from app.db import PodcastStatus - from app.tasks.celery_tasks import podcast_tasks - - podcast = _make_podcast(podcast_id=10) - session = _FakeSession(podcast) - monkeypatch.setattr( - podcast_tasks, - "get_celery_session_maker", - lambda: _FakeSessionMaker(session), - ) - - async def _fake_resolver(sess, search_space_id, *, thread_id=None): - return uuid4(), "premium", "gpt-5.4" - - monkeypatch.setattr( - podcast_tasks, "_resolve_agent_billing_for_search_space", _fake_resolver - ) - monkeypatch.setattr( - podcast_tasks, "billable_call", _settlement_failing_billable_call - ) - - async def _fake_graph_invoke(state, config): - return {"podcast_transcript": [], "final_podcast_file_path": "x.wav"} - - monkeypatch.setattr(podcast_tasks.podcaster_graph, "ainvoke", _fake_graph_invoke) - - result = await podcast_tasks._generate_content_podcast( - podcast_id=10, - source_content="hi", - search_space_id=555, - user_prompt=None, - ) - - assert result == { - "status": "failed", - "podcast_id": 10, - "reason": "billing_settlement_failed", - } - assert podcast.status == PodcastStatus.FAILED - - -@pytest.mark.asyncio 
-async def test_resolver_failure_marks_podcast_failed(monkeypatch): - """If the resolver raises (e.g. search-space deleted), the task fails - cleanly without invoking the graph.""" - from app.db import PodcastStatus - from app.tasks.celery_tasks import podcast_tasks - - podcast = _make_podcast(podcast_id=9) - session = _FakeSession(podcast) - monkeypatch.setattr( - podcast_tasks, - "get_celery_session_maker", - lambda: _FakeSessionMaker(session), - ) - - async def _failing_resolver(sess, search_space_id, *, thread_id=None): - raise ValueError("Search space 555 not found") - - monkeypatch.setattr( - podcast_tasks, "_resolve_agent_billing_for_search_space", _failing_resolver - ) - - graph_invoked = [] - - async def _fake_graph_invoke(state, config): - graph_invoked.append(True) - return {} - - monkeypatch.setattr(podcast_tasks.podcaster_graph, "ainvoke", _fake_graph_invoke) - - result = await podcast_tasks._generate_content_podcast( - podcast_id=9, - source_content="hi", - search_space_id=555, - user_prompt=None, - ) - - assert result == { - "status": "failed", - "podcast_id": 9, - "reason": "billing_resolution_failed", - } - assert podcast.status == PodcastStatus.FAILED - assert graph_invoked == [] From c84525897b472e40c969f68ae6978f440401c41a Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Thu, 11 Jun 2026 06:27:00 +0200 Subject: [PATCH 38/50] test(podcasts): relocate stateful tests to integration Move the lifecycle service, Celery task bodies, and mark_failed coverage out of DB-faking unit tests and into integration tests against a real Postgres, faking only true externals (broker, object store, TTS, ffmpeg, billing, LLM). Add HTTP slices for cancel, voices, scoping, and public-chat streaming. The unit tier is now fake-free pure logic with no session doubles. 
--- .../tests/integration/podcasts/conftest.py | 319 ++++++++++++++++++ .../integration/podcasts/test_brief_gate.py | 80 +++++ .../tests/integration/podcasts/test_cancel.py | 39 +++ .../tests/integration/podcasts/test_create.py | 51 +++ .../integration/podcasts/test_draft_task.py | 115 +++++++ .../podcasts/test_public_stream.py | 64 ++++ .../integration/podcasts/test_render_task.py | 32 ++ .../integration/podcasts/test_scoping.py | 53 +++ .../integration/podcasts/test_streaming.py | 41 +++ .../integration/podcasts/test_task_failure.py | 45 +++ .../podcasts/test_transcript_gate.py | 81 +++++ .../tests/integration/podcasts/test_voices.py | 31 ++ .../tests/unit/podcasts/conftest.py | 80 +---- .../tests/unit/podcasts/test_api_schemas.py | 64 ++-- .../tests/unit/podcasts/test_draft_task.py | 135 -------- .../tests/unit/podcasts/test_lifecycle.py | 163 --------- .../tests/unit/podcasts/test_runtime.py | 57 ---- 17 files changed, 985 insertions(+), 465 deletions(-) create mode 100644 surfsense_backend/tests/integration/podcasts/conftest.py create mode 100644 surfsense_backend/tests/integration/podcasts/test_brief_gate.py create mode 100644 surfsense_backend/tests/integration/podcasts/test_cancel.py create mode 100644 surfsense_backend/tests/integration/podcasts/test_create.py create mode 100644 surfsense_backend/tests/integration/podcasts/test_draft_task.py create mode 100644 surfsense_backend/tests/integration/podcasts/test_public_stream.py create mode 100644 surfsense_backend/tests/integration/podcasts/test_render_task.py create mode 100644 surfsense_backend/tests/integration/podcasts/test_scoping.py create mode 100644 surfsense_backend/tests/integration/podcasts/test_streaming.py create mode 100644 surfsense_backend/tests/integration/podcasts/test_task_failure.py create mode 100644 surfsense_backend/tests/integration/podcasts/test_transcript_gate.py create mode 100644 surfsense_backend/tests/integration/podcasts/test_voices.py delete mode 100644 
surfsense_backend/tests/unit/podcasts/test_draft_task.py delete mode 100644 surfsense_backend/tests/unit/podcasts/test_lifecycle.py delete mode 100644 surfsense_backend/tests/unit/podcasts/test_runtime.py diff --git a/surfsense_backend/tests/integration/podcasts/conftest.py b/surfsense_backend/tests/integration/podcasts/conftest.py new file mode 100644 index 000000000..e2702fdfd --- /dev/null +++ b/surfsense_backend/tests/integration/podcasts/conftest.py @@ -0,0 +1,319 @@ +"""Podcast API + task integration fixtures. + +The app's DB session and current-user dependencies ride the test's transactional +`db_session`, so seeded rows and rows touched through the endpoints (or the task +bodies) share one transaction that rolls back per test. Only true externals are +faked: the Celery broker (`*_task.delay`) is captured instead of dispatched, the +object store is a tiny in-memory backend, the Celery tasks' own session maker is +bound to the test transaction, and — for the render task — the TTS provider and +the FFmpeg merge are stubbed. `TTS_SERVICE` is pinned so the deterministic brief +proposal can resolve voices. 
+""" + +from __future__ import annotations + +import contextlib +import uuid +from collections.abc import AsyncGenerator, AsyncIterator +from pathlib import Path + +import httpx +import pytest +import pytest_asyncio +from httpx import ASGITransport +from sqlalchemy.ext.asyncio import AsyncSession + +from app.app import app, limiter +from app.config import config as app_config +from app.db import SearchSpace, User, get_async_session +from app.routes.search_spaces_routes import create_default_roles_and_membership +from app.podcasts.persistence import Podcast, PodcastStatus +from app.podcasts.schemas import ( + DurationTarget, + PodcastSpec, + PodcastStyle, + SpeakerRole, + SpeakerSpec, + Transcript, + TranscriptTurn, +) +from app.podcasts.service import PodcastService +from app.podcasts.tts import SynthesisRequest, SynthesizedAudio, TextToSpeech +from app.users import current_active_user + +pytestmark = pytest.mark.integration + +limiter.enabled = False + + +@pytest_asyncio.fixture +async def client( + db_session: AsyncSession, + db_user: User, +) -> AsyncGenerator[httpx.AsyncClient, None]: + async def override_session() -> AsyncGenerator[AsyncSession, None]: + yield db_session + + async def override_user() -> User: + return db_user + + previous_overrides = app.dependency_overrides.copy() + app.dependency_overrides[get_async_session] = override_session + app.dependency_overrides[current_active_user] = override_user + + try: + async with httpx.AsyncClient( + transport=ASGITransport(app=app), + base_url="http://test", + timeout=30.0, + follow_redirects=False, + ) as test_client: + yield test_client + finally: + app.dependency_overrides.clear() + app.dependency_overrides.update(previous_overrides) + + +@pytest.fixture(autouse=True) +def tts_service(monkeypatch) -> str: + """Pin a provider with language-agnostic voices so brief proposal resolves.""" + service = "openai/tts-1" + monkeypatch.setattr(app_config, "TTS_SERVICE", service) + return service + + +class 
CapturedTasks: + """Records the args each podcast Celery task was enqueued with.""" + + def __init__(self) -> None: + self.draft: list[tuple] = [] + self.render: list[tuple] = [] + + +@pytest.fixture(autouse=True) +def captured_tasks(monkeypatch) -> CapturedTasks: + """Capture `*_task.delay` instead of hitting the broker (a boundary).""" + captured = CapturedTasks() + from app.podcasts.tasks import draft_transcript_task, render_audio_task + + monkeypatch.setattr( + draft_transcript_task, "delay", lambda *a, **k: captured.draft.append((a, k)) + ) + monkeypatch.setattr( + render_audio_task, "delay", lambda *a, **k: captured.render.append((a, k)) + ) + return captured + + +class FakeStorageBackend: + """In-memory object store standing in for the real audio backend.""" + + backend_name = "memory" + + def __init__(self) -> None: + self.objects: dict[str, bytes] = {} + self.deleted: list[str] = [] + + async def put(self, key: str, data: bytes, content_type: str | None = None) -> None: + self.objects[key] = data + + async def open_stream(self, key: str) -> AsyncIterator[bytes]: + yield self.objects.get(key, b"audio-bytes") + + async def delete(self, key: str) -> None: + self.deleted.append(key) + + +@pytest.fixture +def fake_storage(monkeypatch) -> FakeStorageBackend: + """Route audio storage to an in-memory backend for the stream routes.""" + backend = FakeStorageBackend() + monkeypatch.setattr( + "app.podcasts.storage.get_storage_backend", lambda: backend + ) + monkeypatch.setattr( + "app.file_storage.factory.get_storage_backend", lambda: backend + ) + return backend + + +@pytest.fixture +def bind_task_session(db_session: AsyncSession, monkeypatch) -> AsyncSession: + """Bind the Celery tasks' own session maker to the test transaction. 
+ + Task bodies open ``get_celery_session_maker()()`` rather than receiving a + session, so this hands them the test's session without closing it on exit; a + task's ``commit()`` then releases a savepoint and the per-test rollback still + cleans up. + """ + + def _make_session(): + @contextlib.asynccontextmanager + async def _ctx() -> AsyncIterator[AsyncSession]: + yield db_session + + return _ctx() + + for module in ( + "app.podcasts.tasks.draft", + "app.podcasts.tasks.render", + "app.podcasts.tasks.runtime", + ): + monkeypatch.setattr( + f"{module}.get_celery_session_maker", lambda: _make_session + ) + return db_session + + +class FakeTextToSpeech(TextToSpeech): + """In-memory TTS provider: every segment yields fixed bytes (the boundary).""" + + @property + def container(self) -> str: + return "mp3" + + async def synthesize(self, request: SynthesisRequest) -> SynthesizedAudio: + return SynthesizedAudio(data=b"segment-audio", container="mp3") + + +@pytest.fixture +def fake_tts(monkeypatch) -> FakeTextToSpeech: + """Stand in for the configured TTS provider in the render task.""" + provider = FakeTextToSpeech() + monkeypatch.setattr( + "app.podcasts.tasks.render.get_text_to_speech", lambda: provider + ) + return provider + + +@pytest.fixture +def fake_merge(monkeypatch) -> None: + """Stub the FFmpeg merge (an external binary) to emit a fixed MP3.""" + + async def _merge(segment_paths: list[Path], output_path: Path) -> None: + output_path.write_bytes(b"merged-audio") + + monkeypatch.setattr("app.podcasts.rendering.renderer.concat_to_mp3", _merge) + + +def build_spec( + *, + language: str = "en", + voice_ids: tuple[str, str] = ("openai:alloy", "openai:nova"), +) -> PodcastSpec: + """A valid two-speaker brief; tests override only what they assert on.""" + return PodcastSpec( + language=language, + style=PodcastStyle.CONVERSATIONAL, + speakers=[ + SpeakerSpec(slot=0, name="Host", role=SpeakerRole.HOST, voice_id=voice_ids[0]), + SpeakerSpec(slot=1, name="Guest", 
role=SpeakerRole.GUEST, voice_id=voice_ids[1]), + ], + duration=DurationTarget(min_minutes=10, max_minutes=20), + ) + + +def build_transcript() -> Transcript: + return Transcript( + turns=[ + TranscriptTurn(speaker=0, text="Welcome to the show."), + TranscriptTurn(speaker=1, text="Glad to be here."), + ] + ) + + +@pytest.fixture +def make_podcast(db_session: AsyncSession): + """Create a podcast advanced to a target lifecycle state via the service. + + Setup runs through the same public service the API uses, on the test's + session, so the endpoint under test reads a realistically-built row. + """ + + _LADDER = [ + PodcastStatus.AWAITING_BRIEF, + PodcastStatus.DRAFTING, + PodcastStatus.AWAITING_REVIEW, + PodcastStatus.RENDERING, + PodcastStatus.READY, + ] + + async def _make( + *, + search_space_id: int, + status: PodcastStatus = PodcastStatus.AWAITING_BRIEF, + title: str = "Test Podcast", + thread_id: int | None = None, + ) -> Podcast: + service = PodcastService(db_session) + podcast = await service.create( + title=title, search_space_id=search_space_id, thread_id=thread_id + ) + if status is PodcastStatus.PENDING: + await db_session.flush() + return podcast + + targets = _LADDER[: _LADDER.index(status) + 1] + for target in targets: + if target is PodcastStatus.AWAITING_BRIEF: + await service.attach_brief(podcast, build_spec()) + elif target is PodcastStatus.DRAFTING: + await service.begin_drafting(podcast) + elif target is PodcastStatus.AWAITING_REVIEW: + await service.attach_transcript(podcast, build_transcript()) + elif target is PodcastStatus.RENDERING: + await service.approve(podcast) + elif target is PodcastStatus.READY: + await service.attach_audio( + podcast, + storage_backend="memory", + storage_key="podcasts/audio.mp3", + duration_seconds=123, + ) + await db_session.flush() + return podcast + + return _make + + +@pytest.fixture +def act_as(): + """Switch the authenticated user for subsequent requests on ``client``. 
+ + The ``client`` fixture installs db_user and restores the prior overrides on + teardown, so re-pointing the auth dependency here is undone per test. + """ + + def _act(user: User) -> None: + app.dependency_overrides[current_active_user] = lambda: user + + return _act + + +@pytest_asyncio.fixture +async def db_other_user(db_session: AsyncSession) -> User: + """A second user who is not a member of ``db_search_space``.""" + user = User( + id=uuid.uuid4(), + email="stranger@surfsense.net", + hashed_password="hashed", + is_active=True, + is_superuser=False, + is_verified=True, + ) + db_session.add(user) + await db_session.flush() + return user + + +@pytest_asyncio.fixture +async def foreign_podcast( + db_session: AsyncSession, db_other_user: User, make_podcast +) -> Podcast: + """A podcast in a space owned by the other user, invisible to db_user.""" + space = SearchSpace(name="Stranger Space", user_id=db_other_user.id) + db_session.add(space) + await db_session.flush() + await create_default_roles_and_membership(db_session, space.id, db_other_user.id) + await db_session.flush() + return await make_podcast(search_space_id=space.id, title="Foreign") diff --git a/surfsense_backend/tests/integration/podcasts/test_brief_gate.py b/surfsense_backend/tests/integration/podcasts/test_brief_gate.py new file mode 100644 index 000000000..46d97172d --- /dev/null +++ b/surfsense_backend/tests/integration/podcasts/test_brief_gate.py @@ -0,0 +1,80 @@ +"""The brief review gate: edit the spec, then approve to start drafting. + +Covers what the user can do while ``awaiting_brief`` — edit the brief under +optimistic concurrency and approve it — and the HTTP status codes the service's +guards map to when an edit races or comes too late. 
+""" + +from __future__ import annotations + +import pytest + +pytestmark = pytest.mark.integration + +BASE = "/api/v1/podcasts" + + +async def _create(client, search_space_id: int) -> dict: + resp = await client.post( + BASE, + json={ + "title": "Episode", + "search_space_id": search_space_id, + "source_content": "Source content.", + }, + ) + assert resp.status_code == 201 + return resp.json() + + +async def test_approve_brief_starts_drafting_and_enqueues_draft( + client, db_search_space, captured_tasks +): + podcast = await _create(client, db_search_space.id) + + resp = await client.post(f"{BASE}/{podcast['id']}/brief/approve") + + assert resp.status_code == 200 + assert resp.json()["status"] == "drafting" + assert captured_tasks.draft == [((podcast["id"], db_search_space.id), {})] + assert captured_tasks.render == [] + + +async def test_update_spec_bumps_version_and_persists(client, db_search_space): + podcast = await _create(client, db_search_space.id) + spec = podcast["spec"] + spec["focus"] = "A sharper angle" + + resp = await client.patch( + f"{BASE}/{podcast['id']}/spec", + json={"spec": spec, "expected_version": podcast["spec_version"]}, + ) + + assert resp.status_code == 200 + body = resp.json() + assert body["spec_version"] == podcast["spec_version"] + 1 + assert body["spec"]["focus"] == "A sharper angle" + assert body["status"] == "awaiting_brief" + + +async def test_update_spec_with_stale_version_conflicts(client, db_search_space): + podcast = await _create(client, db_search_space.id) + + resp = await client.patch( + f"{BASE}/{podcast['id']}/spec", + json={"spec": podcast["spec"], "expected_version": 999}, + ) + + assert resp.status_code == 409 + + +async def test_update_spec_after_approval_is_rejected(client, db_search_space): + podcast = await _create(client, db_search_space.id) + await client.post(f"{BASE}/{podcast['id']}/brief/approve") + + resp = await client.patch( + f"{BASE}/{podcast['id']}/spec", + json={"spec": podcast["spec"], 
"expected_version": podcast["spec_version"]}, + ) + + assert resp.status_code == 409 diff --git a/surfsense_backend/tests/integration/podcasts/test_cancel.py b/surfsense_backend/tests/integration/podcasts/test_cancel.py new file mode 100644 index 000000000..fb5239e95 --- /dev/null +++ b/surfsense_backend/tests/integration/podcasts/test_cancel.py @@ -0,0 +1,39 @@ +"""Cancelling a podcast: allowed while in flight, refused once terminal. + +Cancellation is a user escape hatch from any non-terminal state; a podcast that +has already finished (READY) has no exit, so the disallowed transition surfaces +as 409. +""" + +import pytest + +from app.podcasts.persistence import PodcastStatus + +pytestmark = pytest.mark.integration + +BASE = "/api/v1/podcasts" + + +async def test_cancel_from_a_live_state_succeeds( + client, db_search_space, make_podcast +): + podcast = await make_podcast( + search_space_id=db_search_space.id, status=PodcastStatus.AWAITING_BRIEF + ) + + resp = await client.post(f"{BASE}/{podcast.id}/cancel") + + assert resp.status_code == 200 + assert resp.json()["status"] == "cancelled" + + +async def test_cancel_from_a_terminal_state_conflicts( + client, db_search_space, make_podcast +): + podcast = await make_podcast( + search_space_id=db_search_space.id, status=PodcastStatus.READY + ) + + resp = await client.post(f"{BASE}/{podcast.id}/cancel") + + assert resp.status_code == 409 diff --git a/surfsense_backend/tests/integration/podcasts/test_create.py b/surfsense_backend/tests/integration/podcasts/test_create.py new file mode 100644 index 000000000..19b5aeca2 --- /dev/null +++ b/surfsense_backend/tests/integration/podcasts/test_create.py @@ -0,0 +1,51 @@ +"""Creating a podcast proposes a brief and opens the review gate. + +Driven through the real POST endpoint (auth + DB on one transaction): the row is +created, a brief is proposed inline from defaults, and the podcast lands in +``awaiting_brief`` with a complete spec and nothing generated yet. 
+""" + +from __future__ import annotations + +import pytest + +pytestmark = pytest.mark.integration + +BASE = "/api/v1/podcasts" + + +async def test_create_proposes_brief_and_opens_gate(client, db_search_space): + resp = await client.post( + BASE, + json={ + "title": "My Episode", + "search_space_id": db_search_space.id, + "source_content": "A long piece of source content about a topic.", + }, + ) + + assert resp.status_code == 201 + body = resp.json() + assert body["title"] == "My Episode" + assert body["status"] == "awaiting_brief" + assert body["spec_version"] == 1 + assert body["spec"] is not None + assert body["spec"]["language"] == "en" + assert len(body["spec"]["speakers"]) == 2 + assert body["transcript"] is None + assert body["has_audio"] is False + + +async def test_create_honors_requested_speaker_count(client, db_search_space): + resp = await client.post( + BASE, + json={ + "title": "Solo", + "search_space_id": db_search_space.id, + "source_content": "Content.", + "speaker_count": 3, + }, + ) + + assert resp.status_code == 201 + assert len(resp.json()["spec"]["speakers"]) == 3 diff --git a/surfsense_backend/tests/integration/podcasts/test_draft_task.py b/surfsense_backend/tests/integration/podcasts/test_draft_task.py new file mode 100644 index 000000000..a5e0cbe36 --- /dev/null +++ b/surfsense_backend/tests/integration/podcasts/test_draft_task.py @@ -0,0 +1,115 @@ +"""The transcript-drafting task against a real database. + +Drafting is the expensive LLM step, so it runs under ``billable_call``. The +behavior that protects users' money: when billing succeeds, a drafted transcript +opens the review gate (DRAFTING -> AWAITING_REVIEW); when billing denies or +settlement fails, the podcast ends FAILED with no transcript left behind. The DB, +service, and transcript persistence run for real; only the true externals are +faked — billing (the metering boundary) and the generation graph (the LLM). 
+""" + +from __future__ import annotations + +from contextlib import asynccontextmanager +from types import SimpleNamespace +from uuid import uuid4 + +import pytest + +from app.podcasts.persistence import PodcastStatus +from app.podcasts.service import read_transcript +from app.podcasts.tasks import draft +from app.services.billable_calls import ( + BillingSettlementError, + QuotaInsufficientError, +) + +from .conftest import build_transcript + +pytestmark = pytest.mark.integration + + +def _wire_billing(monkeypatch, *, billable_call, transcript=None) -> None: + """Replace the billing + LLM externals the draft body reaches for.""" + + async def _resolver(_session, _search_space_id, *, thread_id=None): + return uuid4(), "free", "openrouter/model" + + async def _ainvoke(_state, config=None): + return {"transcript": transcript} + + monkeypatch.setattr(draft, "_resolve_agent_billing_for_search_space", _resolver) + monkeypatch.setattr(draft, "billable_call", billable_call) + monkeypatch.setattr(draft, "transcript_graph", SimpleNamespace(ainvoke=_ainvoke)) + + +async def test_successful_billing_opens_review_gate_with_transcript( + monkeypatch, db_search_space, make_podcast, bind_task_session +): + podcast = await make_podcast( + search_space_id=db_search_space.id, status=PodcastStatus.DRAFTING + ) + + @asynccontextmanager + async def _ok(**_kwargs): + yield SimpleNamespace() + + _wire_billing(monkeypatch, billable_call=_ok, transcript=build_transcript()) + + result = await draft._draft_transcript(podcast.id, db_search_space.id) + + assert result["status"] == "awaiting_review" + assert podcast.status == PodcastStatus.AWAITING_REVIEW + assert read_transcript(podcast) is not None + + +async def test_quota_denial_fails_the_podcast_without_a_transcript( + monkeypatch, db_search_space, make_podcast, bind_task_session +): + podcast = await make_podcast( + search_space_id=db_search_space.id, status=PodcastStatus.DRAFTING + ) + + @asynccontextmanager + async def _deny(**_kwargs): 
+ raise QuotaInsufficientError( + usage_type="podcast_generation", + used_micros=5_000_000, + limit_micros=5_000_000, + remaining_micros=0, + ) + yield # pragma: no cover - unreachable, satisfies the CM protocol + + _wire_billing(monkeypatch, billable_call=_deny) + + result = await draft._draft_transcript(podcast.id, db_search_space.id) + + assert result["reason"] == "quota" + assert podcast.status == PodcastStatus.FAILED + assert read_transcript(podcast) is None + + +async def test_billing_settlement_failure_fails_the_podcast( + monkeypatch, db_search_space, make_podcast, bind_task_session +): + podcast = await make_podcast( + search_space_id=db_search_space.id, status=PodcastStatus.DRAFTING + ) + + @asynccontextmanager + async def _settlement_fails(**_kwargs): + yield SimpleNamespace() + raise BillingSettlementError( + usage_type="podcast_generation", + user_id=uuid4(), + cause=RuntimeError("finalize failed"), + ) + + _wire_billing( + monkeypatch, billable_call=_settlement_fails, transcript=build_transcript() + ) + + result = await draft._draft_transcript(podcast.id, db_search_space.id) + + assert result["reason"] == "billing" + assert podcast.status == PodcastStatus.FAILED diff --git a/surfsense_backend/tests/integration/podcasts/test_public_stream.py b/surfsense_backend/tests/integration/podcasts/test_public_stream.py new file mode 100644 index 000000000..d2ba1d1b9 --- /dev/null +++ b/surfsense_backend/tests/integration/podcasts/test_public_stream.py @@ -0,0 +1,64 @@ +"""Public (unauthenticated) podcast streaming from a chat snapshot. + +A shared chat snapshot carries each podcast's stored-audio key; the public route +streams those bytes from the object store via ``share_token`` with no auth. A +podcast that isn't in the snapshot is a 404. 
+""" + +import pytest + +from app.db import NewChatThread, PublicChatSnapshot, User + +pytestmark = pytest.mark.integration + + +async def _snapshot(db_session, *, search_space_id, user: User, token: str, podcasts): + thread = NewChatThread( + title="Shared", search_space_id=search_space_id, created_by_id=user.id + ) + db_session.add(thread) + await db_session.flush() + snapshot = PublicChatSnapshot( + thread_id=thread.id, + share_token=token, + content_hash=f"hash-{token}", + message_ids=[], + snapshot_data={"podcasts": podcasts}, + ) + db_session.add(snapshot) + await db_session.flush() + + +async def test_public_stream_serves_audio_via_storage_key( + client, db_session, db_search_space, db_user, fake_storage +): + await _snapshot( + db_session, + search_space_id=db_search_space.id, + user=db_user, + token="tok-audio", + podcasts=[{"original_id": 555, "storage_key": "podcasts/x.mp3"}], + ) + fake_storage.objects["podcasts/x.mp3"] = b"public-audio" + + resp = await client.get("/api/v1/public/tok-audio/podcasts/555/stream") + + assert resp.status_code == 200 + assert resp.headers["content-type"] == "audio/mpeg" + assert resp.content == b"public-audio" + + +async def test_public_stream_404_when_podcast_absent_from_snapshot( + client, db_session, db_search_space, db_user +): + await _snapshot( + db_session, + search_space_id=db_search_space.id, + user=db_user, + token="tok-empty", + podcasts=[], + ) + + resp = await client.get("/api/v1/public/tok-empty/podcasts/999/stream") + + assert resp.status_code == 404 diff --git a/surfsense_backend/tests/integration/podcasts/test_render_task.py b/surfsense_backend/tests/integration/podcasts/test_render_task.py new file mode 100644 index 000000000..fdb66a522 --- /dev/null +++ b/surfsense_backend/tests/integration/podcasts/test_render_task.py @@ -0,0 +1,32 @@ +"""The audio-rendering task against a real database. 
+ +From RENDERING, the task synthesises and merges the approved transcript, stores +the bytes, and marks the podcast READY with the storage location recorded. The +DB, service, renderer orchestration, and storage wrapper run for real; the true +externals are faked — the TTS provider, the FFmpeg merge, and the object store. +""" + +from __future__ import annotations + +import pytest + +from app.podcasts.persistence import PodcastStatus +from app.podcasts.tasks import render + +pytestmark = pytest.mark.integration + + +async def test_render_marks_ready_and_stores_audio( + db_search_space, make_podcast, bind_task_session, fake_tts, fake_merge, fake_storage +): + podcast = await make_podcast( + search_space_id=db_search_space.id, status=PodcastStatus.RENDERING + ) + + result = await render._render_audio(podcast.id) + + assert result["status"] == "ready" + assert podcast.status == PodcastStatus.READY + assert podcast.storage_backend == "memory" + assert podcast.storage_key + assert fake_storage.objects[podcast.storage_key] == b"merged-audio" diff --git a/surfsense_backend/tests/integration/podcasts/test_scoping.py b/surfsense_backend/tests/integration/podcasts/test_scoping.py new file mode 100644 index 000000000..304af6b6e --- /dev/null +++ b/surfsense_backend/tests/integration/podcasts/test_scoping.py @@ -0,0 +1,53 @@ +"""Podcasts are scoped to search-space membership. + +A user can only create or read podcasts in spaces they belong to, and an +unscoped listing returns only the caller's own podcasts — never another +member's. 
+""" + +import pytest + +pytestmark = pytest.mark.integration + +BASE = "/api/v1/podcasts" + + +async def test_reading_a_podcast_in_a_nonmember_space_is_forbidden( + client, db_search_space, make_podcast, act_as, db_other_user +): + podcast = await make_podcast(search_space_id=db_search_space.id) + act_as(db_other_user) + + resp = await client.get(f"{BASE}/{podcast.id}") + + assert resp.status_code == 403 + + +async def test_creating_in_a_nonmember_space_is_forbidden( + client, db_search_space, act_as, db_other_user +): + act_as(db_other_user) + + resp = await client.post( + BASE, + json={ + "title": "X", + "search_space_id": db_search_space.id, + "source_content": "content", + }, + ) + + assert resp.status_code == 403 + + +async def test_listing_returns_only_the_callers_podcasts( + client, db_search_space, make_podcast, foreign_podcast +): + mine = await make_podcast(search_space_id=db_search_space.id, title="Mine") + + resp = await client.get(BASE) + + assert resp.status_code == 200 + ids = {p["id"] for p in resp.json()} + assert mine.id in ids + assert foreign_podcast.id not in ids diff --git a/surfsense_backend/tests/integration/podcasts/test_streaming.py b/surfsense_backend/tests/integration/podcasts/test_streaming.py new file mode 100644 index 000000000..891c53005 --- /dev/null +++ b/surfsense_backend/tests/integration/podcasts/test_streaming.py @@ -0,0 +1,41 @@ +"""Streaming a podcast's rendered audio over HTTP. + +A ready podcast streams its bytes from the storage backend; a podcast with no +stored audio returns 404. Storage is an in-memory backend (the object store is a +system boundary). 
+""" + +from __future__ import annotations + +import pytest + +from app.podcasts.persistence import PodcastStatus + +pytestmark = pytest.mark.integration + +BASE = "/api/v1/podcasts" + + +async def test_stream_serves_stored_audio( + client, db_search_space, make_podcast, fake_storage +): + podcast = await make_podcast( + search_space_id=db_search_space.id, status=PodcastStatus.READY + ) + fake_storage.objects["podcasts/audio.mp3"] = b"the-audio" + + resp = await client.get(f"{BASE}/{podcast.id}/stream") + + assert resp.status_code == 200 + assert resp.headers["content-type"] == "audio/mpeg" + assert resp.content == b"the-audio" + + +async def test_stream_404_when_no_audio(client, db_search_space, make_podcast): + podcast = await make_podcast( + search_space_id=db_search_space.id, status=PodcastStatus.AWAITING_REVIEW + ) + + resp = await client.get(f"{BASE}/{podcast.id}/stream") + + assert resp.status_code == 404 diff --git a/surfsense_backend/tests/integration/podcasts/test_task_failure.py b/surfsense_backend/tests/integration/podcasts/test_task_failure.py new file mode 100644 index 000000000..43212f58f --- /dev/null +++ b/surfsense_backend/tests/integration/podcasts/test_task_failure.py @@ -0,0 +1,45 @@ +"""The task failure safety net (``mark_failed``) against a real database. + +When a task body raises, ``mark_failed`` records the reason on the row. Its +contract has two halves worth securing: a still-running podcast moves to FAILED +with the reason, while one that already reached a terminal state is left exactly +as it was rather than forced. A missing row is a no-op, never a crash. 
+""" + +from __future__ import annotations + +import pytest + +from app.podcasts.persistence import PodcastStatus +from app.podcasts.tasks import runtime + +pytestmark = pytest.mark.integration + + +async def test_marking_failed_records_the_reason_on_a_running_podcast( + db_search_space, make_podcast, bind_task_session +): + podcast = await make_podcast( + search_space_id=db_search_space.id, status=PodcastStatus.DRAFTING + ) + + await runtime.mark_failed(podcast.id, "tts provider unavailable") + + assert podcast.status == PodcastStatus.FAILED + assert podcast.error == "tts provider unavailable" + + +async def test_marking_failed_leaves_an_already_terminal_podcast_untouched( + db_search_space, make_podcast, bind_task_session +): + podcast = await make_podcast( + search_space_id=db_search_space.id, status=PodcastStatus.READY + ) + + await runtime.mark_failed(podcast.id, "too late") + + assert podcast.status == PodcastStatus.READY + + +async def test_marking_a_missing_podcast_failed_is_a_no_op(bind_task_session): + await runtime.mark_failed(987654321, "gone") # must not raise diff --git a/surfsense_backend/tests/integration/podcasts/test_transcript_gate.py b/surfsense_backend/tests/integration/podcasts/test_transcript_gate.py new file mode 100644 index 000000000..a8707f8db --- /dev/null +++ b/surfsense_backend/tests/integration/podcasts/test_transcript_gate.py @@ -0,0 +1,81 @@ +"""The transcript go/no-go gate: approve to render, or regenerate to redraft. + +From ``awaiting_review`` the user either approves (start rendering) or regenerates +(redraft). These pin the resulting state, the Celery task each enqueues, and the +HTTP codes for acting from the wrong state (409) or without a transcript (422). 
+""" + +from __future__ import annotations + +import pytest + +from app.podcasts.persistence import Podcast, PodcastStatus + +pytestmark = pytest.mark.integration + +BASE = "/api/v1/podcasts" + + +async def test_approve_transcript_starts_rendering_and_enqueues_render( + client, db_search_space, make_podcast, captured_tasks +): + podcast = await make_podcast( + search_space_id=db_search_space.id, status=PodcastStatus.AWAITING_REVIEW + ) + + resp = await client.post(f"{BASE}/{podcast.id}/transcript/approve") + + assert resp.status_code == 200 + assert resp.json()["status"] == "rendering" + assert captured_tasks.render == [((podcast.id,), {})] + assert captured_tasks.draft == [] + + +async def test_regenerate_returns_to_drafting_and_enqueues_draft( + client, db_search_space, make_podcast, captured_tasks +): + podcast = await make_podcast( + search_space_id=db_search_space.id, status=PodcastStatus.AWAITING_REVIEW + ) + + resp = await client.post(f"{BASE}/{podcast.id}/transcript/regenerate") + + assert resp.status_code == 200 + assert resp.json()["status"] == "drafting" + assert captured_tasks.draft == [((podcast.id, db_search_space.id), {})] + assert captured_tasks.render == [] + + +async def test_approve_transcript_from_terminal_state_is_rejected( + client, db_search_space, make_podcast, captured_tasks +): + # A ready podcast still has its transcript, so the precondition passes and + # the disallowed terminal->rendering transition is what surfaces (409). + podcast = await make_podcast( + search_space_id=db_search_space.id, status=PodcastStatus.READY + ) + + resp = await client.post(f"{BASE}/{podcast.id}/transcript/approve") + + assert resp.status_code == 409 + assert captured_tasks.render == [] + + +async def test_approve_without_transcript_is_unprocessable( + client, db_session, db_search_space, captured_tasks +): + # An anomalous awaiting_review row with no transcript exercises the route's + # precondition->422 mapping (the service refuses to render without one). 
+ podcast = Podcast( + title="No transcript", + search_space_id=db_search_space.id, + status=PodcastStatus.AWAITING_REVIEW, + spec_version=1, + ) + db_session.add(podcast) + await db_session.flush() + + resp = await client.post(f"{BASE}/{podcast.id}/transcript/approve") + + assert resp.status_code == 422 + assert captured_tasks.render == [] diff --git a/surfsense_backend/tests/integration/podcasts/test_voices.py b/surfsense_backend/tests/integration/podcasts/test_voices.py new file mode 100644 index 000000000..688ddad56 --- /dev/null +++ b/surfsense_backend/tests/integration/podcasts/test_voices.py @@ -0,0 +1,31 @@ +"""GET /podcasts/voices: the active provider's catalog, or 503 if unconfigured. + +The brief UI needs the voices the configured TTS provider offers; with no +provider configured there is nothing to choose from, which is a 503 rather than +an empty list. +""" + +import pytest + +from app.config import config as app_config + +pytestmark = pytest.mark.integration + +BASE = "/api/v1/podcasts" + + +async def test_voices_returns_the_active_providers_catalog(client): + resp = await client.get(f"{BASE}/voices") + + assert resp.status_code == 200 + voices = resp.json() + assert voices # openai/tts-1 offers voices + assert {"voice_id", "display_name", "language", "gender"} <= voices[0].keys() + + +async def test_voices_503_when_no_tts_configured(client, monkeypatch): + monkeypatch.setattr(app_config, "TTS_SERVICE", "") + + resp = await client.get(f"{BASE}/voices") + + assert resp.status_code == 503 diff --git a/surfsense_backend/tests/unit/podcasts/conftest.py b/surfsense_backend/tests/unit/podcasts/conftest.py index 446982904..a3836f689 100644 --- a/surfsense_backend/tests/unit/podcasts/conftest.py +++ b/surfsense_backend/tests/unit/podcasts/conftest.py @@ -1,10 +1,10 @@ """Shared builders for podcast unit tests. -These tests exercise the podcast domain through its public interfaces. 
The only -test double is a minimal stand-in for the SQLAlchemy ``AsyncSession`` — a real -system boundary — so the service's own repository and state machine run for -real. Briefs and transcripts are built with valid factories so each test states -just the fields it cares about. +These tests exercise pure logic through public interfaces with no test doubles: +the brief and transcript factories build valid aggregates so each test states +only the fields it cares about. Stateful, persistence-backed paths (the lifecycle +service, the Celery task bodies) are covered by the integration suite against a +real database. """ from __future__ import annotations @@ -22,76 +22,6 @@ from app.podcasts.schemas import ( ) -class FakeAsyncSession: - """A no-op stand-in for ``AsyncSession`` at the persistence boundary. - - The service flushes to assign state within a unit of work; in a unit test - there is no database, so ``add``/``flush`` simply do nothing. Behavior is - observed through the returned aggregate, never through this double. - """ - - def add(self, _obj: object) -> None: - return None - - async def flush(self) -> None: - return None - - -class FakeCeleryDbSession(FakeAsyncSession): - """An async-context session double for Celery task bodies. - - Task bodies open ``get_celery_session_maker()()`` as an async context, - ``get`` the row, then ``commit``. This holds one preloaded podcast and - records whether the body committed, so tests assert on the row's final - state — not on the calls made to get there. 
- """ - - def __init__(self, podcast: object | None = None) -> None: - self._podcast = podcast - self.committed = False - - async def get(self, _model: object, _id: object) -> object | None: - return self._podcast - - async def commit(self) -> None: - self.committed = True - - async def __aenter__(self) -> FakeCeleryDbSession: - return self - - async def __aexit__(self, *_exc: object) -> None: - return None - - -@pytest.fixture -def fake_session() -> FakeAsyncSession: - return FakeAsyncSession() - - -@pytest.fixture -def make_celery_session(): - """Factory for a Celery-style session double holding one podcast.""" - - def _make(podcast: object | None = None) -> FakeCeleryDbSession: - return FakeCeleryDbSession(podcast) - - return _make - - -@pytest.fixture -def session_maker_for(): - """Build a ``get_celery_session_maker`` replacement bound to one session. - - ``get_celery_session_maker()()`` must yield the session, so the replacement - is a zero-arg callable returning a maker that returns the session. - """ - - def _make(session: object): - return lambda: (lambda: session) - - return _make - - @pytest.fixture def make_spec(): """Factory for a valid :class:`PodcastSpec`; override only what matters.""" diff --git a/surfsense_backend/tests/unit/podcasts/test_api_schemas.py b/surfsense_backend/tests/unit/podcasts/test_api_schemas.py index 8203d7fdb..b27d1ead5 100644 --- a/surfsense_backend/tests/unit/podcasts/test_api_schemas.py +++ b/surfsense_backend/tests/unit/podcasts/test_api_schemas.py @@ -1,10 +1,9 @@ """The API read model the frontend renders from. -``PodcastDetail.of`` is the contract the detail view and action responses -depend on: it exposes the deserialized brief and transcript and a simple -``has_audio`` flag the client can't derive from the published Zero columns. -These tests drive real podcasts through the service, then assert the read model -reflects their state. 
+``PodcastDetail.of`` maps a stored podcast row to the detail view and action +responses: it exposes the deserialized brief and transcript and a simple +``has_audio`` flag the client can't derive from the published Zero columns. Each +test builds a row in one lifecycle shape and asserts the mapping reflects it. """ from __future__ import annotations @@ -14,28 +13,27 @@ from datetime import UTC, datetime import pytest from app.podcasts.api.schemas import PodcastDetail -from app.podcasts.persistence import PodcastStatus -from app.podcasts.service import PodcastService +from app.podcasts.persistence import Podcast, PodcastStatus pytestmark = pytest.mark.unit -def _stamp(podcast): - """Give a transient row the id and created_at a persisted one would have. - - A detail response is only ever built from a saved podcast; without a real - database, we stand in the primary key and timestamp the DB would assign. - """ +def _podcast(*, status: PodcastStatus = PodcastStatus.PENDING, **columns) -> Podcast: + """A persisted-looking row: the id and created_at a saved podcast would carry.""" + podcast = Podcast( + title="Episode", + search_space_id=3, + status=status, + spec_version=1, + **columns, + ) podcast.id = 1 podcast.created_at = datetime.now(UTC) return podcast -async def test_a_fresh_podcast_exposes_no_brief_transcript_or_audio(fake_session): - service = PodcastService(fake_session) - podcast = _stamp(await service.create(title="New", search_space_id=3)) - - detail = PodcastDetail.of(podcast) +def test_a_fresh_podcast_exposes_no_brief_transcript_or_audio(): + detail = PodcastDetail.of(_podcast()) assert detail.status == PodcastStatus.PENDING assert detail.spec is None @@ -43,12 +41,11 @@ async def test_a_fresh_podcast_exposes_no_brief_transcript_or_audio(fake_session assert detail.has_audio is False -async def test_an_awaiting_brief_podcast_exposes_the_deserialized_brief( - fake_session, make_spec -): - service = PodcastService(fake_session) - podcast = _stamp(await 
service.create(title="Brief", search_space_id=3)) - await service.attach_brief(podcast, make_spec(language="fr")) +def test_an_awaiting_brief_podcast_exposes_the_deserialized_brief(make_spec): + podcast = _podcast( + status=PodcastStatus.AWAITING_BRIEF, + spec=make_spec(language="fr").model_dump(mode="json"), + ) detail = PodcastDetail.of(podcast) @@ -56,17 +53,14 @@ async def test_an_awaiting_brief_podcast_exposes_the_deserialized_brief( assert detail.spec.language == "fr" -async def test_a_ready_podcast_reports_available_audio( - fake_session, make_spec, make_transcript -): - service = PodcastService(fake_session) - podcast = _stamp(await service.create(title="Done", search_space_id=3)) - await service.attach_brief(podcast, make_spec()) - await service.begin_drafting(podcast) - await service.attach_transcript(podcast, make_transcript()) - await service.approve(podcast) - await service.attach_audio( - podcast, storage_backend="local", storage_key="k", duration_seconds=120 +def test_a_ready_podcast_reports_available_audio(make_spec, make_transcript): + podcast = _podcast( + status=PodcastStatus.READY, + spec=make_spec().model_dump(mode="json"), + podcast_transcript=make_transcript().model_dump(mode="json"), + storage_backend="local", + storage_key="k", + duration_seconds=120, ) detail = PodcastDetail.of(podcast) diff --git a/surfsense_backend/tests/unit/podcasts/test_draft_task.py b/surfsense_backend/tests/unit/podcasts/test_draft_task.py deleted file mode 100644 index d04692ae4..000000000 --- a/surfsense_backend/tests/unit/podcasts/test_draft_task.py +++ /dev/null @@ -1,135 +0,0 @@ -"""The transcript-drafting task's billing gate. - -Drafting is the expensive LLM step, so it runs under ``billable_call``. The -behavior that protects users' money: if billing denies the reservation the -podcast must end FAILED with no transcript, and only when billing succeeds does -a drafted transcript open the review gate. 
These tests fake the true -boundaries — the database, the billing system, and the generation graph — and -assert the podcast's resulting state, never how those boundaries were called. -""" - -from __future__ import annotations - -from contextlib import asynccontextmanager -from types import SimpleNamespace -from uuid import uuid4 - -import pytest - -from app.podcasts.persistence import Podcast, PodcastStatus -from app.podcasts.service import read_transcript -from app.podcasts.tasks import draft -from app.services.billable_calls import ( - BillingSettlementError, - QuotaInsufficientError, -) - -pytestmark = pytest.mark.unit - - -def _drafting_podcast(make_spec) -> Podcast: - """A podcast already at DRAFTING with an approved brief, as the API leaves it.""" - podcast = Podcast( - title="Episode", - search_space_id=42, - status=PodcastStatus.DRAFTING, - spec_version=1, - ) - podcast.id = 1 - podcast.thread_id = None - podcast.spec = make_spec().model_dump(mode="json") - podcast.source_content = "Some source material to discuss." 
- return podcast - - -def _wire_boundaries(monkeypatch, *, session, billable_call, transcript=None): - """Replace every external dependency the task body reaches for.""" - monkeypatch.setattr(draft, "get_celery_session_maker", lambda: (lambda: session)) - - async def _resolver(_session, _search_space_id, *, thread_id=None): - return uuid4(), "free", "openrouter/model" - - monkeypatch.setattr( - draft, "_resolve_agent_billing_for_search_space", _resolver - ) - monkeypatch.setattr(draft, "billable_call", billable_call) - - async def _ainvoke(_state, config=None): - return {"transcript": transcript} - - monkeypatch.setattr(draft, "transcript_graph", SimpleNamespace(ainvoke=_ainvoke)) - - -async def test_successful_billing_opens_the_review_gate_with_a_transcript( - monkeypatch, make_celery_session, make_spec, make_transcript -): - podcast = _drafting_podcast(make_spec) - session = make_celery_session(podcast) - - @asynccontextmanager - async def _ok(**_kwargs): - yield SimpleNamespace() - - _wire_boundaries( - monkeypatch, session=session, billable_call=_ok, transcript=make_transcript() - ) - - result = await draft._draft_transcript(podcast_id=1, search_space_id=42) - - assert podcast.status == PodcastStatus.AWAITING_REVIEW - assert read_transcript(podcast) is not None - assert result["status"] == "awaiting_review" - - -async def test_quota_denial_fails_the_podcast_without_a_transcript( - monkeypatch, make_celery_session, make_spec -): - """A denied reservation must not leave a half-drafted, billable mess.""" - podcast = _drafting_podcast(make_spec) - session = make_celery_session(podcast) - - @asynccontextmanager - async def _deny(**_kwargs): - raise QuotaInsufficientError( - usage_type="podcast_generation", - used_micros=5_000_000, - limit_micros=5_000_000, - remaining_micros=0, - ) - yield # pragma: no cover - unreachable, satisfies the CM protocol - - _wire_boundaries(monkeypatch, session=session, billable_call=_deny) - - result = await 
draft._draft_transcript(podcast_id=1, search_space_id=42) - - assert podcast.status == PodcastStatus.FAILED - assert read_transcript(podcast) is None - assert result["reason"] == "quota" - - -async def test_billing_settlement_failure_fails_the_podcast( - monkeypatch, make_celery_session, make_spec, make_transcript -): - podcast = _drafting_podcast(make_spec) - session = make_celery_session(podcast) - - @asynccontextmanager - async def _settlement_fails(**_kwargs): - yield SimpleNamespace() - raise BillingSettlementError( - usage_type="podcast_generation", - user_id=uuid4(), - cause=RuntimeError("finalize failed"), - ) - - _wire_boundaries( - monkeypatch, - session=session, - billable_call=_settlement_fails, - transcript=make_transcript(), - ) - - result = await draft._draft_transcript(podcast_id=1, search_space_id=42) - - assert podcast.status == PodcastStatus.FAILED - assert result["reason"] == "billing" diff --git a/surfsense_backend/tests/unit/podcasts/test_lifecycle.py b/surfsense_backend/tests/unit/podcasts/test_lifecycle.py deleted file mode 100644 index 5f61c7562..000000000 --- a/surfsense_backend/tests/unit/podcasts/test_lifecycle.py +++ /dev/null @@ -1,163 +0,0 @@ -"""The podcast lifecycle: the guarantees the rest of the system relies on. - -These tests drive the aggregate through :class:`PodcastService`'s public -methods and observe the resulting status and stored brief/transcript — the -domain's contract. They say nothing about how the service stores or flushes, -so they survive any refactor that preserves the lifecycle. 
-""" - -from __future__ import annotations - -import pytest - -from app.podcasts.persistence import PodcastStatus -from app.podcasts.service import ( - InvalidTransition, - PodcastService, - PreconditionFailed, - SpecConflict, - read_spec, - read_transcript, -) - -pytestmark = pytest.mark.unit - - -async def test_a_podcast_progresses_from_creation_to_ready( - fake_session, make_spec, make_transcript -): - """The full happy path: create → brief → draft → review → render → ready.""" - service = PodcastService(fake_session) - - podcast = await service.create(title="Episode 1", search_space_id=7) - assert podcast.status == PodcastStatus.PENDING - - spec = make_spec() - await service.attach_brief(podcast, spec) - assert podcast.status == PodcastStatus.AWAITING_BRIEF - assert read_spec(podcast) == spec - - await service.begin_drafting(podcast) - assert podcast.status == PodcastStatus.DRAFTING - - transcript = make_transcript() - await service.attach_transcript(podcast, transcript) - assert podcast.status == PodcastStatus.AWAITING_REVIEW - assert read_transcript(podcast) == transcript - - await service.approve(podcast) - assert podcast.status == PodcastStatus.RENDERING - - await service.attach_audio( - podcast, storage_backend="local", storage_key="k", duration_seconds=42 - ) - assert podcast.status == PodcastStatus.READY - assert podcast.duration_seconds == 42 - - -async def test_drafting_requires_an_approved_brief(fake_session): - """A brief must exist before drafting can begin.""" - service = PodcastService(fake_session) - podcast = await service.create(title="No brief", search_space_id=1) - - with pytest.raises(PreconditionFailed): - await service.begin_drafting(podcast) - - -async def test_rendering_requires_a_transcript(fake_session, make_spec): - """Approval to render is refused when no transcript has been drafted.""" - service = PodcastService(fake_session) - podcast = await service.create(title="No transcript", search_space_id=1) - await 
service.attach_brief(podcast, make_spec()) - await service.begin_drafting(podcast) - - with pytest.raises(PreconditionFailed): - await service.approve(podcast) - - -async def test_regenerate_returns_a_reviewed_transcript_to_drafting( - fake_session, make_spec, make_transcript -): - """At the go/no-go gate, rejecting sends the podcast back to drafting.""" - service = PodcastService(fake_session) - podcast = await service.create(title="Redo", search_space_id=1) - await service.attach_brief(podcast, make_spec()) - await service.begin_drafting(podcast) - await service.attach_transcript(podcast, make_transcript()) - - await service.regenerate(podcast) - - assert podcast.status == PodcastStatus.DRAFTING - - -async def test_brief_can_be_edited_at_the_gate_and_bumps_its_version( - fake_session, make_spec -): - """Editing the brief while awaiting review records it and advances version.""" - service = PodcastService(fake_session) - podcast = await service.create(title="Editable", search_space_id=1) - await service.attach_brief(podcast, make_spec(language="en")) - starting_version = podcast.spec_version - - await service.update_spec(podcast, make_spec(language="fr"), starting_version) - - assert read_spec(podcast).language == "fr" - assert podcast.spec_version == starting_version + 1 - - -async def test_editing_a_brief_with_a_stale_version_conflicts( - fake_session, make_spec -): - """A concurrent edit racing on a stale version is rejected, not silently lost.""" - service = PodcastService(fake_session) - podcast = await service.create(title="Raced", search_space_id=1) - await service.attach_brief(podcast, make_spec()) - current = podcast.spec_version - - with pytest.raises(SpecConflict): - await service.update_spec(podcast, make_spec(language="es"), current - 1) - - -async def test_brief_cannot_be_edited_after_the_gate_closes( - fake_session, make_spec -): - """Once drafting starts, the brief is settled and edits are refused.""" - service = PodcastService(fake_session) - 
podcast = await service.create(title="Locked", search_space_id=1) - await service.attach_brief(podcast, make_spec()) - await service.begin_drafting(podcast) - - with pytest.raises(InvalidTransition): - await service.update_spec(podcast, make_spec(language="es"), podcast.spec_version) - - -async def test_a_podcast_can_be_cancelled_while_in_flight(fake_session, make_spec): - """Cancellation is available from a non-terminal state.""" - service = PodcastService(fake_session) - podcast = await service.create(title="Abort", search_space_id=1) - await service.attach_brief(podcast, make_spec()) - - await service.cancel(podcast) - - assert podcast.status == PodcastStatus.CANCELLED - - -async def test_failure_records_a_reason(fake_session): - """Failing a podcast captures a human-readable reason.""" - service = PodcastService(fake_session) - podcast = await service.create(title="Boom", search_space_id=1) - - await service.fail(podcast, "tts provider unavailable") - - assert podcast.status == PodcastStatus.FAILED - assert podcast.error == "tts provider unavailable" - - -async def test_terminal_podcasts_reject_further_transitions(fake_session): - """A finished podcast cannot be cancelled or otherwise moved.""" - service = PodcastService(fake_session) - podcast = await service.create(title="Done", search_space_id=1) - await service.cancel(podcast) - - with pytest.raises(InvalidTransition): - await service.fail(podcast, "too late") diff --git a/surfsense_backend/tests/unit/podcasts/test_runtime.py b/surfsense_backend/tests/unit/podcasts/test_runtime.py deleted file mode 100644 index 91c6ada77..000000000 --- a/surfsense_backend/tests/unit/podcasts/test_runtime.py +++ /dev/null @@ -1,57 +0,0 @@ -"""Failure recording shared by the podcast tasks. - -When a task body raises, ``mark_failed`` is the safety net that records the -reason on the row. 
Its contract has two halves worth securing: a still-running -podcast is moved to FAILED with the reason, and a podcast that already reached a -terminal state is left exactly as it was rather than forced. Only the database -(a real boundary) is doubled; the lifecycle service runs for real. -""" - -from __future__ import annotations - -import pytest - -from app.podcasts.persistence import Podcast, PodcastStatus -from app.podcasts.tasks import runtime - -pytestmark = pytest.mark.unit - - -def _podcast(status: PodcastStatus) -> Podcast: - podcast = Podcast(title="Episode", search_space_id=1, status=status, spec_version=1) - podcast.id = 1 - return podcast - - -async def test_marking_failed_records_the_reason_on_a_running_podcast( - monkeypatch, session_maker_for, make_celery_session -): - podcast = _podcast(PodcastStatus.DRAFTING) - session = make_celery_session(podcast) - monkeypatch.setattr(runtime, "get_celery_session_maker", session_maker_for(session)) - - await runtime.mark_failed(1, "tts provider unavailable") - - assert podcast.status == PodcastStatus.FAILED - assert podcast.error == "tts provider unavailable" - - -async def test_marking_failed_leaves_an_already_terminal_podcast_untouched( - monkeypatch, session_maker_for, make_celery_session -): - podcast = _podcast(PodcastStatus.CANCELLED) - session = make_celery_session(podcast) - monkeypatch.setattr(runtime, "get_celery_session_maker", session_maker_for(session)) - - await runtime.mark_failed(1, "too late") - - assert podcast.status == PodcastStatus.CANCELLED - - -async def test_marking_a_missing_podcast_failed_is_a_no_op( - monkeypatch, session_maker_for, make_celery_session -): - session = make_celery_session(None) - monkeypatch.setattr(runtime, "get_celery_session_maker", session_maker_for(session)) - - await runtime.mark_failed(999, "gone") # must not raise From 64b36f2622f41cba1c52e636636edabe1da2998a Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Thu, 11 Jun 2026 10:04:51 +0200 Subject: [PATCH 39/50] 
feat(podcasts): add frontend contracts and lifecycle api service --- .../contracts/types/podcast.types.ts | 120 ++++++++++++++++++ .../lib/apis/podcasts-api.service.ts | 58 +++++++++ 2 files changed, 178 insertions(+) create mode 100644 surfsense_web/contracts/types/podcast.types.ts create mode 100644 surfsense_web/lib/apis/podcasts-api.service.ts diff --git a/surfsense_web/contracts/types/podcast.types.ts b/surfsense_web/contracts/types/podcast.types.ts new file mode 100644 index 000000000..13bf530e7 --- /dev/null +++ b/surfsense_web/contracts/types/podcast.types.ts @@ -0,0 +1,120 @@ +import { z } from "zod"; + +// ============================================================================= +// Lifecycle — mirror app/podcasts/persistence/enums/podcast_status.py +// ============================================================================= + +export const podcastStatus = z.enum([ + "pending", + "awaiting_brief", + "drafting", + "awaiting_review", + "rendering", + "ready", + "failed", + "cancelled", +]); +export type PodcastStatus = z.infer<typeof podcastStatus>; + +/** States waiting on user input before the lifecycle can proceed. */ +export const GATE_STATUSES: ReadonlySet<PodcastStatus> = new Set([ + "awaiting_brief", + "awaiting_review", +]); + +/** States from which no further transition is possible. 
*/ +export const TERMINAL_STATUSES: ReadonlySet<PodcastStatus> = new Set([ + "ready", + "failed", + "cancelled", +]); + +// ============================================================================= +// Brief (spec) — mirror app/podcasts/schemas/spec.py +// ============================================================================= + +export const speakerRole = z.enum(["host", "cohost", "guest", "expert", "narrator"]); +export type SpeakerRole = z.infer<typeof speakerRole>; + +export const podcastStyle = z.enum([ + "conversational", + "interview", + "debate", + "monologue", + "narrative", +]); +export type PodcastStyle = z.infer<typeof podcastStyle>; + +export const MAX_SPEAKERS = 6; + +export const speakerSpec = z.object({ + slot: z.number().int().min(0), + name: z.string().min(1).max(120), + role: speakerRole, + voice_id: z.string().min(1), +}); +export type SpeakerSpec = z.infer<typeof speakerSpec>; + +export const durationTarget = z.object({ + min_minutes: z.number().int().min(1), + max_minutes: z.number().int().min(1), +}); +export type DurationTarget = z.infer<typeof durationTarget>; + +export const podcastSpec = z.object({ + language: z.string().min(2), + style: podcastStyle, + speakers: z.array(speakerSpec).min(1).max(MAX_SPEAKERS), + duration: durationTarget, + focus: z.string().max(2000).nullable().optional(), +}); +export type PodcastSpec = z.infer<typeof podcastSpec>; + +// ============================================================================= +// Transcript — mirror app/podcasts/schemas/transcript.py +// ============================================================================= + +export const transcriptTurn = z.object({ + speaker: z.number().int().min(0), + text: z.string().min(1), +}); +export type TranscriptTurn = z.infer<typeof transcriptTurn>; + +export const transcript = z.object({ + turns: z.array(transcriptTurn).min(1), +}); +export type Transcript = z.infer<typeof transcript>; + +// ============================================================================= +// API shapes — mirror app/podcasts/api/schemas.py +// 
============================================================================= + +export const voiceOption = z.object({ + voice_id: z.string(), + display_name: z.string(), + language: z.string(), + gender: z.string(), +}); +export type VoiceOption = z.infer<typeof voiceOption>; + +export const updateSpecRequest = z.object({ + spec: podcastSpec, + expected_version: z.number().int().min(1), +}); +export type UpdateSpecRequest = z.infer<typeof updateSpecRequest>; + +export const podcastDetail = z.object({ + id: z.number(), + title: z.string(), + status: podcastStatus, + spec_version: z.number(), + spec: podcastSpec.nullable(), + transcript: transcript.nullable(), + has_audio: z.boolean(), + duration_seconds: z.number().nullable(), + error: z.string().nullable(), + created_at: z.string(), + search_space_id: z.number(), + thread_id: z.number().nullable(), +}); +export type PodcastDetail = z.infer<typeof podcastDetail>; diff --git a/surfsense_web/lib/apis/podcasts-api.service.ts b/surfsense_web/lib/apis/podcasts-api.service.ts new file mode 100644 index 000000000..f47269654 --- /dev/null +++ b/surfsense_web/lib/apis/podcasts-api.service.ts @@ -0,0 +1,58 @@ +import { z } from "zod"; +import { + type PodcastSpec, + podcastDetail, + updateSpecRequest, + voiceOption, +} from "@/contracts/types/podcast.types"; +import { ValidationError } from "../error"; +import { baseApiService } from "./base-api.service"; + +const BASE = "/api/v1/podcasts"; + +const voiceOptionList = z.array(voiceOption); + +class PodcastsApiService { + // Full state including the deserialized brief and transcript; thin lifecycle + // fields (status, spec, spec_version) also arrive live via Zero. + getDetail = async (podcastId: number) => { + return baseApiService.get(`${BASE}/${podcastId}`, podcastDetail); + }; + + // Guarded by the version the caller last saw; the backend answers 409 when + // the brief changed underneath them. 
+ updateSpec = async (podcastId: number, spec: PodcastSpec, expectedVersion: number) => { + const parsed = updateSpecRequest.safeParse({ spec, expected_version: expectedVersion }); + if (!parsed.success) { + throw new ValidationError( + `Invalid request: ${parsed.error.issues.map((i) => i.message).join(", ")}` + ); + } + return baseApiService.patch(`${BASE}/${podcastId}/spec`, podcastDetail, { + body: parsed.data, + }); + }; + + approveBrief = async (podcastId: number) => { + return baseApiService.post(`${BASE}/${podcastId}/brief/approve`, podcastDetail); + }; + + approveTranscript = async (podcastId: number) => { + return baseApiService.post(`${BASE}/${podcastId}/transcript/approve`, podcastDetail); + }; + + regenerateTranscript = async (podcastId: number) => { + return baseApiService.post(`${BASE}/${podcastId}/transcript/regenerate`, podcastDetail); + }; + + cancel = async (podcastId: number) => { + return baseApiService.post(`${BASE}/${podcastId}/cancel`, podcastDetail); + }; + + listVoices = async (language?: string) => { + const qs = language ? 
`?${new URLSearchParams({ language })}` : ""; + return baseApiService.get(`${BASE}/voices${qs}`, voiceOptionList); + }; +} + +export const podcastsApiService = new PodcastsApiService(); From a3d1fafb0bf51cca87b38b616c5d8df8d3ed46cf Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Thu, 11 Jun 2026 10:04:51 +0200 Subject: [PATCH 40/50] feat(podcasts): add zero-driven live podcast hook --- surfsense_web/hooks/use-podcast-live.ts | 59 +++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 surfsense_web/hooks/use-podcast-live.ts diff --git a/surfsense_web/hooks/use-podcast-live.ts b/surfsense_web/hooks/use-podcast-live.ts new file mode 100644 index 000000000..e0a30e05b --- /dev/null +++ b/surfsense_web/hooks/use-podcast-live.ts @@ -0,0 +1,59 @@ +"use client"; + +import { useQuery } from "@rocicorp/zero/react"; +import { useMemo } from "react"; +import { type PodcastSpec, type PodcastStatus, podcastSpec } from "@/contracts/types/podcast.types"; +import { queries } from "@/zero/queries"; + +/** + * Thin live row sourced from Zero's `podcasts` publication. Drives the + * lifecycle UI by push (no polling); heavy fields (transcript, audio) stay on + * REST and are fetched lazily when a gate or the player needs them. + */ +export interface LivePodcast { + id: number; + title: string; + status: PodcastStatus; + spec: PodcastSpec | null; + specVersion: number; + durationSeconds: number | null; + error: string | null; + searchSpaceId: number; + threadId: number | null; +} + +interface UsePodcastLiveResult { + podcast: LivePodcast | undefined; + isLoading: boolean; +} + +export function usePodcastLive(podcastId: number | undefined): UsePodcastLiveResult { + const [row, result] = useQuery(queries.podcasts.byId({ podcastId: podcastId ?? 
-1 })); + + const podcast = useMemo(() => { + if (!podcastId || !row) return undefined; + return { + id: row.id, + title: row.title, + status: row.status as PodcastStatus, + spec: parseSpec(row.spec), + specVersion: row.specVersion, + durationSeconds: row.durationSeconds ?? null, + error: row.error ?? null, + searchSpaceId: row.searchSpaceId, + threadId: row.threadId ?? null, + }; + }, [podcastId, row]); + + // Pre-hydration window: no row AND Zero hasn't confirmed completeness yet. + const isLoading = !!podcastId && !row && result.type !== "complete"; + + return { podcast, isLoading }; +} + +/** The JSONB column holds the snake_case spec; reject anything malformed. */ +function parseSpec(raw: unknown): PodcastSpec | null { + if (raw == null) return null; + const parsed = podcastSpec.safeParse(raw); + return parsed.success ? parsed.data : null; +} From 6f6c0564044c5476ea10d54f7ffe6282b32425e2 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Thu, 11 Jun 2026 10:04:51 +0200 Subject: [PATCH 41/50] feat(podcasts): add lifecycle tool ui with brief and transcript gates --- .../tool-ui/podcast/brief-review.tsx | 396 ++++++++++++++++++ .../tool-ui/podcast/generate-podcast.tsx | 194 +++++++++ .../components/tool-ui/podcast/index.ts | 1 + .../components/tool-ui/podcast/player.tsx | 203 +++++++++ .../tool-ui/podcast/review-sheet.tsx | 67 +++ .../components/tool-ui/podcast/schema.ts | 33 ++ .../tool-ui/podcast/transcript-review.tsx | 118 ++++++ 7 files changed, 1012 insertions(+) create mode 100644 surfsense_web/components/tool-ui/podcast/brief-review.tsx create mode 100644 surfsense_web/components/tool-ui/podcast/generate-podcast.tsx create mode 100644 surfsense_web/components/tool-ui/podcast/index.ts create mode 100644 surfsense_web/components/tool-ui/podcast/player.tsx create mode 100644 surfsense_web/components/tool-ui/podcast/review-sheet.tsx create mode 100644 surfsense_web/components/tool-ui/podcast/schema.ts create mode 100644 
surfsense_web/components/tool-ui/podcast/transcript-review.tsx diff --git a/surfsense_web/components/tool-ui/podcast/brief-review.tsx b/surfsense_web/components/tool-ui/podcast/brief-review.tsx new file mode 100644 index 000000000..9679fd8fa --- /dev/null +++ b/surfsense_web/components/tool-ui/podcast/brief-review.tsx @@ -0,0 +1,396 @@ +"use client"; + +import { Loader2, Plus, Trash2 } from "lucide-react"; +import { useEffect, useMemo, useState } from "react"; +import { toast } from "sonner"; +import { Button } from "@/components/ui/button"; +import { Input } from "@/components/ui/input"; +import { Label } from "@/components/ui/label"; +import { + Select, + SelectContent, + SelectItem, + SelectTrigger, + SelectValue, +} from "@/components/ui/select"; +import { Textarea } from "@/components/ui/textarea"; +import { + MAX_SPEAKERS, + type PodcastSpec, + type PodcastStyle, + podcastStyle, + type SpeakerRole, + speakerRole, + type VoiceOption, +} from "@/contracts/types/podcast.types"; +import type { LivePodcast } from "@/hooks/use-podcast-live"; +import { podcastsApiService } from "@/lib/apis/podcasts-api.service"; +import { AppError } from "@/lib/error"; + +// A "*" voice speaks whatever language the text is in (mirrors the backend +// catalog's ANY_LANGUAGE sentinel). +const ANY_LANGUAGE = "*"; + +function speaks(voice: VoiceOption, language: string): boolean { + if (voice.language === ANY_LANGUAGE) return true; + return primary(voice.language) === primary(language); +} + +function primary(language: string): string { + return language.split("-", 1)[0].trim().toLowerCase(); +} + +interface BriefReviewProps { + podcast: LivePodcast; + spec: PodcastSpec; + onApproved: () => void; +} + +/** + * Gate 1: the pre-filled brief as a near-confirmation. One-click approve is + * the easy path; every field stays overridable and saves through the + * version-guarded PATCH so concurrent edits surface instead of clobbering. 
+ */ +export function BriefReview({ podcast, spec, onApproved }: BriefReviewProps) { + const [draft, setDraft] = useState(spec); + const [voices, setVoices] = useState(null); + const [isSubmitting, setIsSubmitting] = useState(false); + + // A pushed spec change (saved edit or concurrent editor) resets the form to + // the authoritative version. + // biome-ignore lint/correctness/useExhaustiveDependencies: reset only when the server version moves + useEffect(() => { + setDraft(spec); + }, [podcast.specVersion]); + + useEffect(() => { + let cancelled = false; + podcastsApiService + .listVoices() + .then((catalog) => { + if (!cancelled) setVoices(catalog); + }) + .catch(() => { + if (!cancelled) setVoices([]); + }); + return () => { + cancelled = true; + }; + }, []); + + const languages = useMemo(() => { + const tags = new Set(); + for (const voice of voices ?? []) { + if (voice.language !== ANY_LANGUAGE) tags.add(voice.language); + } + tags.add(draft.language); + return [...tags].sort(); + }, [voices, draft.language]); + + const voicesForLanguage = useMemo( + () => (voices ?? []).filter((voice) => speaks(voice, draft.language)), + [voices, draft.language] + ); + + const isDirty = useMemo(() => JSON.stringify(draft) !== JSON.stringify(spec), [draft, spec]); + + const setLanguage = (language: string) => { + setDraft((current) => { + const candidates = (voices ?? []).filter((voice) => speaks(voice, language)); + // Voices that can't render the new language are remapped so the saved + // spec never pairs a language with an incompatible voice. + const speakers = current.speakers.map((speaker, index) => { + const stillValid = candidates.some((voice) => voice.voice_id === speaker.voice_id); + const fallback = candidates[index % Math.max(candidates.length, 1)]; + return stillValid || !fallback ? 
speaker : { ...speaker, voice_id: fallback.voice_id }; + }); + return { ...current, language, speakers }; + }); + }; + + const updateSpeaker = (slot: number, change: Partial) => { + setDraft((current) => ({ + ...current, + speakers: current.speakers.map((speaker) => + speaker.slot === slot ? { ...speaker, ...change } : speaker + ), + })); + }; + + const addSpeaker = () => { + setDraft((current) => { + if (current.speakers.length >= MAX_SPEAKERS) return current; + const slot = Math.max(...current.speakers.map((s) => s.slot)) + 1; + const voice = + voicesForLanguage[current.speakers.length % Math.max(voicesForLanguage.length, 1)]; + return { + ...current, + speakers: [ + ...current.speakers, + { + slot, + name: `Speaker ${current.speakers.length + 1}`, + role: "guest" as SpeakerRole, + voice_id: voice?.voice_id ?? current.speakers[0].voice_id, + }, + ], + }; + }); + }; + + const removeSpeaker = (slot: number) => { + setDraft((current) => { + if (current.speakers.length <= 1) return current; + return { + ...current, + speakers: current.speakers.filter((speaker) => speaker.slot !== slot), + }; + }); + }; + + const saveIfDirty = async (): Promise => { + if (!isDirty) return true; + try { + await podcastsApiService.updateSpec(podcast.id, draft, podcast.specVersion); + return true; + } catch (error) { + if (error instanceof AppError && error.status === 409) { + toast.warning("The brief changed elsewhere — reloaded the latest version."); + setDraft(spec); + } else { + toast.error(error instanceof Error ? 
error.message : "Failed to save the brief"); + } + return false; + } + }; + + const handleSave = async () => { + setIsSubmitting(true); + try { + if (await saveIfDirty()) { + toast.success("Brief saved."); + } + } finally { + setIsSubmitting(false); + } + }; + + const handleApprove = async () => { + setIsSubmitting(true); + try { + if (!(await saveIfDirty())) return; + await podcastsApiService.approveBrief(podcast.id); + onApproved(); + } catch (error) { + toast.error(error instanceof Error ? error.message : "Failed to approve the brief"); + } finally { + setIsSubmitting(false); + } + }; + + return ( +
+
+
+ + +
+
+ + +
+
+ +
+
+ + +
+ {draft.speakers.map((speaker) => ( +
+
+ + updateSpeaker(speaker.slot, { name: e.target.value })} + /> +
+
+ + +
+
+ + +
+ +
+ ))} +
+ +
+
+ + + setDraft((current) => ({ + ...current, + duration: { ...current.duration, min_minutes: Number(e.target.value) || 1 }, + })) + } + /> +
+
+ + + setDraft((current) => ({ + ...current, + duration: { + ...current.duration, + max_minutes: Number(e.target.value) || current.duration.min_minutes, + }, + })) + } + /> +
+
+ +
+ +