From 65b6c2d3577e523348674bab21b8daca7609ec3d Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 10 Jun 2026 18:44:03 +0200 Subject: [PATCH] feat(podcasts): add persistence model and repository --- .../app/podcasts/persistence/__init__.py | 9 ++ .../podcasts/persistence/enums/__init__.py | 7 ++ .../persistence/enums/podcast_status.py | 37 +++++++++ .../app/podcasts/persistence/models.py | 82 +++++++++++++++++++ .../app/podcasts/persistence/repository.py | 46 +++++++++++ 5 files changed, 181 insertions(+) create mode 100644 surfsense_backend/app/podcasts/persistence/__init__.py create mode 100644 surfsense_backend/app/podcasts/persistence/enums/__init__.py create mode 100644 surfsense_backend/app/podcasts/persistence/enums/podcast_status.py create mode 100644 surfsense_backend/app/podcasts/persistence/models.py create mode 100644 surfsense_backend/app/podcasts/persistence/repository.py diff --git a/surfsense_backend/app/podcasts/persistence/__init__.py b/surfsense_backend/app/podcasts/persistence/__init__.py new file mode 100644 index 000000000..2166d5d9d --- /dev/null +++ b/surfsense_backend/app/podcasts/persistence/__init__.py @@ -0,0 +1,9 @@ +"""Models, enums, and data access for the podcasts table.""" + +from __future__ import annotations + +from .enums import PodcastStatus +from .models import Podcast +from .repository import PodcastRepository + +__all__ = ["Podcast", "PodcastRepository", "PodcastStatus"] diff --git a/surfsense_backend/app/podcasts/persistence/enums/__init__.py b/surfsense_backend/app/podcasts/persistence/enums/__init__.py new file mode 100644 index 000000000..f0527fd78 --- /dev/null +++ b/surfsense_backend/app/podcasts/persistence/enums/__init__.py @@ -0,0 +1,7 @@ +"""Enums for the podcasts table.""" + +from __future__ import annotations + +from .podcast_status import PodcastStatus + +__all__ = ["PodcastStatus"] diff --git a/surfsense_backend/app/podcasts/persistence/enums/podcast_status.py 
b/surfsense_backend/app/podcasts/persistence/enums/podcast_status.py new file mode 100644 index 000000000..403473353 --- /dev/null +++ b/surfsense_backend/app/podcasts/persistence/enums/podcast_status.py @@ -0,0 +1,37 @@ +"""Podcast generation lifecycle. + +The status drives a guarded state machine. A podcast is proposed (``PENDING``), +gets a reviewable brief (``AWAITING_BRIEF``), is drafted into a transcript +(``DRAFTING`` → ``AWAITING_REVIEW``), then rendered to audio (``RENDERING`` → +``READY``). ``FAILED`` and ``CANCELLED`` are terminal. The Python enum is kept +in lockstep with the ``podcast_status`` Postgres type via its paired migration. +""" + +from __future__ import annotations + +from enum import StrEnum + + +class PodcastStatus(StrEnum): + PENDING = "pending" + AWAITING_BRIEF = "awaiting_brief" + DRAFTING = "drafting" + AWAITING_REVIEW = "awaiting_review" + RENDERING = "rendering" + READY = "ready" + FAILED = "failed" + CANCELLED = "cancelled" + + @property + def is_terminal(self) -> bool: + """Whether no further transition is possible from this state.""" + return self in _TERMINAL + + @property + def is_gate(self) -> bool: + """Whether this state waits on user input before proceeding.""" + return self in _GATES + + +_TERMINAL = frozenset({PodcastStatus.READY, PodcastStatus.FAILED, PodcastStatus.CANCELLED}) +_GATES = frozenset({PodcastStatus.AWAITING_BRIEF, PodcastStatus.AWAITING_REVIEW}) diff --git a/surfsense_backend/app/podcasts/persistence/models.py b/surfsense_backend/app/podcasts/persistence/models.py new file mode 100644 index 000000000..6e40a8040 --- /dev/null +++ b/surfsense_backend/app/podcasts/persistence/models.py @@ -0,0 +1,82 @@ +"""``podcasts`` table: a generated podcast, its brief, transcript, and state.""" + +from __future__ import annotations + +from sqlalchemy import ( + Column, + Enum as SQLAlchemyEnum, + ForeignKey, + Integer, + String, + Text, +) +from sqlalchemy.dialects.postgresql import JSONB +from sqlalchemy.orm import 
# --- app/podcasts/persistence/models.py ---


class Podcast(BaseModel, TimestampMixin):
    """A podcast across its whole lifecycle: brief, transcript, audio, status.

    ``spec`` (the reviewable brief) and ``podcast_transcript`` are JSONB so the
    flexible Pydantic shapes can evolve without migrations. Both columns use
    ``none_as_null=True`` so that assigning Python ``None`` stores SQL NULL
    rather than the JSON value ``null``; "not yet produced" is therefore always
    detectable with ``IS NULL`` / ``IS NOT NULL``. ``spec_version`` backs
    optimistic concurrency on brief edits. Rendered audio lives in the object
    store, addressed by ``storage_backend`` + ``storage_key`` rather than a raw
    path.
    """

    __tablename__ = "podcasts"

    title = Column(String(500), nullable=False)

    status = Column(
        SQLAlchemyEnum(
            PodcastStatus,
            name="podcast_status",
            # The Postgres enum type is created by a migration, not by the ORM.
            create_type=False,
            # Persist the lowercase member values instead of the member names.
            values_callable=lambda x: [e.value for e in x],
        ),
        nullable=False,
        default=PodcastStatus.PENDING,
        server_default=PodcastStatus.PENDING.value,
        index=True,
    )

    # The source material the episode is generated from. Persisted because
    # drafting happens after the brief gate, long after creation.
    source_content = Column(Text, nullable=True)

    # The reviewable brief (PodcastSpec); SQL NULL until the brief gate is
    # reached. Without none_as_null=True the ORM would store an explicit None
    # as JSON ``null``, which an ``IS NOT NULL`` filter still matches.
    spec = Column(JSONB(none_as_null=True), nullable=True)
    # Bumped on every spec edit; guards concurrent edits at the brief gate.
    spec_version = Column(Integer, nullable=False, default=1, server_default="1")

    # The drafted dialogue (Transcript); SQL NULL until drafting completes.
    podcast_transcript = Column(JSONB(none_as_null=True), nullable=True)

    # Where the rendered audio lives in the object store; null until READY.
    storage_backend = Column(String(32), nullable=True)
    storage_key = Column(Text, nullable=True)
    duration_seconds = Column(Integer, nullable=True)

    # Human-readable reason when status is FAILED.
    error = Column(Text, nullable=True)

    # Legacy local audio path; retained for back-compat until cutover.
    file_location = Column(Text, nullable=True)

    # Owning search space; deleting the space deletes its podcasts.
    search_space_id = Column(
        Integer, ForeignKey("searchspaces.id", ondelete="CASCADE"), nullable=False
    )
    search_space = relationship("SearchSpace", back_populates="podcasts")

    # Optional originating chat thread; the link is cleared (not the row
    # deleted) when that thread is removed.
    thread_id = Column(
        Integer,
        ForeignKey("new_chat_threads.id", ondelete="SET NULL"),
        nullable=True,
        index=True,
    )
    thread = relationship("NewChatThread")


# --- app/podcasts/persistence/repository.py ---


class PodcastRepository:
    """Loads and stores :class:`Podcast` rows for one session.

    The repository never commits; it only issues reads and flushes through the
    session it was given.
    """

    def __init__(self, session: AsyncSession) -> None:
        self._session = session

    async def get(self, podcast_id: int) -> Podcast | None:
        """Return the podcast with this primary key, or ``None`` if absent."""
        return await self._session.get(Podcast, podcast_id)

    async def add(self, podcast: Podcast) -> Podcast:
        """Persist a new row and assign its primary key.

        The session is flushed (not committed) so the generated key is
        available on the returned object.
        """
        self._session.add(podcast)
        await self._session.flush()
        return podcast

    async def latest_with_spec(self, search_space_id: int) -> Podcast | None:
        """Most recent podcast in the space that has a stored brief.

        Used to seed language/voice defaults for a new podcast from what the
        user chose last. Returns ``None`` when no podcast in the space has a
        non-NULL ``spec``.
        """
        stmt = (
            select(Podcast)
            .where(
                Podcast.search_space_id == search_space_id,
                Podcast.spec.is_not(None),
            )
            .order_by(Podcast.created_at.desc())
            .limit(1)
        )
        result = await self._session.execute(stmt)
        return result.scalars().first()