From ca9b157676b04b226ba03d79d7c29b2fd4af8847 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Thu, 11 Jun 2026 12:43:07 +0200 Subject: [PATCH] fix(podcasts): keep legacy episodes readable and guard regenerate --- surfsense_backend/app/podcasts/api/schemas.py | 4 +-- surfsense_backend/app/podcasts/service.py | 26 ++++++++++++++----- .../integration/podcasts/test_regeneration.py | 23 +++++++++++++++- .../tests/unit/podcasts/test_api_schemas.py | 22 ++++++++++++++++ 4 files changed, 66 insertions(+), 9 deletions(-) diff --git a/surfsense_backend/app/podcasts/api/schemas.py b/surfsense_backend/app/podcasts/api/schemas.py index d5aba0e14..7f1f8cc7c 100644 --- a/surfsense_backend/app/podcasts/api/schemas.py +++ b/surfsense_backend/app/podcasts/api/schemas.py @@ -13,7 +13,7 @@ from pydantic import BaseModel, ConfigDict, Field from app.podcasts.persistence import Podcast, PodcastStatus from app.podcasts.schemas import PodcastSpec, Transcript -from app.podcasts.service import read_spec, read_transcript +from app.podcasts.service import has_stored_episode, read_spec, read_transcript # Defaults applied when a create request omits brief sizing; the brief gate lets # the user adjust before any cost is incurred. @@ -88,7 +88,7 @@ class PodcastDetail(BaseModel): spec_version=podcast.spec_version, spec=read_spec(podcast), transcript=read_transcript(podcast), - has_audio=bool(podcast.storage_key), + has_audio=has_stored_episode(podcast), duration_seconds=podcast.duration_seconds, error=podcast.error, created_at=podcast.created_at, diff --git a/surfsense_backend/app/podcasts/service.py b/surfsense_backend/app/podcasts/service.py index 8310a7228..1a2f3677b 100644 --- a/surfsense_backend/app/podcasts/service.py +++ b/surfsense_backend/app/podcasts/service.py @@ -12,7 +12,7 @@ from __future__ import annotations from sqlalchemy.ext.asyncio import AsyncSession from app.podcasts.persistence import Podcast, PodcastRepository, PodcastStatus -from app.podcasts.schemas import PodcastSpec, Transcript +from app.podcasts.schemas import PodcastSpec, Transcript, TranscriptTurn _MAX_ERROR_CHARS = 2000 @@ -148,6 +148,10 @@ class PodcastService: raise InvalidTransition( f"nothing to regenerate from {_status(podcast).value}" ) + # Legacy episodes finished before briefs existed; a gate with nothing + # to review would strand them. + if podcast.spec is None: + raise PreconditionFailed("cannot regenerate without a brief") self._transition(podcast, PodcastStatus.AWAITING_BRIEF) await self._session.flush() return podcast @@ -159,7 +163,7 @@ class PodcastService: any point before that commit is a free change of mind. A fresh podcast has no regeneration to revert and is rejected. """ - if not _has_episode(podcast): + if not has_stored_episode(podcast): raise InvalidTransition("no finished episode to fall back to") self._transition(podcast, PodcastStatus.READY) await self._session.flush() @@ -195,7 +199,7 @@ class PodcastService: No user action may destroy playable audio: once an episode exists, backing out goes through revert_regeneration instead. """ - if _has_episode(podcast): + if has_stored_episode(podcast): raise InvalidTransition( "a finished episode exists; revert the regeneration instead" ) @@ -216,7 +220,7 @@ def _status(podcast: Podcast) -> PodcastStatus: return PodcastStatus(podcast.status) -def _has_episode(podcast: Podcast) -> bool: +def has_stored_episode(podcast: Podcast) -> bool: """Whether finished audio is stored (``file_location`` covers legacy rows).""" return bool(podcast.storage_key or podcast.file_location) @@ -228,9 +232,19 @@ def read_spec(podcast: Podcast) -> PodcastSpec | None: def read_transcript(podcast: Podcast) -> Transcript | None: """Deserialize the stored transcript, or ``None`` if not yet drafted.""" - if not podcast.podcast_transcript: + raw = podcast.podcast_transcript + if not raw: return None - return Transcript.model_validate(podcast.podcast_transcript) + # Rows from before the lifecycle rework stored a bare turn list with + # different field names; they must keep reading, not fail validation. + if isinstance(raw, list): + return Transcript( + turns=[ + TranscriptTurn(speaker=turn["speaker_id"], text=turn["dialog"]) + for turn in raw + ] + ) + return Transcript.model_validate(raw) def preferences_from(podcast: Podcast | None) -> tuple[str | None, list[str]]: diff --git a/surfsense_backend/tests/integration/podcasts/test_regeneration.py b/surfsense_backend/tests/integration/podcasts/test_regeneration.py index 8617c3d37..fd31df4ca 100644 --- a/surfsense_backend/tests/integration/podcasts/test_regeneration.py +++ b/surfsense_backend/tests/integration/podcasts/test_regeneration.py @@ -13,7 +13,7 @@ from __future__ import annotations import pytest -from app.podcasts.persistence import PodcastStatus +from app.podcasts.persistence import Podcast, PodcastStatus from app.podcasts.service import PodcastService from .conftest import build_transcript @@ -179,3 +179,24 @@ async def test_revert_when_nothing_was_regenerated_is_rejected( resp = await client.post(f"{BASE}/{podcast.id}/regenerate/revert") assert resp.status_code == 409 + + +async def test_regenerate_without_a_brief_is_rejected( + client, db_session, db_search_space, captured_tasks +): + # Legacy episodes finished before briefs existed; reopening a gate with + # nothing to review would strand them there. + podcast = Podcast( + title="Legacy Episode", + search_space_id=db_search_space.id, + status=PodcastStatus.READY, + spec_version=1, + file_location="/var/old/podcast.mp3", + ) + db_session.add(podcast) + await db_session.flush() + + resp = await client.post(f"{BASE}/{podcast.id}/transcript/regenerate") + + assert resp.status_code == 422 + assert captured_tasks.draft == [] diff --git a/surfsense_backend/tests/unit/podcasts/test_api_schemas.py b/surfsense_backend/tests/unit/podcasts/test_api_schemas.py index b27d1ead5..41664ac64 100644 --- a/surfsense_backend/tests/unit/podcasts/test_api_schemas.py +++ b/surfsense_backend/tests/unit/podcasts/test_api_schemas.py @@ -53,6 +53,28 @@ def test_an_awaiting_brief_podcast_exposes_the_deserialized_brief(make_spec): assert detail.spec.language == "fr" +def test_a_legacy_episode_still_exposes_its_transcript_and_audio(): + # Pre-rework rows stored [{speaker_id, dialog}] and a local file path; + # they must keep flowing through the new read model, not fail validation. + podcast = _podcast( + status=PodcastStatus.READY, + podcast_transcript=[ + {"speaker_id": 0, "dialog": "Welcome back."}, + {"speaker_id": 1, "dialog": "Glad to be here."}, + ], + file_location="/var/old/podcast.mp3", + ) + + detail = PodcastDetail.of(podcast) + + assert detail.has_audio is True + assert detail.transcript is not None + assert [(turn.speaker, turn.text) for turn in detail.transcript.turns] == [ + (0, "Welcome back."), + (1, "Glad to be here."), + ] + + def test_a_ready_podcast_reports_available_audio(make_spec, make_transcript): podcast = _podcast( status=PodcastStatus.READY,