diff --git a/surfsense_backend/app/podcasts/api/routes.py b/surfsense_backend/app/podcasts/api/routes.py index 7a998c85e..af378e1c2 100644 --- a/surfsense_backend/app/podcasts/api/routes.py +++ b/surfsense_backend/app/podcasts/api/routes.py @@ -1,7 +1,7 @@ """HTTP surface for the podcast lifecycle. Status is observed by the frontend through Zero, so these routes are about -actions (create, edit the brief, approve/regenerate, cancel) and audio delivery. +actions (create, edit/approve the brief, regenerate, cancel) and audio delivery. Each mutating route performs the guarded transition via the service, commits, then enqueues the matching Celery task; lifecycle errors map to 409/422. """ @@ -11,7 +11,7 @@ from __future__ import annotations import os from pathlib import Path -from fastapi import APIRouter, Depends, HTTPException +from fastapi import APIRouter, Depends, HTTPException, Response from fastapi.responses import StreamingResponse from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession @@ -33,11 +33,13 @@ from app.podcasts.service import ( SpecConflict, ) from app.podcasts.storage import open_audio_stream, purge_audio -from app.podcasts.tasks import ( - draft_transcript_task, - render_audio_task, +from app.podcasts.tasks import draft_transcript_task +from app.podcasts.tts import get_text_to_speech +from app.podcasts.voices import ( + get_voice_catalog, + provider_from_service, + render_voice_preview, ) -from app.podcasts.voices import get_voice_catalog, provider_from_service from app.users import current_active_user from app.utils.rbac import check_permission @@ -110,6 +112,29 @@ async def list_voices(language: str | None = None): ] +@router.get("/podcasts/voices/{voice_id}/preview") +async def preview_voice( + voice_id: str, + user: User = Depends(current_active_user), +): + """A short audio sample of a voice, so users pick by sound.""" + if not app_config.TTS_SERVICE: + raise HTTPException(status_code=503, detail="No TTS provider 
configured") + + provider = provider_from_service(app_config.TTS_SERVICE) + try: + voice = get_voice_catalog().get(voice_id) + except KeyError: + raise HTTPException(status_code=404, detail="Unknown voice") from None + if voice.provider is not provider: + raise HTTPException( + status_code=404, detail="Voice not offered by the active TTS provider" + ) + + data, content_type = await render_voice_preview(voice, get_text_to_speech()) + return Response(content=data, media_type=content_type) + + @router.post("/podcasts", response_model=PodcastDetail, status_code=201) async def create_podcast( body: CreatePodcastRequest, @@ -180,21 +205,6 @@ async def approve_brief( return PodcastDetail.of(podcast) -@router.post("/podcasts/{podcast_id}/transcript/approve", response_model=PodcastDetail) -async def approve_transcript( - podcast_id: int, - session: AsyncSession = Depends(get_async_session), - user: User = Depends(current_active_user), -): - """Approve the transcript and start rendering audio.""" - podcast = await _load(session, user, podcast_id, Permission.PODCASTS_UPDATE) - async with _lifecycle_errors(): - await PodcastService(session).approve(podcast) - await session.commit() - render_audio_task.delay(podcast.id) - return PodcastDetail.of(podcast) - - @router.post( "/podcasts/{podcast_id}/transcript/regenerate", response_model=PodcastDetail ) @@ -203,7 +213,7 @@ async def regenerate_transcript( session: AsyncSession = Depends(get_async_session), user: User = Depends(current_active_user), ): - """Reject the transcript and draft a fresh one.""" + """Send a finished episode back to drafting for a fresh take.""" podcast = await _load(session, user, podcast_id, Permission.PODCASTS_UPDATE) async with _lifecycle_errors(): await PodcastService(session).regenerate(podcast) diff --git a/surfsense_backend/app/podcasts/persistence/enums/podcast_status.py b/surfsense_backend/app/podcasts/persistence/enums/podcast_status.py index 403473353..347c0422a 100644 --- 
a/surfsense_backend/app/podcasts/persistence/enums/podcast_status.py +++ b/surfsense_backend/app/podcasts/persistence/enums/podcast_status.py @@ -2,9 +2,12 @@ The status drives a guarded state machine. A podcast is proposed (``PENDING``), gets a reviewable brief (``AWAITING_BRIEF``), is drafted into a transcript -(``DRAFTING`` → ``AWAITING_REVIEW``), then rendered to audio (``RENDERING`` → -``READY``). ``FAILED`` and ``CANCELLED`` are terminal. The Python enum is kept -in lockstep with the ``podcast_status`` Postgres type via its paired migration. +(``DRAFTING``), then rendered to audio (``RENDERING`` → ``READY``). ``FAILED`` +and ``CANCELLED`` are terminal; a ``READY`` episode can be sent back to +drafting for regeneration. ``AWAITING_REVIEW`` is retained for legacy rows but +never entered anymore — the brief is the only approval gate. The Python enum is +kept in lockstep with the ``podcast_status`` Postgres type via its paired +migration. """ from __future__ import annotations @@ -33,5 +36,5 @@ class PodcastStatus(StrEnum): return self in _GATES -_TERMINAL = frozenset({PodcastStatus.READY, PodcastStatus.FAILED, PodcastStatus.CANCELLED}) +_TERMINAL = frozenset({PodcastStatus.FAILED, PodcastStatus.CANCELLED}) _GATES = frozenset({PodcastStatus.AWAITING_BRIEF, PodcastStatus.AWAITING_REVIEW}) diff --git a/surfsense_backend/app/podcasts/service.py b/surfsense_backend/app/podcasts/service.py index 6df8c315f..9afb0ab86 100644 --- a/surfsense_backend/app/podcasts/service.py +++ b/surfsense_backend/app/podcasts/service.py @@ -25,20 +25,18 @@ _ALLOWED: dict[PodcastStatus, frozenset[PodcastStatus]] = { {PodcastStatus.DRAFTING, PodcastStatus.FAILED, PodcastStatus.CANCELLED} ), PodcastStatus.DRAFTING: frozenset( - {PodcastStatus.AWAITING_REVIEW, PodcastStatus.FAILED, PodcastStatus.CANCELLED} + {PodcastStatus.RENDERING, PodcastStatus.FAILED, PodcastStatus.CANCELLED} ), + # Never entered anymore (the transcript gate was dropped); kept with exits + # so legacy rows aren't 
stranded. PodcastStatus.AWAITING_REVIEW: frozenset( - { - PodcastStatus.RENDERING, # approve - PodcastStatus.DRAFTING, # regenerate - PodcastStatus.FAILED, - PodcastStatus.CANCELLED, - } + {PodcastStatus.DRAFTING, PodcastStatus.FAILED, PodcastStatus.CANCELLED} ), PodcastStatus.RENDERING: frozenset( {PodcastStatus.READY, PodcastStatus.FAILED, PodcastStatus.CANCELLED} ), - PodcastStatus.READY: frozenset(), + # Not terminal: regeneration is decided by listening to the finished episode. + PodcastStatus.READY: frozenset({PodcastStatus.DRAFTING}), PodcastStatus.FAILED: frozenset(), PodcastStatus.CANCELLED: frozenset(), } @@ -121,22 +119,22 @@ class PodcastService: async def attach_transcript( self, podcast: Podcast, transcript: Transcript ) -> Podcast: - """Record the drafted transcript and open the go/no-go gate.""" - self._transition(podcast, PodcastStatus.AWAITING_REVIEW) + """Record the drafted transcript and move straight to rendering.""" + self._transition(podcast, PodcastStatus.RENDERING) podcast.podcast_transcript = transcript.model_dump(mode="json") await self._session.flush() return podcast - async def approve(self, podcast: Podcast) -> Podcast: - """Accept the transcript and start rendering.""" - if not podcast.podcast_transcript: - raise PreconditionFailed("cannot render without a transcript") - self._transition(podcast, PodcastStatus.RENDERING) - await self._session.flush() - return podcast + # Guards regenerate beyond the transition table: from AWAITING_BRIEF the + # DRAFTING target is also legal, but there it means brief approval. 
+ _REGENERABLE = frozenset({PodcastStatus.READY, PodcastStatus.AWAITING_REVIEW}) async def regenerate(self, podcast: Podcast) -> Podcast: - """Reject the transcript and draft a new one.""" + """Send the episode back to drafting for a fresh transcript and render.""" + if _status(podcast) not in self._REGENERABLE: + raise InvalidTransition( + f"nothing to regenerate from {_status(podcast).value}" + ) self._transition(podcast, PodcastStatus.DRAFTING) await self._session.flush() return podcast diff --git a/surfsense_backend/app/podcasts/storage.py b/surfsense_backend/app/podcasts/storage.py index 30179adae..f02429dff 100644 --- a/surfsense_backend/app/podcasts/storage.py +++ b/surfsense_backend/app/podcasts/storage.py @@ -44,5 +44,10 @@ def open_audio_stream(podcast: Podcast) -> AsyncIterator[bytes]: async def purge_audio(podcast: Podcast) -> None: """Delete a podcast's stored audio if present; a missing object is fine.""" - if podcast.storage_key: - await get_storage_backend().delete(podcast.storage_key) + await purge_audio_object(podcast.storage_key) + + +async def purge_audio_object(key: str | None) -> None: + """Delete a stored audio object by key, e.g. the one a re-render replaced.""" + if key: + await get_storage_backend().delete(key) diff --git a/surfsense_backend/app/podcasts/tasks/draft.py b/surfsense_backend/app/podcasts/tasks/draft.py index 575daf2ba..8779f6ce1 100644 --- a/surfsense_backend/app/podcasts/tasks/draft.py +++ b/surfsense_backend/app/podcasts/tasks/draft.py @@ -1,8 +1,9 @@ -"""Transcript-drafting task: DRAFTING -> AWAITING_REVIEW. +"""Transcript-drafting task: DRAFTING -> RENDERING. The expensive, LLM-heavy step, so it runs under ``billable_call``. The API has already moved the row to DRAFTING and stored the approved brief; this task -drafts the long-form transcript and opens the go/no-go gate. +drafts the long-form transcript and chains straight into the render — the brief +gate is the only approval in the lifecycle. 
""" from __future__ import annotations @@ -23,6 +24,7 @@ from app.services.billable_calls import ( ) from app.tasks.celery_tasks import get_celery_session_maker, run_async_celery_task +from .render import render_audio_task from .runtime import billable_session, mark_failed logger = logging.getLogger(__name__) @@ -90,4 +92,8 @@ async def _draft_transcript(podcast_id: int, search_space_id: int) -> dict: await service.attach_transcript(podcast, result["transcript"]) await session.commit() - return {"status": "awaiting_review", "podcast_id": podcast_id} + + # Enqueue only after the transaction is committed, so the render worker can + # never pick up a row whose transcript isn't visible yet. + render_audio_task.delay(podcast_id) + return {"status": "rendering", "podcast_id": podcast_id} diff --git a/surfsense_backend/app/podcasts/tasks/render.py b/surfsense_backend/app/podcasts/tasks/render.py index 04fb9ab9d..dc8c9b7ed 100644 --- a/surfsense_backend/app/podcasts/tasks/render.py +++ b/surfsense_backend/app/podcasts/tasks/render.py @@ -15,7 +15,7 @@ from app.celery_app import celery_app from app.podcasts.persistence import PodcastRepository from app.podcasts.rendering import PodcastRenderer from app.podcasts.service import PodcastService, read_spec, read_transcript -from app.podcasts.storage import store_audio +from app.podcasts.storage import purge_audio_object, store_audio from app.podcasts.tts import get_text_to_speech from app.podcasts.voices import get_voice_catalog from app.tasks.celery_tasks import get_celery_session_maker, run_async_celery_task @@ -58,6 +58,8 @@ async def _render_audio(podcast_id: int) -> dict: spec=spec, transcript=transcript, workdir=workdir ) + superseded_key = podcast.storage_key + backend_name, key = await store_audio( search_space_id=podcast.search_space_id, podcast_id=podcast_id, @@ -67,4 +69,8 @@ async def _render_audio(podcast_id: int) -> dict: podcast, storage_backend=backend_name, storage_key=key ) await session.commit() - return 
{"status": "ready", "podcast_id": podcast_id} + + # Purge only after the new audio is committed, so a failed re-render never + # destroys the episode the user can still play. + await purge_audio_object(superseded_key) + return {"status": "ready", "podcast_id": podcast_id} diff --git a/surfsense_backend/app/podcasts/voices/__init__.py b/surfsense_backend/app/podcasts/voices/__init__.py index 99560ab35..ab1f8bbbf 100644 --- a/surfsense_backend/app/podcasts/voices/__init__.py +++ b/surfsense_backend/app/podcasts/voices/__init__.py @@ -7,6 +7,7 @@ configured provider via :func:`provider_from_service`. from __future__ import annotations from .catalog import VoiceCatalog, get_voice_catalog +from .preview import render_voice_preview from .provider import TtsProvider, provider_from_service from .voice import ANY_LANGUAGE, CatalogVoice, VoiceGender @@ -18,4 +19,5 @@ __all__ = [ "VoiceGender", "get_voice_catalog", "provider_from_service", + "render_voice_preview", ] diff --git a/surfsense_backend/app/podcasts/voices/preview.py b/surfsense_backend/app/podcasts/voices/preview.py new file mode 100644 index 000000000..cb70a9f0b --- /dev/null +++ b/surfsense_backend/app/podcasts/voices/preview.py @@ -0,0 +1,67 @@ +"""Audible previews so users pick voices by sound, not by name. + +A preview is a short sample sentence synthesised in the voice's own language. +Samples are served through the same content-addressed cache the renderer uses, +so each voice costs at most one synthesis per cache lifetime — repeat listens +while comparing voices are free. +""" + +from __future__ import annotations + +import tempfile +from pathlib import Path + +from app.podcasts.rendering.cache import SegmentCache +from app.podcasts.tts import SynthesisRequest, TextToSpeech + +from .voice import ANY_LANGUAGE, CatalogVoice + +# Previews are user-independent, so one rendered sample serves everyone. 
+PREVIEW_CACHE_ROOT = Path(tempfile.gettempdir()) / "surfsense_podcasts" / "previews" + +_FALLBACK_LANGUAGE = "en" + +# A voice previews best speaking its own language. +_SAMPLE_TEXTS = { + "en": "Hi there! This is how I sound when narrating your podcast.", + "es": "¡Hola! Así sueno cuando narro tu pódcast.", + "fr": "Bonjour ! Voici ma voix quand je raconte votre podcast.", + "hi": "नमस्ते! आपका पॉडकास्ट सुनाते समय मेरी आवाज़ ऐसी होती है।", + "it": "Ciao! Questa è la mia voce quando racconto il tuo podcast.", + "ja": "こんにちは。ポッドキャストをお届けするときの私の声です。", + "pt": "Olá! É assim que eu soo ao narrar o seu podcast.", + "zh": "你好!这就是我为你播报播客时的声音。", +} + +_CONTENT_TYPES = {"mp3": "audio/mpeg", "wav": "audio/wav"} + + +async def render_voice_preview( + voice: CatalogVoice, tts: TextToSpeech +) -> tuple[bytes, str]: + """Return ``(audio_bytes, content_type)`` for a sample spoken by ``voice``.""" + language = ( + _FALLBACK_LANGUAGE if voice.language == ANY_LANGUAGE else voice.language + ) + request = SynthesisRequest( + text=_sample_text(language), voice=voice.native_ref, language=language + ) + + cache = SegmentCache(PREVIEW_CACHE_ROOT) + key = cache.key(request) + cached = cache.get(key, tts.container) + if cached is not None: + return cached.read_bytes(), _content_type(tts.container) + + audio = await tts.synthesize(request) + cache.put(key, audio.container, audio.data) + return audio.data, _content_type(audio.container) + + +def _sample_text(language: str) -> str: + primary = language.split("-", 1)[0].strip().lower() + return _SAMPLE_TEXTS.get(primary, _SAMPLE_TEXTS[_FALLBACK_LANGUAGE]) + + +def _content_type(container: str) -> str: + return _CONTENT_TYPES.get(container, "application/octet-stream") diff --git a/surfsense_backend/tests/integration/podcasts/conftest.py b/surfsense_backend/tests/integration/podcasts/conftest.py index e2702fdfd..330bcbef0 100644 --- a/surfsense_backend/tests/integration/podcasts/conftest.py +++ 
b/surfsense_backend/tests/integration/podcasts/conftest.py @@ -166,13 +166,20 @@ def bind_task_session(db_session: AsyncSession, monkeypatch) -> AsyncSession: class FakeTextToSpeech(TextToSpeech): - """In-memory TTS provider: every segment yields fixed bytes (the boundary).""" + """In-memory TTS provider: every segment yields fixed bytes (the boundary). + + Records each request so tests can assert how often synthesis was paid for. + """ + + def __init__(self) -> None: + self.requests: list[SynthesisRequest] = [] @property def container(self) -> str: return "mp3" async def synthesize(self, request: SynthesisRequest) -> SynthesizedAudio: + self.requests.append(request) return SynthesizedAudio(data=b"segment-audio", container="mp3") @@ -233,7 +240,6 @@ def make_podcast(db_session: AsyncSession): _LADDER = [ PodcastStatus.AWAITING_BRIEF, PodcastStatus.DRAFTING, - PodcastStatus.AWAITING_REVIEW, PodcastStatus.RENDERING, PodcastStatus.READY, ] @@ -259,10 +265,8 @@ def make_podcast(db_session: AsyncSession): await service.attach_brief(podcast, build_spec()) elif target is PodcastStatus.DRAFTING: await service.begin_drafting(podcast) - elif target is PodcastStatus.AWAITING_REVIEW: - await service.attach_transcript(podcast, build_transcript()) elif target is PodcastStatus.RENDERING: - await service.approve(podcast) + await service.attach_transcript(podcast, build_transcript()) elif target is PodcastStatus.READY: await service.attach_audio( podcast, diff --git a/surfsense_backend/tests/integration/podcasts/test_draft_task.py b/surfsense_backend/tests/integration/podcasts/test_draft_task.py index a5e0cbe36..7dadfc2f5 100644 --- a/surfsense_backend/tests/integration/podcasts/test_draft_task.py +++ b/surfsense_backend/tests/integration/podcasts/test_draft_task.py @@ -1,11 +1,12 @@ """The transcript-drafting task against a real database. Drafting is the expensive LLM step, so it runs under ``billable_call``. 
The -behavior that protects users' money: when billing succeeds, a drafted transcript -opens the review gate (DRAFTING -> AWAITING_REVIEW); when billing denies or -settlement fails, the podcast ends FAILED with no transcript left behind. The DB, -service, and transcript persistence run for real; only the true externals are -faked — billing (the metering boundary) and the generation graph (the LLM). +behavior that protects users' money: when billing succeeds, the drafted +transcript is stored and rendering starts immediately (DRAFTING -> RENDERING, +render task enqueued — the brief gate is the only approval); when billing denies +or settlement fails, the podcast ends FAILED with no transcript left behind. The +DB, service, and transcript persistence run for real; only the true externals +are faked — billing (the metering boundary) and the generation graph (the LLM). """ from __future__ import annotations @@ -43,8 +44,8 @@ def _wire_billing(monkeypatch, *, billable_call, transcript=None) -> None: monkeypatch.setattr(draft, "transcript_graph", SimpleNamespace(ainvoke=_ainvoke)) -async def test_successful_billing_opens_review_gate_with_transcript( - monkeypatch, db_search_space, make_podcast, bind_task_session +async def test_successful_draft_stores_transcript_and_starts_rendering( + monkeypatch, db_search_space, make_podcast, bind_task_session, captured_tasks ): podcast = await make_podcast( search_space_id=db_search_space.id, status=PodcastStatus.DRAFTING @@ -58,9 +59,10 @@ async def test_successful_billing_opens_review_gate_with_transcript( result = await draft._draft_transcript(podcast.id, db_search_space.id) - assert result["status"] == "awaiting_review" - assert podcast.status == PodcastStatus.AWAITING_REVIEW + assert result["status"] == "rendering" + assert podcast.status == PodcastStatus.RENDERING assert read_transcript(podcast) is not None + assert captured_tasks.render == [((podcast.id,), {})] async def 
test_quota_denial_fails_the_podcast_without_a_transcript( diff --git a/surfsense_backend/tests/integration/podcasts/test_regeneration.py b/surfsense_backend/tests/integration/podcasts/test_regeneration.py new file mode 100644 index 000000000..9874fe0f2 --- /dev/null +++ b/surfsense_backend/tests/integration/podcasts/test_regeneration.py @@ -0,0 +1,60 @@ +"""Regeneration: the listen-then-redo loop after the brief gate. + +The brief is the only approval; drafting flows straight into rendering. A user +who dislikes the finished audio sends the episode back with regenerate. These +pin the READY -> DRAFTING round trip (with the draft task enqueued) and the 409 +for regenerating from states that have nothing to redo. +""" + +from __future__ import annotations + +import pytest + +from app.podcasts.persistence import PodcastStatus + +pytestmark = pytest.mark.integration + +BASE = "/api/v1/podcasts" + + +async def test_regenerate_from_ready_returns_to_drafting_and_enqueues_draft( + client, db_search_space, make_podcast, captured_tasks +): + podcast = await make_podcast( + search_space_id=db_search_space.id, status=PodcastStatus.READY + ) + + resp = await client.post(f"{BASE}/{podcast.id}/transcript/regenerate") + + assert resp.status_code == 200 + assert resp.json()["status"] == "drafting" + assert captured_tasks.draft == [((podcast.id, db_search_space.id), {})] + assert captured_tasks.render == [] + + +async def test_regenerate_from_brief_gate_is_rejected( + client, db_search_space, make_podcast, captured_tasks +): + # Nothing has been drafted yet, so there is nothing to regenerate. 
+ podcast = await make_podcast( + search_space_id=db_search_space.id, status=PodcastStatus.AWAITING_BRIEF + ) + + resp = await client.post(f"{BASE}/{podcast.id}/transcript/regenerate") + + assert resp.status_code == 409 + assert captured_tasks.draft == [] + + +async def test_regenerate_from_cancelled_is_rejected( + client, db_search_space, make_podcast, captured_tasks +): + podcast = await make_podcast( + search_space_id=db_search_space.id, status=PodcastStatus.AWAITING_BRIEF + ) + await client.post(f"{BASE}/{podcast.id}/cancel") + + resp = await client.post(f"{BASE}/{podcast.id}/transcript/regenerate") + + assert resp.status_code == 409 + assert captured_tasks.draft == [] diff --git a/surfsense_backend/tests/integration/podcasts/test_render_task.py b/surfsense_backend/tests/integration/podcasts/test_render_task.py index fdb66a522..7fa542ed5 100644 --- a/surfsense_backend/tests/integration/podcasts/test_render_task.py +++ b/surfsense_backend/tests/integration/podcasts/test_render_task.py @@ -11,8 +11,11 @@ from __future__ import annotations import pytest from app.podcasts.persistence import PodcastStatus +from app.podcasts.service import PodcastService from app.podcasts.tasks import render +from .conftest import build_transcript + pytestmark = pytest.mark.integration @@ -30,3 +33,33 @@ async def test_render_marks_ready_and_stores_audio( assert podcast.storage_backend == "memory" assert podcast.storage_key assert fake_storage.objects[podcast.storage_key] == b"merged-audio" + + +async def test_rerender_replaces_audio_and_purges_the_old_object( + db_session, + db_search_space, + make_podcast, + bind_task_session, + fake_tts, + fake_merge, + fake_storage, +): + # A regenerated episode keeps exactly one stored object: the new render + # must not leak the superseded audio in the object store. 
+ podcast = await make_podcast( + search_space_id=db_search_space.id, status=PodcastStatus.READY + ) + old_key = podcast.storage_key + fake_storage.objects[old_key] = b"old-audio" + + service = PodcastService(db_session) + await service.regenerate(podcast) + await service.attach_transcript(podcast, build_transcript()) + + result = await render._render_audio(podcast.id) + + assert result["status"] == "ready" + assert podcast.status == PodcastStatus.READY + assert podcast.storage_key != old_key + assert fake_storage.objects[podcast.storage_key] == b"merged-audio" + assert old_key in fake_storage.deleted diff --git a/surfsense_backend/tests/integration/podcasts/test_streaming.py b/surfsense_backend/tests/integration/podcasts/test_streaming.py index 891c53005..82456bac9 100644 --- a/surfsense_backend/tests/integration/podcasts/test_streaming.py +++ b/surfsense_backend/tests/integration/podcasts/test_streaming.py @@ -33,7 +33,7 @@ async def test_stream_serves_stored_audio( async def test_stream_404_when_no_audio(client, db_search_space, make_podcast): podcast = await make_podcast( - search_space_id=db_search_space.id, status=PodcastStatus.AWAITING_REVIEW + search_space_id=db_search_space.id, status=PodcastStatus.DRAFTING ) resp = await client.get(f"{BASE}/{podcast.id}/stream") diff --git a/surfsense_backend/tests/integration/podcasts/test_transcript_gate.py b/surfsense_backend/tests/integration/podcasts/test_transcript_gate.py deleted file mode 100644 index a8707f8db..000000000 --- a/surfsense_backend/tests/integration/podcasts/test_transcript_gate.py +++ /dev/null @@ -1,81 +0,0 @@ -"""The transcript go/no-go gate: approve to render, or regenerate to redraft. - -From ``awaiting_review`` the user either approves (start rendering) or regenerates -(redraft). These pin the resulting state, the Celery task each enqueues, and the -HTTP codes for acting from the wrong state (409) or without a transcript (422). 
-""" - -from __future__ import annotations - -import pytest - -from app.podcasts.persistence import Podcast, PodcastStatus - -pytestmark = pytest.mark.integration - -BASE = "/api/v1/podcasts" - - -async def test_approve_transcript_starts_rendering_and_enqueues_render( - client, db_search_space, make_podcast, captured_tasks -): - podcast = await make_podcast( - search_space_id=db_search_space.id, status=PodcastStatus.AWAITING_REVIEW - ) - - resp = await client.post(f"{BASE}/{podcast.id}/transcript/approve") - - assert resp.status_code == 200 - assert resp.json()["status"] == "rendering" - assert captured_tasks.render == [((podcast.id,), {})] - assert captured_tasks.draft == [] - - -async def test_regenerate_returns_to_drafting_and_enqueues_draft( - client, db_search_space, make_podcast, captured_tasks -): - podcast = await make_podcast( - search_space_id=db_search_space.id, status=PodcastStatus.AWAITING_REVIEW - ) - - resp = await client.post(f"{BASE}/{podcast.id}/transcript/regenerate") - - assert resp.status_code == 200 - assert resp.json()["status"] == "drafting" - assert captured_tasks.draft == [((podcast.id, db_search_space.id), {})] - assert captured_tasks.render == [] - - -async def test_approve_transcript_from_terminal_state_is_rejected( - client, db_search_space, make_podcast, captured_tasks -): - # A ready podcast still has its transcript, so the precondition passes and - # the disallowed terminal->rendering transition is what surfaces (409). - podcast = await make_podcast( - search_space_id=db_search_space.id, status=PodcastStatus.READY - ) - - resp = await client.post(f"{BASE}/{podcast.id}/transcript/approve") - - assert resp.status_code == 409 - assert captured_tasks.render == [] - - -async def test_approve_without_transcript_is_unprocessable( - client, db_session, db_search_space, captured_tasks -): - # An anomalous awaiting_review row with no transcript exercises the route's - # precondition->422 mapping (the service refuses to render without one). 
- podcast = Podcast( - title="No transcript", - search_space_id=db_search_space.id, - status=PodcastStatus.AWAITING_REVIEW, - spec_version=1, - ) - db_session.add(podcast) - await db_session.flush() - - resp = await client.post(f"{BASE}/{podcast.id}/transcript/approve") - - assert resp.status_code == 422 - assert captured_tasks.render == [] diff --git a/surfsense_backend/tests/integration/podcasts/test_voice_preview.py b/surfsense_backend/tests/integration/podcasts/test_voice_preview.py new file mode 100644 index 000000000..729b77be4 --- /dev/null +++ b/surfsense_backend/tests/integration/podcasts/test_voice_preview.py @@ -0,0 +1,79 @@ +"""Audible voice previews for the brief gate's voice picker. + +A user choosing voices should hear them, not guess from names. The endpoint +synthesises a short sample for a catalog voice and caches it on disk so each +voice is paid for at most once per cache lifetime. Unknown voices and voices +of an inactive provider are 404; no configured TTS is 503. 
+""" + +from __future__ import annotations + +import pytest + +from app.config import config as app_config + +from .conftest import FakeTextToSpeech + +pytestmark = pytest.mark.integration + +BASE = "/api/v1/podcasts" + + +@pytest.fixture +def preview_tts(monkeypatch, tmp_path) -> FakeTextToSpeech: + """Route preview synthesis to the fake provider and an isolated cache.""" + provider = FakeTextToSpeech() + monkeypatch.setattr( + "app.podcasts.api.routes.get_text_to_speech", lambda: provider + ) + monkeypatch.setattr( + "app.podcasts.voices.preview.PREVIEW_CACHE_ROOT", tmp_path + ) + return provider + + +async def test_preview_returns_playable_audio_for_a_catalog_voice( + client, preview_tts +): + resp = await client.get(f"{BASE}/voices/openai:alloy/preview") + + assert resp.status_code == 200 + assert resp.headers["content-type"] == "audio/mpeg" + assert resp.content == b"segment-audio" + + +async def test_preview_is_synthesised_once_then_served_from_cache( + client, preview_tts +): + first = await client.get(f"{BASE}/voices/openai:alloy/preview") + second = await client.get(f"{BASE}/voices/openai:alloy/preview") + + assert first.status_code == second.status_code == 200 + assert second.content == first.content + assert len(preview_tts.requests) == 1 + + +async def test_preview_unknown_voice_is_404(client, preview_tts): + resp = await client.get(f"{BASE}/voices/openai:nope/preview") + + assert resp.status_code == 404 + assert preview_tts.requests == [] + + +async def test_preview_voice_of_inactive_provider_is_404(client, preview_tts): + # The active provider is OpenAI (pinned in conftest); a Kokoro voice exists + # in the catalog but cannot be heard through the configured provider. 
+ resp = await client.get(f"{BASE}/voices/kokoro:af_heart/preview") + + assert resp.status_code == 404 + assert preview_tts.requests == [] + + +async def test_preview_without_tts_provider_is_503( + client, preview_tts, monkeypatch +): + monkeypatch.setattr(app_config, "TTS_SERVICE", None) + + resp = await client.get(f"{BASE}/voices/openai:alloy/preview") + + assert resp.status_code == 503 diff --git a/surfsense_web/components/tool-ui/podcast/brief-review.tsx b/surfsense_web/components/tool-ui/podcast/brief-review.tsx index 9679fd8fa..9d244d191 100644 --- a/surfsense_web/components/tool-ui/podcast/brief-review.tsx +++ b/surfsense_web/components/tool-ui/podcast/brief-review.tsx @@ -26,6 +26,7 @@ import { import type { LivePodcast } from "@/hooks/use-podcast-live"; import { podcastsApiService } from "@/lib/apis/podcasts-api.service"; import { AppError } from "@/lib/error"; +import { VoicePreviewButton } from "./voice-preview-button"; // A "*" voice speaks whatever language the text is in (mirrors the backend // catalog's ANY_LANGUAGE sentinel). @@ -274,23 +275,26 @@ export function BriefReview({ podcast, spec, onApproved }: BriefReviewProps) { -
+
- +
+ + +
+ ); + } + + return ( +
+ Replace this episode with a new take? + + +
+ ); +} + /** Status-driven card for an authenticated viewer, fed by Zero push. */ function LivePodcastCard({ podcastId, @@ -102,30 +165,47 @@ function LivePodcastCard({ case "rendering": return ; case "awaiting_brief": - case "awaiting_review": { - const isBriefGate = podcast.status === "awaiting_brief"; return ( <> setReviewOpen(true)} /> ); - } + case "awaiting_review": + // Legacy rows parked at the removed transcript gate; the only way + // forward is a fresh draft. + return ( +
+
+

{title}

+

+ This podcast was drafted before audio rendering became automatic. +

+
+
+
+ +
+
+ ); case "ready": return ( - +
+ +
+ +
+
); case "failed": return ; diff --git a/surfsense_web/components/tool-ui/podcast/review-sheet.tsx b/surfsense_web/components/tool-ui/podcast/review-sheet.tsx index 89e796d92..3253e3156 100644 --- a/surfsense_web/components/tool-ui/podcast/review-sheet.tsx +++ b/surfsense_web/components/tool-ui/podcast/review-sheet.tsx @@ -9,7 +9,6 @@ import { } from "@/components/ui/sheet"; import type { LivePodcast } from "@/hooks/use-podcast-live"; import { BriefReview } from "./brief-review"; -import { TranscriptReview } from "./transcript-review"; interface PodcastReviewSheetProps { podcast: LivePodcast; @@ -18,49 +17,34 @@ interface PodcastReviewSheetProps { } /** - * The podcast panel: hosts whichever gate the lifecycle is waiting on. The - * pushed status decides the content, so the same sheet serves both gates and - * simply closes once the podcast moves on. + * The podcast panel: hosts the brief gate, the only approval in the lifecycle + * — after it the episode generates unattended. */ export function PodcastReviewSheet({ podcast, open, onOpenChange }: PodcastReviewSheetProps) { const close = () => onOpenChange(false); - const gate = - podcast.status === "awaiting_brief" && podcast.spec ? ( - <> - - Review podcast brief - - Confirm the language, voices, and length before the transcript is drafted. - - -
- -
- - ) : podcast.status === "awaiting_review" ? ( - <> - - Review transcript - - Approve the script to render the audio, or regenerate a fresh draft. - - -
- -
- - ) : ( - - {podcast.title} - Nothing is awaiting review right now. - - ); - return ( - {gate} + {podcast.status === "awaiting_brief" && podcast.spec ? ( + <> + + Review podcast brief + + Confirm the language, voices, and length — the episode generates unattended after + this. + + +
+ +
+ + ) : ( + + {podcast.title} + Nothing is awaiting review right now. + + )}
); diff --git a/surfsense_web/components/tool-ui/podcast/transcript-review.tsx b/surfsense_web/components/tool-ui/podcast/transcript-review.tsx deleted file mode 100644 index 2391c4c74..000000000 --- a/surfsense_web/components/tool-ui/podcast/transcript-review.tsx +++ /dev/null @@ -1,118 +0,0 @@ -"use client"; - -import { Loader2 } from "lucide-react"; -import { useEffect, useState } from "react"; -import { toast } from "sonner"; -import { TextShimmerLoader } from "@/components/prompt-kit/loader"; -import { Button } from "@/components/ui/button"; -import type { PodcastDetail } from "@/contracts/types/podcast.types"; -import type { LivePodcast } from "@/hooks/use-podcast-live"; -import { podcastsApiService } from "@/lib/apis/podcasts-api.service"; -import { speakerLabel } from "./schema"; - -interface TranscriptReviewProps { - podcast: LivePodcast; - onDecided: () => void; -} - -/** - * Gate 2: a go/no-go on the drafted script before the expensive render. - * Read-only by design — approve it, regenerate a fresh draft, or cancel. - */ -export function TranscriptReview({ podcast, onDecided }: TranscriptReviewProps) { - const [detail, setDetail] = useState(null); - const [loadError, setLoadError] = useState(null); - const [pendingAction, setPendingAction] = useState<"approve" | "regenerate" | "cancel" | null>( - null - ); - - useEffect(() => { - let cancelled = false; - setDetail(null); - setLoadError(null); - podcastsApiService - .getDetail(podcast.id) - .then((data) => { - if (!cancelled) setDetail(data); - }) - .catch((error) => { - if (!cancelled) { - setLoadError(error instanceof Error ? error.message : "Failed to load the transcript"); - } - }); - return () => { - cancelled = true; - }; - }, [podcast.id]); - - const act = async (action: "approve" | "regenerate" | "cancel", run: () => Promise) => { - setPendingAction(action); - try { - await run(); - onDecided(); - } catch (error) { - toast.error(error instanceof Error ? 
error.message : "Action failed"); - } finally { - setPendingAction(null); - } - }; - - if (loadError) { - return

{loadError}

; - } - - if (!detail) { - return ; - } - - const turns = detail.transcript?.turns ?? []; - - return ( -
-
- {turns.map((turn, idx) => ( -
- - {speakerLabel(detail.spec, turn.speaker)}: - {" "} - {turn.text} -
- ))} - {turns.length === 0 ? ( -

No transcript available.

- ) : null} -
- -
- - - -
-
- ); -} diff --git a/surfsense_web/components/tool-ui/podcast/voice-preview-button.tsx b/surfsense_web/components/tool-ui/podcast/voice-preview-button.tsx new file mode 100644 index 000000000..989b15e0f --- /dev/null +++ b/surfsense_web/components/tool-ui/podcast/voice-preview-button.tsx @@ -0,0 +1,98 @@ +"use client"; + +import { Loader2, Play, Square } from "lucide-react"; +import { useEffect, useRef, useState } from "react"; +import { toast } from "sonner"; +import { Button } from "@/components/ui/button"; +import { podcastsApiService } from "@/lib/apis/podcasts-api.service"; + +// Comparing voices means replaying the same samples, so each voice is fetched +// at most once per page lifetime. +const sampleUrls = new Map>(); + +// Overlapping samples are useless for comparison, so only one plays at a time. +let activeAudio: HTMLAudioElement | null = null; +let stopActive: (() => void) | null = null; + +function getSampleUrl(voiceId: string): Promise { + let url = sampleUrls.get(voiceId); + if (!url) { + url = podcastsApiService.previewVoice(voiceId).then((blob) => URL.createObjectURL(blob)); + // A failed fetch must not poison the cache for retries. + url.catch(() => sampleUrls.delete(voiceId)); + sampleUrls.set(voiceId, url); + } + return url; +} + +/** Plays a short sample of `voiceId` so users pick voices by sound. 
*/ +export function VoicePreviewButton({ voiceId }: { voiceId: string }) { + const [state, setState] = useState<"idle" | "loading" | "playing">("idle"); + const mountedRef = useRef(true); + + useEffect(() => { + mountedRef.current = true; + return () => { + mountedRef.current = false; + if (stopActive && activeAudio?.dataset.voiceId === voiceId) { + stopActive(); + } + }; + }, [voiceId]); + + const stop = () => { + if (stopActive) stopActive(); + }; + + const play = async () => { + stop(); + setState("loading"); + try { + const url = await getSampleUrl(voiceId); + if (!mountedRef.current) return; + + const audio = new Audio(url); + audio.dataset.voiceId = voiceId; + activeAudio = audio; + stopActive = () => { + audio.pause(); + activeAudio = null; + stopActive = null; + if (mountedRef.current) setState("idle"); + }; + audio.onended = () => { + if (activeAudio === audio) { + activeAudio = null; + stopActive = null; + } + if (mountedRef.current) setState("idle"); + }; + await audio.play(); + if (mountedRef.current) setState("playing"); + } catch (error) { + if (mountedRef.current) setState("idle"); + toast.error(error instanceof Error ? error.message : "Couldn't play the voice sample"); + } + }; + + const isPlaying = state === "playing"; + + return ( + + ); +} diff --git a/surfsense_web/contracts/types/podcast.types.ts b/surfsense_web/contracts/types/podcast.types.ts index 13bf530e7..7cf12a6dd 100644 --- a/surfsense_web/contracts/types/podcast.types.ts +++ b/surfsense_web/contracts/types/podcast.types.ts @@ -16,18 +16,18 @@ export const podcastStatus = z.enum([ ]); export type PodcastStatus = z.infer; -/** States waiting on user input before the lifecycle can proceed. */ -export const GATE_STATUSES: ReadonlySet = new Set([ - "awaiting_brief", - "awaiting_review", -]); +/** + * States waiting on user input before the lifecycle can proceed. The brief is + * the only approval gate; `awaiting_review` survives in the enum for legacy + * rows but is never entered anymore. 
+ */ +export const GATE_STATUSES: ReadonlySet = new Set(["awaiting_brief"]); -/** States from which no further transition is possible. */ -export const TERMINAL_STATUSES: ReadonlySet = new Set([ - "ready", - "failed", - "cancelled", -]); +/** + * States from which no further transition is possible. A `ready` episode is + * not terminal: it can be sent back to drafting for regeneration. + */ +export const TERMINAL_STATUSES: ReadonlySet = new Set(["failed", "cancelled"]); // ============================================================================= // Brief (spec) — mirror app/podcasts/schemas/spec.py diff --git a/surfsense_web/lib/apis/podcasts-api.service.ts b/surfsense_web/lib/apis/podcasts-api.service.ts index f47269654..df9fb0260 100644 --- a/surfsense_web/lib/apis/podcasts-api.service.ts +++ b/surfsense_web/lib/apis/podcasts-api.service.ts @@ -37,11 +37,8 @@ class PodcastsApiService { return baseApiService.post(`${BASE}/${podcastId}/brief/approve`, podcastDetail); }; - approveTranscript = async (podcastId: number) => { - return baseApiService.post(`${BASE}/${podcastId}/transcript/approve`, podcastDetail); - }; - - regenerateTranscript = async (podcastId: number) => { + // Destructive: the current transcript and audio are replaced by a fresh take. + regenerate = async (podcastId: number) => { return baseApiService.post(`${BASE}/${podcastId}/transcript/regenerate`, podcastDetail); }; @@ -53,6 +50,11 @@ class PodcastsApiService { const qs = language ? `?${new URLSearchParams({ language })}` : ""; return baseApiService.get(`${BASE}/voices${qs}`, voiceOptionList); }; + + // A short audio sample of a voice, cached server-side per voice. + previewVoice = async (voiceId: string) => { + return baseApiService.getBlob(`${BASE}/voices/${encodeURIComponent(voiceId)}/preview`); + }; } export const podcastsApiService = new PodcastsApiService();