diff --git a/surfsense_backend/app/podcasts/api/routes.py b/surfsense_backend/app/podcasts/api/routes.py index 7a998c85e..af378e1c2 100644 --- a/surfsense_backend/app/podcasts/api/routes.py +++ b/surfsense_backend/app/podcasts/api/routes.py @@ -1,7 +1,7 @@ """HTTP surface for the podcast lifecycle. Status is observed by the frontend through Zero, so these routes are about -actions (create, edit the brief, approve/regenerate, cancel) and audio delivery. +actions (create, edit/approve the brief, regenerate, cancel) and audio delivery. Each mutating route performs the guarded transition via the service, commits, then enqueues the matching Celery task; lifecycle errors map to 409/422. """ @@ -11,7 +11,7 @@ from __future__ import annotations import os from pathlib import Path -from fastapi import APIRouter, Depends, HTTPException +from fastapi import APIRouter, Depends, HTTPException, Response from fastapi.responses import StreamingResponse from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession @@ -33,11 +33,13 @@ from app.podcasts.service import ( SpecConflict, ) from app.podcasts.storage import open_audio_stream, purge_audio -from app.podcasts.tasks import ( - draft_transcript_task, - render_audio_task, +from app.podcasts.tasks import draft_transcript_task +from app.podcasts.tts import get_text_to_speech +from app.podcasts.voices import ( + get_voice_catalog, + provider_from_service, + render_voice_preview, ) -from app.podcasts.voices import get_voice_catalog, provider_from_service from app.users import current_active_user from app.utils.rbac import check_permission @@ -110,6 +112,29 @@ async def list_voices(language: str | None = None): ] +@router.get("/podcasts/voices/{voice_id}/preview") +async def preview_voice( + voice_id: str, + user: User = Depends(current_active_user), +): + """A short audio sample of a voice, so users pick by sound.""" + if not app_config.TTS_SERVICE: + raise HTTPException(status_code=503, detail="No TTS provider 
configured") + + provider = provider_from_service(app_config.TTS_SERVICE) + try: + voice = get_voice_catalog().get(voice_id) + except KeyError: + raise HTTPException(status_code=404, detail="Unknown voice") from None + if voice.provider is not provider: + raise HTTPException( + status_code=404, detail="Voice not offered by the active TTS provider" + ) + + data, content_type = await render_voice_preview(voice, get_text_to_speech()) + return Response(content=data, media_type=content_type) + + @router.post("/podcasts", response_model=PodcastDetail, status_code=201) async def create_podcast( body: CreatePodcastRequest, @@ -180,21 +205,6 @@ async def approve_brief( return PodcastDetail.of(podcast) -@router.post("/podcasts/{podcast_id}/transcript/approve", response_model=PodcastDetail) -async def approve_transcript( - podcast_id: int, - session: AsyncSession = Depends(get_async_session), - user: User = Depends(current_active_user), -): - """Approve the transcript and start rendering audio.""" - podcast = await _load(session, user, podcast_id, Permission.PODCASTS_UPDATE) - async with _lifecycle_errors(): - await PodcastService(session).approve(podcast) - await session.commit() - render_audio_task.delay(podcast.id) - return PodcastDetail.of(podcast) - - @router.post( "/podcasts/{podcast_id}/transcript/regenerate", response_model=PodcastDetail ) @@ -203,7 +213,7 @@ async def regenerate_transcript( session: AsyncSession = Depends(get_async_session), user: User = Depends(current_active_user), ): - """Reject the transcript and draft a fresh one.""" + """Send a finished episode back to drafting for a fresh take.""" podcast = await _load(session, user, podcast_id, Permission.PODCASTS_UPDATE) async with _lifecycle_errors(): await PodcastService(session).regenerate(podcast) diff --git a/surfsense_backend/app/podcasts/persistence/enums/podcast_status.py b/surfsense_backend/app/podcasts/persistence/enums/podcast_status.py index 403473353..347c0422a 100644 --- 
a/surfsense_backend/app/podcasts/persistence/enums/podcast_status.py +++ b/surfsense_backend/app/podcasts/persistence/enums/podcast_status.py @@ -2,9 +2,12 @@ The status drives a guarded state machine. A podcast is proposed (``PENDING``), gets a reviewable brief (``AWAITING_BRIEF``), is drafted into a transcript -(``DRAFTING`` → ``AWAITING_REVIEW``), then rendered to audio (``RENDERING`` → -``READY``). ``FAILED`` and ``CANCELLED`` are terminal. The Python enum is kept -in lockstep with the ``podcast_status`` Postgres type via its paired migration. +(``DRAFTING``), then rendered to audio (``RENDERING`` → ``READY``). ``FAILED`` +and ``CANCELLED`` are terminal; a ``READY`` episode can be sent back to +drafting for regeneration. ``AWAITING_REVIEW`` is retained for legacy rows but +never entered anymore — the brief is the only approval gate. The Python enum is +kept in lockstep with the ``podcast_status`` Postgres type via its paired +migration. """ from __future__ import annotations @@ -33,5 +36,5 @@ class PodcastStatus(StrEnum): return self in _GATES -_TERMINAL = frozenset({PodcastStatus.READY, PodcastStatus.FAILED, PodcastStatus.CANCELLED}) +_TERMINAL = frozenset({PodcastStatus.FAILED, PodcastStatus.CANCELLED}) _GATES = frozenset({PodcastStatus.AWAITING_BRIEF, PodcastStatus.AWAITING_REVIEW}) diff --git a/surfsense_backend/app/podcasts/service.py b/surfsense_backend/app/podcasts/service.py index 6df8c315f..9afb0ab86 100644 --- a/surfsense_backend/app/podcasts/service.py +++ b/surfsense_backend/app/podcasts/service.py @@ -25,20 +25,18 @@ _ALLOWED: dict[PodcastStatus, frozenset[PodcastStatus]] = { {PodcastStatus.DRAFTING, PodcastStatus.FAILED, PodcastStatus.CANCELLED} ), PodcastStatus.DRAFTING: frozenset( - {PodcastStatus.AWAITING_REVIEW, PodcastStatus.FAILED, PodcastStatus.CANCELLED} + {PodcastStatus.RENDERING, PodcastStatus.FAILED, PodcastStatus.CANCELLED} ), + # Never entered anymore (the transcript gate was dropped); kept with exits + # so legacy rows aren't 
stranded. PodcastStatus.AWAITING_REVIEW: frozenset( - { - PodcastStatus.RENDERING, # approve - PodcastStatus.DRAFTING, # regenerate - PodcastStatus.FAILED, - PodcastStatus.CANCELLED, - } + {PodcastStatus.DRAFTING, PodcastStatus.FAILED, PodcastStatus.CANCELLED} ), PodcastStatus.RENDERING: frozenset( {PodcastStatus.READY, PodcastStatus.FAILED, PodcastStatus.CANCELLED} ), - PodcastStatus.READY: frozenset(), + # Not terminal: regeneration is decided by listening to the finished episode. + PodcastStatus.READY: frozenset({PodcastStatus.DRAFTING}), PodcastStatus.FAILED: frozenset(), PodcastStatus.CANCELLED: frozenset(), } @@ -121,22 +119,22 @@ class PodcastService: async def attach_transcript( self, podcast: Podcast, transcript: Transcript ) -> Podcast: - """Record the drafted transcript and open the go/no-go gate.""" - self._transition(podcast, PodcastStatus.AWAITING_REVIEW) + """Record the drafted transcript and move straight to rendering.""" + self._transition(podcast, PodcastStatus.RENDERING) podcast.podcast_transcript = transcript.model_dump(mode="json") await self._session.flush() return podcast - async def approve(self, podcast: Podcast) -> Podcast: - """Accept the transcript and start rendering.""" - if not podcast.podcast_transcript: - raise PreconditionFailed("cannot render without a transcript") - self._transition(podcast, PodcastStatus.RENDERING) - await self._session.flush() - return podcast + # Guards regenerate beyond the transition table: from AWAITING_BRIEF the + # DRAFTING target is also legal, but there it means brief approval. 
+ _REGENERABLE = frozenset({PodcastStatus.READY, PodcastStatus.AWAITING_REVIEW}) async def regenerate(self, podcast: Podcast) -> Podcast: - """Reject the transcript and draft a new one.""" + """Send the episode back to drafting for a fresh transcript and render.""" + if _status(podcast) not in self._REGENERABLE: + raise InvalidTransition( + f"nothing to regenerate from {_status(podcast).value}" + ) self._transition(podcast, PodcastStatus.DRAFTING) await self._session.flush() return podcast diff --git a/surfsense_backend/app/podcasts/storage.py b/surfsense_backend/app/podcasts/storage.py index 30179adae..f02429dff 100644 --- a/surfsense_backend/app/podcasts/storage.py +++ b/surfsense_backend/app/podcasts/storage.py @@ -44,5 +44,10 @@ def open_audio_stream(podcast: Podcast) -> AsyncIterator[bytes]: async def purge_audio(podcast: Podcast) -> None: """Delete a podcast's stored audio if present; a missing object is fine.""" - if podcast.storage_key: - await get_storage_backend().delete(podcast.storage_key) + await purge_audio_object(podcast.storage_key) + + +async def purge_audio_object(key: str | None) -> None: + """Delete a stored audio object by key, e.g. the one a re-render replaced.""" + if key: + await get_storage_backend().delete(key) diff --git a/surfsense_backend/app/podcasts/tasks/draft.py b/surfsense_backend/app/podcasts/tasks/draft.py index 575daf2ba..8779f6ce1 100644 --- a/surfsense_backend/app/podcasts/tasks/draft.py +++ b/surfsense_backend/app/podcasts/tasks/draft.py @@ -1,8 +1,9 @@ -"""Transcript-drafting task: DRAFTING -> AWAITING_REVIEW. +"""Transcript-drafting task: DRAFTING -> RENDERING. The expensive, LLM-heavy step, so it runs under ``billable_call``. The API has already moved the row to DRAFTING and stored the approved brief; this task -drafts the long-form transcript and opens the go/no-go gate. +drafts the long-form transcript and chains straight into the render — the brief +gate is the only approval in the lifecycle. 
""" from __future__ import annotations @@ -23,6 +24,7 @@ from app.services.billable_calls import ( ) from app.tasks.celery_tasks import get_celery_session_maker, run_async_celery_task +from .render import render_audio_task from .runtime import billable_session, mark_failed logger = logging.getLogger(__name__) @@ -90,4 +92,8 @@ async def _draft_transcript(podcast_id: int, search_space_id: int) -> dict: await service.attach_transcript(podcast, result["transcript"]) await session.commit() - return {"status": "awaiting_review", "podcast_id": podcast_id} + + # Enqueue only after the transaction is committed, so the render worker can + # never pick up a row whose transcript isn't visible yet. + render_audio_task.delay(podcast_id) + return {"status": "rendering", "podcast_id": podcast_id} diff --git a/surfsense_backend/app/podcasts/tasks/render.py b/surfsense_backend/app/podcasts/tasks/render.py index 04fb9ab9d..dc8c9b7ed 100644 --- a/surfsense_backend/app/podcasts/tasks/render.py +++ b/surfsense_backend/app/podcasts/tasks/render.py @@ -15,7 +15,7 @@ from app.celery_app import celery_app from app.podcasts.persistence import PodcastRepository from app.podcasts.rendering import PodcastRenderer from app.podcasts.service import PodcastService, read_spec, read_transcript -from app.podcasts.storage import store_audio +from app.podcasts.storage import purge_audio_object, store_audio from app.podcasts.tts import get_text_to_speech from app.podcasts.voices import get_voice_catalog from app.tasks.celery_tasks import get_celery_session_maker, run_async_celery_task @@ -58,6 +58,8 @@ async def _render_audio(podcast_id: int) -> dict: spec=spec, transcript=transcript, workdir=workdir ) + superseded_key = podcast.storage_key + backend_name, key = await store_audio( search_space_id=podcast.search_space_id, podcast_id=podcast_id, @@ -67,4 +69,8 @@ async def _render_audio(podcast_id: int) -> dict: podcast, storage_backend=backend_name, storage_key=key ) await session.commit() - return 
{"status": "ready", "podcast_id": podcast_id} + + # Purge only after the new audio is committed, so a failed re-render never + # destroys the episode the user can still play. + await purge_audio_object(superseded_key) + return {"status": "ready", "podcast_id": podcast_id} diff --git a/surfsense_backend/app/podcasts/voices/__init__.py b/surfsense_backend/app/podcasts/voices/__init__.py index 99560ab35..ab1f8bbbf 100644 --- a/surfsense_backend/app/podcasts/voices/__init__.py +++ b/surfsense_backend/app/podcasts/voices/__init__.py @@ -7,6 +7,7 @@ configured provider via :func:`provider_from_service`. from __future__ import annotations from .catalog import VoiceCatalog, get_voice_catalog +from .preview import render_voice_preview from .provider import TtsProvider, provider_from_service from .voice import ANY_LANGUAGE, CatalogVoice, VoiceGender @@ -18,4 +19,5 @@ __all__ = [ "VoiceGender", "get_voice_catalog", "provider_from_service", + "render_voice_preview", ] diff --git a/surfsense_backend/app/podcasts/voices/preview.py b/surfsense_backend/app/podcasts/voices/preview.py new file mode 100644 index 000000000..cb70a9f0b --- /dev/null +++ b/surfsense_backend/app/podcasts/voices/preview.py @@ -0,0 +1,67 @@ +"""Audible previews so users pick voices by sound, not by name. + +A preview is a short sample sentence synthesised in the voice's own language. +Samples are served through the same content-addressed cache the renderer uses, +so each voice costs at most one synthesis per cache lifetime — repeat listens +while comparing voices are free. +""" + +from __future__ import annotations + +import tempfile +from pathlib import Path + +from app.podcasts.rendering.cache import SegmentCache +from app.podcasts.tts import SynthesisRequest, TextToSpeech + +from .voice import ANY_LANGUAGE, CatalogVoice + +# Previews are user-independent, so one rendered sample serves everyone. 
+PREVIEW_CACHE_ROOT = Path(tempfile.gettempdir()) / "surfsense_podcasts" / "previews" + +_FALLBACK_LANGUAGE = "en" + +# A voice previews best speaking its own language. +_SAMPLE_TEXTS = { + "en": "Hi there! This is how I sound when narrating your podcast.", + "es": "¡Hola! Así sueno cuando narro tu pódcast.", + "fr": "Bonjour ! Voici ma voix quand je raconte votre podcast.", + "hi": "नमस्ते! आपका पॉडकास्ट सुनाते समय मेरी आवाज़ ऐसी होती है।", + "it": "Ciao! Questa è la mia voce quando racconto il tuo podcast.", + "ja": "こんにちは。ポッドキャストをお届けするときの私の声です。", + "pt": "Olá! É assim que eu soo ao narrar o seu podcast.", + "zh": "你好!这就是我为你播报播客时的声音。", +} + +_CONTENT_TYPES = {"mp3": "audio/mpeg", "wav": "audio/wav"} + + +async def render_voice_preview( + voice: CatalogVoice, tts: TextToSpeech +) -> tuple[bytes, str]: + """Return ``(audio_bytes, content_type)`` for a sample spoken by ``voice``.""" + language = ( + _FALLBACK_LANGUAGE if voice.language == ANY_LANGUAGE else voice.language + ) + request = SynthesisRequest( + text=_sample_text(language), voice=voice.native_ref, language=language + ) + + cache = SegmentCache(PREVIEW_CACHE_ROOT) + key = cache.key(request) + cached = cache.get(key, tts.container) + if cached is not None: + return cached.read_bytes(), _content_type(tts.container) + + audio = await tts.synthesize(request) + cache.put(key, audio.container, audio.data) + return audio.data, _content_type(audio.container) + + +def _sample_text(language: str) -> str: + primary = language.split("-", 1)[0].strip().lower() + return _SAMPLE_TEXTS.get(primary, _SAMPLE_TEXTS[_FALLBACK_LANGUAGE]) + + +def _content_type(container: str) -> str: + return _CONTENT_TYPES.get(container, "application/octet-stream") diff --git a/surfsense_backend/tests/integration/podcasts/conftest.py b/surfsense_backend/tests/integration/podcasts/conftest.py index e2702fdfd..330bcbef0 100644 --- a/surfsense_backend/tests/integration/podcasts/conftest.py +++ 
b/surfsense_backend/tests/integration/podcasts/conftest.py @@ -166,13 +166,20 @@ def bind_task_session(db_session: AsyncSession, monkeypatch) -> AsyncSession: class FakeTextToSpeech(TextToSpeech): - """In-memory TTS provider: every segment yields fixed bytes (the boundary).""" + """In-memory TTS provider: every segment yields fixed bytes (the boundary). + + Records each request so tests can assert how often synthesis was paid for. + """ + + def __init__(self) -> None: + self.requests: list[SynthesisRequest] = [] @property def container(self) -> str: return "mp3" async def synthesize(self, request: SynthesisRequest) -> SynthesizedAudio: + self.requests.append(request) return SynthesizedAudio(data=b"segment-audio", container="mp3") @@ -233,7 +240,6 @@ def make_podcast(db_session: AsyncSession): _LADDER = [ PodcastStatus.AWAITING_BRIEF, PodcastStatus.DRAFTING, - PodcastStatus.AWAITING_REVIEW, PodcastStatus.RENDERING, PodcastStatus.READY, ] @@ -259,10 +265,8 @@ def make_podcast(db_session: AsyncSession): await service.attach_brief(podcast, build_spec()) elif target is PodcastStatus.DRAFTING: await service.begin_drafting(podcast) - elif target is PodcastStatus.AWAITING_REVIEW: - await service.attach_transcript(podcast, build_transcript()) elif target is PodcastStatus.RENDERING: - await service.approve(podcast) + await service.attach_transcript(podcast, build_transcript()) elif target is PodcastStatus.READY: await service.attach_audio( podcast, diff --git a/surfsense_backend/tests/integration/podcasts/test_draft_task.py b/surfsense_backend/tests/integration/podcasts/test_draft_task.py index a5e0cbe36..7dadfc2f5 100644 --- a/surfsense_backend/tests/integration/podcasts/test_draft_task.py +++ b/surfsense_backend/tests/integration/podcasts/test_draft_task.py @@ -1,11 +1,12 @@ """The transcript-drafting task against a real database. Drafting is the expensive LLM step, so it runs under ``billable_call``. 
The -behavior that protects users' money: when billing succeeds, a drafted transcript -opens the review gate (DRAFTING -> AWAITING_REVIEW); when billing denies or -settlement fails, the podcast ends FAILED with no transcript left behind. The DB, -service, and transcript persistence run for real; only the true externals are -faked — billing (the metering boundary) and the generation graph (the LLM). +behavior that protects users' money: when billing succeeds, the drafted +transcript is stored and rendering starts immediately (DRAFTING -> RENDERING, +render task enqueued — the brief gate is the only approval); when billing denies +or settlement fails, the podcast ends FAILED with no transcript left behind. The +DB, service, and transcript persistence run for real; only the true externals +are faked — billing (the metering boundary) and the generation graph (the LLM). """ from __future__ import annotations @@ -43,8 +44,8 @@ def _wire_billing(monkeypatch, *, billable_call, transcript=None) -> None: monkeypatch.setattr(draft, "transcript_graph", SimpleNamespace(ainvoke=_ainvoke)) -async def test_successful_billing_opens_review_gate_with_transcript( - monkeypatch, db_search_space, make_podcast, bind_task_session +async def test_successful_draft_stores_transcript_and_starts_rendering( + monkeypatch, db_search_space, make_podcast, bind_task_session, captured_tasks ): podcast = await make_podcast( search_space_id=db_search_space.id, status=PodcastStatus.DRAFTING @@ -58,9 +59,10 @@ async def test_successful_billing_opens_review_gate_with_transcript( result = await draft._draft_transcript(podcast.id, db_search_space.id) - assert result["status"] == "awaiting_review" - assert podcast.status == PodcastStatus.AWAITING_REVIEW + assert result["status"] == "rendering" + assert podcast.status == PodcastStatus.RENDERING assert read_transcript(podcast) is not None + assert captured_tasks.render == [((podcast.id,), {})] async def 
test_quota_denial_fails_the_podcast_without_a_transcript( diff --git a/surfsense_backend/tests/integration/podcasts/test_regeneration.py b/surfsense_backend/tests/integration/podcasts/test_regeneration.py new file mode 100644 index 000000000..9874fe0f2 --- /dev/null +++ b/surfsense_backend/tests/integration/podcasts/test_regeneration.py @@ -0,0 +1,60 @@ +"""Regeneration: the listen-then-redo loop after the brief gate. + +The brief is the only approval; drafting flows straight into rendering. A user +who dislikes the finished audio sends the episode back with regenerate. These +pin the READY -> DRAFTING round trip (with the draft task enqueued) and the 409 +for regenerating from states that have nothing to redo. +""" + +from __future__ import annotations + +import pytest + +from app.podcasts.persistence import PodcastStatus + +pytestmark = pytest.mark.integration + +BASE = "/api/v1/podcasts" + + +async def test_regenerate_from_ready_returns_to_drafting_and_enqueues_draft( + client, db_search_space, make_podcast, captured_tasks +): + podcast = await make_podcast( + search_space_id=db_search_space.id, status=PodcastStatus.READY + ) + + resp = await client.post(f"{BASE}/{podcast.id}/transcript/regenerate") + + assert resp.status_code == 200 + assert resp.json()["status"] == "drafting" + assert captured_tasks.draft == [((podcast.id, db_search_space.id), {})] + assert captured_tasks.render == [] + + +async def test_regenerate_from_brief_gate_is_rejected( + client, db_search_space, make_podcast, captured_tasks +): + # Nothing has been drafted yet, so there is nothing to regenerate. 
+ podcast = await make_podcast( + search_space_id=db_search_space.id, status=PodcastStatus.AWAITING_BRIEF + ) + + resp = await client.post(f"{BASE}/{podcast.id}/transcript/regenerate") + + assert resp.status_code == 409 + assert captured_tasks.draft == [] + + +async def test_regenerate_from_cancelled_is_rejected( + client, db_search_space, make_podcast, captured_tasks +): + podcast = await make_podcast( + search_space_id=db_search_space.id, status=PodcastStatus.AWAITING_BRIEF + ) + await client.post(f"{BASE}/{podcast.id}/cancel") + + resp = await client.post(f"{BASE}/{podcast.id}/transcript/regenerate") + + assert resp.status_code == 409 + assert captured_tasks.draft == [] diff --git a/surfsense_backend/tests/integration/podcasts/test_render_task.py b/surfsense_backend/tests/integration/podcasts/test_render_task.py index fdb66a522..7fa542ed5 100644 --- a/surfsense_backend/tests/integration/podcasts/test_render_task.py +++ b/surfsense_backend/tests/integration/podcasts/test_render_task.py @@ -11,8 +11,11 @@ from __future__ import annotations import pytest from app.podcasts.persistence import PodcastStatus +from app.podcasts.service import PodcastService from app.podcasts.tasks import render +from .conftest import build_transcript + pytestmark = pytest.mark.integration @@ -30,3 +33,33 @@ async def test_render_marks_ready_and_stores_audio( assert podcast.storage_backend == "memory" assert podcast.storage_key assert fake_storage.objects[podcast.storage_key] == b"merged-audio" + + +async def test_rerender_replaces_audio_and_purges_the_old_object( + db_session, + db_search_space, + make_podcast, + bind_task_session, + fake_tts, + fake_merge, + fake_storage, +): + # A regenerated episode keeps exactly one stored object: the new render + # must not leak the superseded audio in the object store. 
+ podcast = await make_podcast( + search_space_id=db_search_space.id, status=PodcastStatus.READY + ) + old_key = podcast.storage_key + fake_storage.objects[old_key] = b"old-audio" + + service = PodcastService(db_session) + await service.regenerate(podcast) + await service.attach_transcript(podcast, build_transcript()) + + result = await render._render_audio(podcast.id) + + assert result["status"] == "ready" + assert podcast.status == PodcastStatus.READY + assert podcast.storage_key != old_key + assert fake_storage.objects[podcast.storage_key] == b"merged-audio" + assert old_key in fake_storage.deleted diff --git a/surfsense_backend/tests/integration/podcasts/test_streaming.py b/surfsense_backend/tests/integration/podcasts/test_streaming.py index 891c53005..82456bac9 100644 --- a/surfsense_backend/tests/integration/podcasts/test_streaming.py +++ b/surfsense_backend/tests/integration/podcasts/test_streaming.py @@ -33,7 +33,7 @@ async def test_stream_serves_stored_audio( async def test_stream_404_when_no_audio(client, db_search_space, make_podcast): podcast = await make_podcast( - search_space_id=db_search_space.id, status=PodcastStatus.AWAITING_REVIEW + search_space_id=db_search_space.id, status=PodcastStatus.DRAFTING ) resp = await client.get(f"{BASE}/{podcast.id}/stream") diff --git a/surfsense_backend/tests/integration/podcasts/test_transcript_gate.py b/surfsense_backend/tests/integration/podcasts/test_transcript_gate.py deleted file mode 100644 index a8707f8db..000000000 --- a/surfsense_backend/tests/integration/podcasts/test_transcript_gate.py +++ /dev/null @@ -1,81 +0,0 @@ -"""The transcript go/no-go gate: approve to render, or regenerate to redraft. - -From ``awaiting_review`` the user either approves (start rendering) or regenerates -(redraft). These pin the resulting state, the Celery task each enqueues, and the -HTTP codes for acting from the wrong state (409) or without a transcript (422). 
-""" - -from __future__ import annotations - -import pytest - -from app.podcasts.persistence import Podcast, PodcastStatus - -pytestmark = pytest.mark.integration - -BASE = "/api/v1/podcasts" - - -async def test_approve_transcript_starts_rendering_and_enqueues_render( - client, db_search_space, make_podcast, captured_tasks -): - podcast = await make_podcast( - search_space_id=db_search_space.id, status=PodcastStatus.AWAITING_REVIEW - ) - - resp = await client.post(f"{BASE}/{podcast.id}/transcript/approve") - - assert resp.status_code == 200 - assert resp.json()["status"] == "rendering" - assert captured_tasks.render == [((podcast.id,), {})] - assert captured_tasks.draft == [] - - -async def test_regenerate_returns_to_drafting_and_enqueues_draft( - client, db_search_space, make_podcast, captured_tasks -): - podcast = await make_podcast( - search_space_id=db_search_space.id, status=PodcastStatus.AWAITING_REVIEW - ) - - resp = await client.post(f"{BASE}/{podcast.id}/transcript/regenerate") - - assert resp.status_code == 200 - assert resp.json()["status"] == "drafting" - assert captured_tasks.draft == [((podcast.id, db_search_space.id), {})] - assert captured_tasks.render == [] - - -async def test_approve_transcript_from_terminal_state_is_rejected( - client, db_search_space, make_podcast, captured_tasks -): - # A ready podcast still has its transcript, so the precondition passes and - # the disallowed terminal->rendering transition is what surfaces (409). - podcast = await make_podcast( - search_space_id=db_search_space.id, status=PodcastStatus.READY - ) - - resp = await client.post(f"{BASE}/{podcast.id}/transcript/approve") - - assert resp.status_code == 409 - assert captured_tasks.render == [] - - -async def test_approve_without_transcript_is_unprocessable( - client, db_session, db_search_space, captured_tasks -): - # An anomalous awaiting_review row with no transcript exercises the route's - # precondition->422 mapping (the service refuses to render without one). 
- podcast = Podcast( - title="No transcript", - search_space_id=db_search_space.id, - status=PodcastStatus.AWAITING_REVIEW, - spec_version=1, - ) - db_session.add(podcast) - await db_session.flush() - - resp = await client.post(f"{BASE}/{podcast.id}/transcript/approve") - - assert resp.status_code == 422 - assert captured_tasks.render == [] diff --git a/surfsense_backend/tests/integration/podcasts/test_voice_preview.py b/surfsense_backend/tests/integration/podcasts/test_voice_preview.py new file mode 100644 index 000000000..729b77be4 --- /dev/null +++ b/surfsense_backend/tests/integration/podcasts/test_voice_preview.py @@ -0,0 +1,79 @@ +"""Audible voice previews for the brief gate's voice picker. + +A user choosing voices should hear them, not guess from names. The endpoint +synthesises a short sample for a catalog voice and caches it on disk so each +voice is paid for at most once per cache lifetime. Unknown voices and voices +of an inactive provider are 404; no configured TTS is 503. 
+""" + +from __future__ import annotations + +import pytest + +from app.config import config as app_config + +from .conftest import FakeTextToSpeech + +pytestmark = pytest.mark.integration + +BASE = "/api/v1/podcasts" + + +@pytest.fixture +def preview_tts(monkeypatch, tmp_path) -> FakeTextToSpeech: + """Route preview synthesis to the fake provider and an isolated cache.""" + provider = FakeTextToSpeech() + monkeypatch.setattr( + "app.podcasts.api.routes.get_text_to_speech", lambda: provider + ) + monkeypatch.setattr( + "app.podcasts.voices.preview.PREVIEW_CACHE_ROOT", tmp_path + ) + return provider + + +async def test_preview_returns_playable_audio_for_a_catalog_voice( + client, preview_tts +): + resp = await client.get(f"{BASE}/voices/openai:alloy/preview") + + assert resp.status_code == 200 + assert resp.headers["content-type"] == "audio/mpeg" + assert resp.content == b"segment-audio" + + +async def test_preview_is_synthesised_once_then_served_from_cache( + client, preview_tts +): + first = await client.get(f"{BASE}/voices/openai:alloy/preview") + second = await client.get(f"{BASE}/voices/openai:alloy/preview") + + assert first.status_code == second.status_code == 200 + assert second.content == first.content + assert len(preview_tts.requests) == 1 + + +async def test_preview_unknown_voice_is_404(client, preview_tts): + resp = await client.get(f"{BASE}/voices/openai:nope/preview") + + assert resp.status_code == 404 + assert preview_tts.requests == [] + + +async def test_preview_voice_of_inactive_provider_is_404(client, preview_tts): + # The active provider is OpenAI (pinned in conftest); a Kokoro voice exists + # in the catalog but cannot be heard through the configured provider. 
+ resp = await client.get(f"{BASE}/voices/kokoro:af_heart/preview") + + assert resp.status_code == 404 + assert preview_tts.requests == [] + + +async def test_preview_without_tts_provider_is_503( + client, preview_tts, monkeypatch +): + monkeypatch.setattr(app_config, "TTS_SERVICE", None) + + resp = await client.get(f"{BASE}/voices/openai:alloy/preview") + + assert resp.status_code == 503 diff --git a/surfsense_web/components/tool-ui/podcast/brief-review.tsx b/surfsense_web/components/tool-ui/podcast/brief-review.tsx index 9679fd8fa..9d244d191 100644 --- a/surfsense_web/components/tool-ui/podcast/brief-review.tsx +++ b/surfsense_web/components/tool-ui/podcast/brief-review.tsx @@ -26,6 +26,7 @@ import { import type { LivePodcast } from "@/hooks/use-podcast-live"; import { podcastsApiService } from "@/lib/apis/podcasts-api.service"; import { AppError } from "@/lib/error"; +import { VoicePreviewButton } from "./voice-preview-button"; // A "*" voice speaks whatever language the text is in (mirrors the backend // catalog's ANY_LANGUAGE sentinel). @@ -274,23 +275,26 @@ export function BriefReview({ podcast, spec, onApproved }: BriefReviewProps) { -