diff --git a/docker/docker-compose.dev.yml b/docker/docker-compose.dev.yml index 35effefc0..e8c9f96a9 100644 --- a/docker/docker-compose.dev.yml +++ b/docker/docker-compose.dev.yml @@ -106,6 +106,7 @@ services: volumes: - ../surfsense_backend/app:/app/app - shared_temp:/shared_tmp + - object_store:/app/.local_object_store env_file: - ../surfsense_backend/.env extra_hosts: @@ -119,6 +120,7 @@ services: - PYTHONPATH=/app - UVICORN_LOOP=asyncio - UNSTRUCTURED_HAS_PATCHED_LOOP=1 + - FILE_STORAGE_LOCAL_PATH=/app/.local_object_store - LANGCHAIN_TRACING_V2=false - LANGSMITH_TRACING=false - AUTH_TYPE=${AUTH_TYPE:-LOCAL} @@ -171,6 +173,7 @@ services: volumes: - ../surfsense_backend/app:/app/app - shared_temp:/shared_tmp + - object_store:/app/.local_object_store env_file: - ../surfsense_backend/.env extra_hosts: @@ -182,6 +185,7 @@ services: - REDIS_APP_URL=${REDIS_URL:-redis://redis:6379/0} - CELERY_TASK_DEFAULT_QUEUE=surfsense - PYTHONPATH=/app + - FILE_STORAGE_LOCAL_PATH=/app/.local_object_store - SEARXNG_DEFAULT_HOST=${SEARXNG_DEFAULT_HOST:-http://searxng:8080} - SERVICE_ROLE=worker depends_on: @@ -278,6 +282,8 @@ volumes: name: surfsense-dev-redis shared_temp: name: surfsense-dev-shared-temp + object_store: + name: surfsense-dev-object-store zero_cache_data: name: surfsense-dev-zero-cache whatsapp_sessions: diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 9bbf28ffd..dc0d8b3ae 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -100,6 +100,7 @@ services: - "${BACKEND_PORT:-8929}:8000" volumes: - shared_temp:/shared_tmp + - object_store:/app/.local_object_store env_file: - .env extra_hosts: @@ -113,6 +114,7 @@ services: PYTHONPATH: /app UVICORN_LOOP: asyncio UNSTRUCTURED_HAS_PATCHED_LOOP: "1" + FILE_STORAGE_LOCAL_PATH: /app/.local_object_store NEXT_FRONTEND_URL: ${NEXT_FRONTEND_URL:-http://localhost:${FRONTEND_PORT:-3929}} SEARXNG_DEFAULT_HOST: ${SEARXNG_DEFAULT_HOST:-http://searxng:8080} WHATSAPP_BRIDGE_URL: ${WHATSAPP_BRIDGE_URL:-http://whatsapp-bridge:9929} @@ -165,6 +167,7 @@ services: image: ghcr.io/modsetter/surfsense-backend:${SURFSENSE_VERSION:-latest}${SURFSENSE_VARIANT:+-${SURFSENSE_VARIANT}} volumes: - shared_temp:/shared_tmp + - object_store:/app/.local_object_store env_file: - .env extra_hosts: @@ -176,6 +179,7 @@ services: REDIS_APP_URL: ${REDIS_URL:-redis://redis:6379/0} CELERY_TASK_DEFAULT_QUEUE: surfsense PYTHONPATH: /app + FILE_STORAGE_LOCAL_PATH: /app/.local_object_store SEARXNG_DEFAULT_HOST: ${SEARXNG_DEFAULT_HOST:-http://searxng:8080} SERVICE_ROLE: worker depends_on: @@ -278,6 +282,8 @@ volumes: name: surfsense-redis shared_temp: name: surfsense-shared-temp + object_store: + name: surfsense-object-store zero_cache_data: name: surfsense-zero-cache whatsapp_sessions: diff --git a/surfsense_backend/app/podcasts/api/routes.py b/surfsense_backend/app/podcasts/api/routes.py index 80e5e1c64..f55febcbd 100644 --- a/surfsense_backend/app/podcasts/api/routes.py +++ b/surfsense_backend/app/podcasts/api/routes.py @@ -27,14 +27,14 @@ from app.db import ( get_async_session, ) from app.podcasts.generation.brief import propose_brief -from app.podcasts.persistence import Podcast, PodcastRepository +from app.podcasts.persistence import Podcast, PodcastRepository, PodcastStatus from app.podcasts.service import ( InvalidTransitionError, PodcastService, PreconditionFailedError, SpecConflictError, ) -from app.podcasts.storage import open_audio_stream, purge_audio +from app.podcasts.storage import audio_exists, open_audio_stream, purge_audio from app.podcasts.tasks import draft_transcript_task from app.podcasts.tts import get_text_to_speech from app.podcasts.voices import ( @@ -272,6 +272,11 @@ async def stream_podcast( podcast = await _load(session, user, podcast_id, Permission.PODCASTS_READ) if podcast.storage_key: + # Verify first so a missing object is a 404, not a mid-stream crash. + if not await audio_exists(podcast): + raise HTTPException( + status_code=404, detail="Podcast audio is no longer available" + ) return StreamingResponse( open_audio_stream(podcast), media_type="audio/mpeg", @@ -295,7 +300,10 @@ async def stream_podcast( }, ) - raise HTTPException(status_code=404, detail="Podcast audio not found") + # No audio: terminal states never will have any, otherwise it's in flight. + if PodcastStatus(podcast.status).is_terminal: + raise HTTPException(status_code=404, detail="Podcast audio not found") + raise HTTPException(status_code=409, detail="Podcast audio is not ready yet") async def _require( diff --git a/surfsense_backend/app/podcasts/storage.py b/surfsense_backend/app/podcasts/storage.py index f02429dff..c3326460d 100644 --- a/surfsense_backend/app/podcasts/storage.py +++ b/surfsense_backend/app/podcasts/storage.py @@ -42,6 +42,13 @@ def open_audio_stream(podcast: Podcast) -> AsyncIterator[bytes]: return get_storage_backend().open_stream(podcast.storage_key) +async def audio_exists(podcast: Podcast) -> bool: + """Whether the podcast's stored audio object is actually present.""" + return bool(podcast.storage_key) and await get_storage_backend().exists( + podcast.storage_key + ) + + async def purge_audio(podcast: Podcast) -> None: """Delete a podcast's stored audio if present; a missing object is fine.""" await purge_audio_object(podcast.storage_key) diff --git a/surfsense_backend/app/routes/public_chat_routes.py b/surfsense_backend/app/routes/public_chat_routes.py index 53f4c2651..516e976e6 100644 --- a/surfsense_backend/app/routes/public_chat_routes.py +++ b/surfsense_backend/app/routes/public_chat_routes.py @@ -103,8 +103,14 @@ async def stream_public_podcast( if storage_key: from app.file_storage.factory import get_storage_backend + backend = get_storage_backend() + # Verify first so a missing object is a 404, not a mid-stream crash. + if not await backend.exists(storage_key): + raise HTTPException( + status_code=404, detail="Podcast audio is no longer available" + ) return StreamingResponse( - get_storage_backend().open_stream(storage_key), + backend.open_stream(storage_key), media_type="audio/mpeg", headers={"Accept-Ranges": "bytes"}, ) diff --git a/surfsense_backend/tests/integration/podcasts/conftest.py b/surfsense_backend/tests/integration/podcasts/conftest.py index f244c17d2..240f57f96 100644 --- a/surfsense_backend/tests/integration/podcasts/conftest.py +++ b/surfsense_backend/tests/integration/podcasts/conftest.py @@ -120,6 +120,9 @@ class FakeStorageBackend: async def open_stream(self, key: str) -> AsyncIterator[bytes]: yield self.objects.get(key, b"audio-bytes") + async def exists(self, key: str) -> bool: + return key in self.objects + async def delete(self, key: str) -> None: self.deleted.append(key) diff --git a/surfsense_backend/tests/integration/podcasts/test_public_stream.py b/surfsense_backend/tests/integration/podcasts/test_public_stream.py index d2ba1d1b9..63f634234 100644 --- a/surfsense_backend/tests/integration/podcasts/test_public_stream.py +++ b/surfsense_backend/tests/integration/podcasts/test_public_stream.py @@ -48,6 +48,22 @@ async def test_public_stream_serves_audio_via_storage_key( assert resp.content == b"public-audio" +async def test_public_stream_404_when_object_missing( + client, db_session, db_search_space, db_user, fake_storage +): + await _snapshot( + db_session, + search_space_id=db_search_space.id, + user=db_user, + token="tok-gone", + podcasts=[{"original_id": 556, "storage_key": "podcasts/gone.mp3"}], + ) + + resp = await client.get("/api/v1/public/tok-gone/podcasts/556/stream") + + assert resp.status_code == 404 + + async def test_public_stream_404_when_podcast_absent_from_snapshot( client, db_session, db_search_space, db_user ): diff --git a/surfsense_backend/tests/integration/podcasts/test_streaming.py b/surfsense_backend/tests/integration/podcasts/test_streaming.py index 82456bac9..b924e2971 100644 --- a/surfsense_backend/tests/integration/podcasts/test_streaming.py +++ b/surfsense_backend/tests/integration/podcasts/test_streaming.py @@ -1,8 +1,7 @@ """Streaming a podcast's rendered audio over HTTP. -A ready podcast streams its bytes from the storage backend; a podcast with no -stored audio returns 404. Storage is an in-memory backend (the object store is a -system boundary). +A ready podcast streams its bytes; an in-flight one is 409, a stored-but-missing +object is 404. Storage is an in-memory backend (the object store is a boundary). """ from __future__ import annotations @@ -31,11 +30,23 @@ async def test_stream_serves_stored_audio( assert resp.content == b"the-audio" -async def test_stream_404_when_no_audio(client, db_search_space, make_podcast): +async def test_stream_409_while_in_flight(client, db_search_space, make_podcast): podcast = await make_podcast( search_space_id=db_search_space.id, status=PodcastStatus.DRAFTING ) resp = await client.get(f"{BASE}/{podcast.id}/stream") + assert resp.status_code == 409 + + +async def test_stream_404_when_object_missing( + client, db_search_space, make_podcast, fake_storage +): + podcast = await make_podcast( + search_space_id=db_search_space.id, status=PodcastStatus.READY + ) + + resp = await client.get(f"{BASE}/{podcast.id}/stream") + assert resp.status_code == 404