Merge pull request #1500 from CREDO23/fix/podcast-stream-missing-audio

fix(podcasts): guard stream when audio missing and share object store volume
This commit is contained in:
Thierry CH. 2026-06-16 11:16:27 -07:00 committed by GitHub
commit 683a827300
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 71 additions and 8 deletions

View file

@ -106,6 +106,7 @@ services:
volumes:
- ../surfsense_backend/app:/app/app
- shared_temp:/shared_tmp
- object_store:/app/.local_object_store
env_file:
- ../surfsense_backend/.env
extra_hosts:
@ -119,6 +120,7 @@ services:
- PYTHONPATH=/app
- UVICORN_LOOP=asyncio
- UNSTRUCTURED_HAS_PATCHED_LOOP=1
- FILE_STORAGE_LOCAL_PATH=/app/.local_object_store
- LANGCHAIN_TRACING_V2=false
- LANGSMITH_TRACING=false
- AUTH_TYPE=${AUTH_TYPE:-LOCAL}
@ -171,6 +173,7 @@ services:
volumes:
- ../surfsense_backend/app:/app/app
- shared_temp:/shared_tmp
- object_store:/app/.local_object_store
env_file:
- ../surfsense_backend/.env
extra_hosts:
@ -182,6 +185,7 @@ services:
- REDIS_APP_URL=${REDIS_URL:-redis://redis:6379/0}
- CELERY_TASK_DEFAULT_QUEUE=surfsense
- PYTHONPATH=/app
- FILE_STORAGE_LOCAL_PATH=/app/.local_object_store
- SEARXNG_DEFAULT_HOST=${SEARXNG_DEFAULT_HOST:-http://searxng:8080}
- SERVICE_ROLE=worker
depends_on:
@ -278,6 +282,8 @@ volumes:
name: surfsense-dev-redis
shared_temp:
name: surfsense-dev-shared-temp
object_store:
name: surfsense-dev-object-store
zero_cache_data:
name: surfsense-dev-zero-cache
whatsapp_sessions:

View file

@ -100,6 +100,7 @@ services:
- "${BACKEND_PORT:-8929}:8000"
volumes:
- shared_temp:/shared_tmp
- object_store:/app/.local_object_store
env_file:
- .env
extra_hosts:
@ -113,6 +114,7 @@ services:
PYTHONPATH: /app
UVICORN_LOOP: asyncio
UNSTRUCTURED_HAS_PATCHED_LOOP: "1"
FILE_STORAGE_LOCAL_PATH: /app/.local_object_store
NEXT_FRONTEND_URL: ${NEXT_FRONTEND_URL:-http://localhost:${FRONTEND_PORT:-3929}}
SEARXNG_DEFAULT_HOST: ${SEARXNG_DEFAULT_HOST:-http://searxng:8080}
WHATSAPP_BRIDGE_URL: ${WHATSAPP_BRIDGE_URL:-http://whatsapp-bridge:9929}
@ -165,6 +167,7 @@ services:
image: ghcr.io/modsetter/surfsense-backend:${SURFSENSE_VERSION:-latest}${SURFSENSE_VARIANT:+-${SURFSENSE_VARIANT}}
volumes:
- shared_temp:/shared_tmp
- object_store:/app/.local_object_store
env_file:
- .env
extra_hosts:
@ -176,6 +179,7 @@ services:
REDIS_APP_URL: ${REDIS_URL:-redis://redis:6379/0}
CELERY_TASK_DEFAULT_QUEUE: surfsense
PYTHONPATH: /app
FILE_STORAGE_LOCAL_PATH: /app/.local_object_store
SEARXNG_DEFAULT_HOST: ${SEARXNG_DEFAULT_HOST:-http://searxng:8080}
SERVICE_ROLE: worker
depends_on:
@ -278,6 +282,8 @@ volumes:
name: surfsense-redis
shared_temp:
name: surfsense-shared-temp
object_store:
name: surfsense-object-store
zero_cache_data:
name: surfsense-zero-cache
whatsapp_sessions:

View file

@ -27,14 +27,14 @@ from app.db import (
get_async_session,
)
from app.podcasts.generation.brief import propose_brief
from app.podcasts.persistence import Podcast, PodcastRepository
from app.podcasts.persistence import Podcast, PodcastRepository, PodcastStatus
from app.podcasts.service import (
InvalidTransitionError,
PodcastService,
PreconditionFailedError,
SpecConflictError,
)
from app.podcasts.storage import open_audio_stream, purge_audio
from app.podcasts.storage import audio_exists, open_audio_stream, purge_audio
from app.podcasts.tasks import draft_transcript_task
from app.podcasts.tts import get_text_to_speech
from app.podcasts.voices import (
@ -272,6 +272,11 @@ async def stream_podcast(
podcast = await _load(session, user, podcast_id, Permission.PODCASTS_READ)
if podcast.storage_key:
# Verify first so a missing object is a 404, not a mid-stream crash.
if not await audio_exists(podcast):
raise HTTPException(
status_code=404, detail="Podcast audio is no longer available"
)
return StreamingResponse(
open_audio_stream(podcast),
media_type="audio/mpeg",
@ -295,7 +300,10 @@ async def stream_podcast(
},
)
raise HTTPException(status_code=404, detail="Podcast audio not found")
# No audio: terminal states never will have any, otherwise it's in flight.
if PodcastStatus(podcast.status).is_terminal:
raise HTTPException(status_code=404, detail="Podcast audio not found")
raise HTTPException(status_code=409, detail="Podcast audio is not ready yet")
async def _require(

View file

@ -42,6 +42,13 @@ def open_audio_stream(podcast: Podcast) -> AsyncIterator[bytes]:
return get_storage_backend().open_stream(podcast.storage_key)
async def audio_exists(podcast: Podcast) -> bool:
    """Report whether the podcast's stored audio object is actually present.

    A podcast without a storage key has nothing to look up, so the storage
    backend is only queried when a key is set.
    """
    key = podcast.storage_key
    if not key:
        return False
    return await get_storage_backend().exists(key)
async def purge_audio(podcast: Podcast) -> None:
"""Delete a podcast's stored audio if present; a missing object is fine."""
await purge_audio_object(podcast.storage_key)

View file

@ -103,8 +103,14 @@ async def stream_public_podcast(
if storage_key:
from app.file_storage.factory import get_storage_backend
backend = get_storage_backend()
# Verify first so a missing object is a 404, not a mid-stream crash.
if not await backend.exists(storage_key):
raise HTTPException(
status_code=404, detail="Podcast audio is no longer available"
)
return StreamingResponse(
get_storage_backend().open_stream(storage_key),
backend.open_stream(storage_key),
media_type="audio/mpeg",
headers={"Accept-Ranges": "bytes"},
)

View file

@ -120,6 +120,9 @@ class FakeStorageBackend:
async def open_stream(self, key: str) -> AsyncIterator[bytes]:
    """Yield the bytes held under ``key`` as a single chunk.

    A key that is not in ``self.objects`` falls back to the placeholder
    ``b"audio-bytes"`` rather than raising.
    """
    payload = self.objects.get(key, b"audio-bytes")
    yield payload
async def exists(self, key: str) -> bool:
    """Tell whether ``key`` is present in the in-memory object map."""
    stored_keys = self.objects
    return key in stored_keys
async def delete(self, key: str) -> None:
self.deleted.append(key)

View file

@ -48,6 +48,22 @@ async def test_public_stream_serves_audio_via_storage_key(
assert resp.content == b"public-audio"
async def test_public_stream_404_when_object_missing(
    client, db_session, db_search_space, db_user, fake_storage
):
    """A public snapshot podcast whose audio object is absent streams as 404."""
    # Nothing in this test stores an object under the snapshot's storage key.
    snapshot_podcasts = [{"original_id": 556, "storage_key": "podcasts/gone.mp3"}]
    await _snapshot(
        db_session,
        search_space_id=db_search_space.id,
        user=db_user,
        token="tok-gone",
        podcasts=snapshot_podcasts,
    )

    response = await client.get("/api/v1/public/tok-gone/podcasts/556/stream")

    assert response.status_code == 404
async def test_public_stream_404_when_podcast_absent_from_snapshot(
client, db_session, db_search_space, db_user
):

View file

@ -1,8 +1,7 @@
"""Streaming a podcast's rendered audio over HTTP.
A ready podcast streams its bytes from the storage backend; a podcast with no
stored audio returns 404. Storage is an in-memory backend (the object store is a
system boundary).
A ready podcast streams its bytes; an in-flight one is 409, a stored-but-missing
object is 404. Storage is an in-memory backend (the object store is a boundary).
"""
from __future__ import annotations
@ -31,11 +30,23 @@ async def test_stream_serves_stored_audio(
assert resp.content == b"the-audio"
async def test_stream_404_when_no_audio(client, db_search_space, make_podcast):
async def test_stream_409_while_in_flight(client, db_search_space, make_podcast):
    """Streaming a podcast that is still DRAFTING answers 409."""
    drafting = await make_podcast(
        search_space_id=db_search_space.id, status=PodcastStatus.DRAFTING
    )
    stream_url = f"{BASE}/{drafting.id}/stream"

    response = await client.get(stream_url)

    assert response.status_code == 409
async def test_stream_404_when_object_missing(
    client, db_search_space, make_podcast, fake_storage
):
    """Streaming a READY podcast whose audio this test never stores answers 404."""
    # NOTE(review): no storage_key is passed here, so this only reaches the
    # stored-but-missing branch if make_podcast assigns one for READY
    # podcasts; otherwise the 404 comes from the no-audio path. Confirm
    # against the fixture.
    ready = await make_podcast(
        search_space_id=db_search_space.id, status=PodcastStatus.READY
    )
    stream_url = f"{BASE}/{ready.id}/stream"

    response = await client.get(stream_url)

    assert response.status_code == 404