mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-16 21:05:20 +02:00
Merge pull request #1500 from CREDO23/fix/podcast-stream-missing-audio
fix(podcasts): guard stream when audio missing and share object store volume
This commit is contained in:
commit
683a827300
8 changed files with 71 additions and 8 deletions
|
|
@ -106,6 +106,7 @@ services:
|
|||
volumes:
|
||||
- ../surfsense_backend/app:/app/app
|
||||
- shared_temp:/shared_tmp
|
||||
- object_store:/app/.local_object_store
|
||||
env_file:
|
||||
- ../surfsense_backend/.env
|
||||
extra_hosts:
|
||||
|
|
@ -119,6 +120,7 @@ services:
|
|||
- PYTHONPATH=/app
|
||||
- UVICORN_LOOP=asyncio
|
||||
- UNSTRUCTURED_HAS_PATCHED_LOOP=1
|
||||
- FILE_STORAGE_LOCAL_PATH=/app/.local_object_store
|
||||
- LANGCHAIN_TRACING_V2=false
|
||||
- LANGSMITH_TRACING=false
|
||||
- AUTH_TYPE=${AUTH_TYPE:-LOCAL}
|
||||
|
|
@ -171,6 +173,7 @@ services:
|
|||
volumes:
|
||||
- ../surfsense_backend/app:/app/app
|
||||
- shared_temp:/shared_tmp
|
||||
- object_store:/app/.local_object_store
|
||||
env_file:
|
||||
- ../surfsense_backend/.env
|
||||
extra_hosts:
|
||||
|
|
@ -182,6 +185,7 @@ services:
|
|||
- REDIS_APP_URL=${REDIS_URL:-redis://redis:6379/0}
|
||||
- CELERY_TASK_DEFAULT_QUEUE=surfsense
|
||||
- PYTHONPATH=/app
|
||||
- FILE_STORAGE_LOCAL_PATH=/app/.local_object_store
|
||||
- SEARXNG_DEFAULT_HOST=${SEARXNG_DEFAULT_HOST:-http://searxng:8080}
|
||||
- SERVICE_ROLE=worker
|
||||
depends_on:
|
||||
|
|
@ -278,6 +282,8 @@ volumes:
|
|||
name: surfsense-dev-redis
|
||||
shared_temp:
|
||||
name: surfsense-dev-shared-temp
|
||||
object_store:
|
||||
name: surfsense-dev-object-store
|
||||
zero_cache_data:
|
||||
name: surfsense-dev-zero-cache
|
||||
whatsapp_sessions:
|
||||
|
|
|
|||
|
|
@ -100,6 +100,7 @@ services:
|
|||
- "${BACKEND_PORT:-8929}:8000"
|
||||
volumes:
|
||||
- shared_temp:/shared_tmp
|
||||
- object_store:/app/.local_object_store
|
||||
env_file:
|
||||
- .env
|
||||
extra_hosts:
|
||||
|
|
@ -113,6 +114,7 @@ services:
|
|||
PYTHONPATH: /app
|
||||
UVICORN_LOOP: asyncio
|
||||
UNSTRUCTURED_HAS_PATCHED_LOOP: "1"
|
||||
FILE_STORAGE_LOCAL_PATH: /app/.local_object_store
|
||||
NEXT_FRONTEND_URL: ${NEXT_FRONTEND_URL:-http://localhost:${FRONTEND_PORT:-3929}}
|
||||
SEARXNG_DEFAULT_HOST: ${SEARXNG_DEFAULT_HOST:-http://searxng:8080}
|
||||
WHATSAPP_BRIDGE_URL: ${WHATSAPP_BRIDGE_URL:-http://whatsapp-bridge:9929}
|
||||
|
|
@ -165,6 +167,7 @@ services:
|
|||
image: ghcr.io/modsetter/surfsense-backend:${SURFSENSE_VERSION:-latest}${SURFSENSE_VARIANT:+-${SURFSENSE_VARIANT}}
|
||||
volumes:
|
||||
- shared_temp:/shared_tmp
|
||||
- object_store:/app/.local_object_store
|
||||
env_file:
|
||||
- .env
|
||||
extra_hosts:
|
||||
|
|
@ -176,6 +179,7 @@ services:
|
|||
REDIS_APP_URL: ${REDIS_URL:-redis://redis:6379/0}
|
||||
CELERY_TASK_DEFAULT_QUEUE: surfsense
|
||||
PYTHONPATH: /app
|
||||
FILE_STORAGE_LOCAL_PATH: /app/.local_object_store
|
||||
SEARXNG_DEFAULT_HOST: ${SEARXNG_DEFAULT_HOST:-http://searxng:8080}
|
||||
SERVICE_ROLE: worker
|
||||
depends_on:
|
||||
|
|
@ -278,6 +282,8 @@ volumes:
|
|||
name: surfsense-redis
|
||||
shared_temp:
|
||||
name: surfsense-shared-temp
|
||||
object_store:
|
||||
name: surfsense-object-store
|
||||
zero_cache_data:
|
||||
name: surfsense-zero-cache
|
||||
whatsapp_sessions:
|
||||
|
|
|
|||
|
|
@ -27,14 +27,14 @@ from app.db import (
|
|||
get_async_session,
|
||||
)
|
||||
from app.podcasts.generation.brief import propose_brief
|
||||
from app.podcasts.persistence import Podcast, PodcastRepository
|
||||
from app.podcasts.persistence import Podcast, PodcastRepository, PodcastStatus
|
||||
from app.podcasts.service import (
|
||||
InvalidTransitionError,
|
||||
PodcastService,
|
||||
PreconditionFailedError,
|
||||
SpecConflictError,
|
||||
)
|
||||
from app.podcasts.storage import open_audio_stream, purge_audio
|
||||
from app.podcasts.storage import audio_exists, open_audio_stream, purge_audio
|
||||
from app.podcasts.tasks import draft_transcript_task
|
||||
from app.podcasts.tts import get_text_to_speech
|
||||
from app.podcasts.voices import (
|
||||
|
|
@ -272,6 +272,11 @@ async def stream_podcast(
|
|||
podcast = await _load(session, user, podcast_id, Permission.PODCASTS_READ)
|
||||
|
||||
if podcast.storage_key:
|
||||
# Verify first so a missing object is a 404, not a mid-stream crash.
|
||||
if not await audio_exists(podcast):
|
||||
raise HTTPException(
|
||||
status_code=404, detail="Podcast audio is no longer available"
|
||||
)
|
||||
return StreamingResponse(
|
||||
open_audio_stream(podcast),
|
||||
media_type="audio/mpeg",
|
||||
|
|
@ -295,7 +300,10 @@ async def stream_podcast(
|
|||
},
|
||||
)
|
||||
|
||||
raise HTTPException(status_code=404, detail="Podcast audio not found")
|
||||
# No audio: terminal states never will have any, otherwise it's in flight.
|
||||
if PodcastStatus(podcast.status).is_terminal:
|
||||
raise HTTPException(status_code=404, detail="Podcast audio not found")
|
||||
raise HTTPException(status_code=409, detail="Podcast audio is not ready yet")
|
||||
|
||||
|
||||
async def _require(
|
||||
|
|
|
|||
|
|
@ -42,6 +42,13 @@ def open_audio_stream(podcast: Podcast) -> AsyncIterator[bytes]:
|
|||
return get_storage_backend().open_stream(podcast.storage_key)
|
||||
|
||||
|
||||
async def audio_exists(podcast: Podcast) -> bool:
|
||||
"""Whether the podcast's stored audio object is actually present."""
|
||||
return bool(podcast.storage_key) and await get_storage_backend().exists(
|
||||
podcast.storage_key
|
||||
)
|
||||
|
||||
|
||||
async def purge_audio(podcast: Podcast) -> None:
|
||||
"""Delete a podcast's stored audio if present; a missing object is fine."""
|
||||
await purge_audio_object(podcast.storage_key)
|
||||
|
|
|
|||
|
|
@ -103,8 +103,14 @@ async def stream_public_podcast(
|
|||
if storage_key:
|
||||
from app.file_storage.factory import get_storage_backend
|
||||
|
||||
backend = get_storage_backend()
|
||||
# Verify first so a missing object is a 404, not a mid-stream crash.
|
||||
if not await backend.exists(storage_key):
|
||||
raise HTTPException(
|
||||
status_code=404, detail="Podcast audio is no longer available"
|
||||
)
|
||||
return StreamingResponse(
|
||||
get_storage_backend().open_stream(storage_key),
|
||||
backend.open_stream(storage_key),
|
||||
media_type="audio/mpeg",
|
||||
headers={"Accept-Ranges": "bytes"},
|
||||
)
|
||||
|
|
|
|||
|
|
@ -120,6 +120,9 @@ class FakeStorageBackend:
|
|||
async def open_stream(self, key: str) -> AsyncIterator[bytes]:
|
||||
yield self.objects.get(key, b"audio-bytes")
|
||||
|
||||
async def exists(self, key: str) -> bool:
|
||||
return key in self.objects
|
||||
|
||||
async def delete(self, key: str) -> None:
|
||||
self.deleted.append(key)
|
||||
|
||||
|
|
|
|||
|
|
@ -48,6 +48,22 @@ async def test_public_stream_serves_audio_via_storage_key(
|
|||
assert resp.content == b"public-audio"
|
||||
|
||||
|
||||
async def test_public_stream_404_when_object_missing(
|
||||
client, db_session, db_search_space, db_user, fake_storage
|
||||
):
|
||||
await _snapshot(
|
||||
db_session,
|
||||
search_space_id=db_search_space.id,
|
||||
user=db_user,
|
||||
token="tok-gone",
|
||||
podcasts=[{"original_id": 556, "storage_key": "podcasts/gone.mp3"}],
|
||||
)
|
||||
|
||||
resp = await client.get("/api/v1/public/tok-gone/podcasts/556/stream")
|
||||
|
||||
assert resp.status_code == 404
|
||||
|
||||
|
||||
async def test_public_stream_404_when_podcast_absent_from_snapshot(
|
||||
client, db_session, db_search_space, db_user
|
||||
):
|
||||
|
|
|
|||
|
|
@ -1,8 +1,7 @@
|
|||
"""Streaming a podcast's rendered audio over HTTP.
|
||||
|
||||
A ready podcast streams its bytes from the storage backend; a podcast with no
|
||||
stored audio returns 404. Storage is an in-memory backend (the object store is a
|
||||
system boundary).
|
||||
A ready podcast streams its bytes; an in-flight one is 409, a stored-but-missing
|
||||
object is 404. Storage is an in-memory backend (the object store is a boundary).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
|
@ -31,11 +30,23 @@ async def test_stream_serves_stored_audio(
|
|||
assert resp.content == b"the-audio"
|
||||
|
||||
|
||||
async def test_stream_404_when_no_audio(client, db_search_space, make_podcast):
|
||||
async def test_stream_409_while_in_flight(client, db_search_space, make_podcast):
|
||||
podcast = await make_podcast(
|
||||
search_space_id=db_search_space.id, status=PodcastStatus.DRAFTING
|
||||
)
|
||||
|
||||
resp = await client.get(f"{BASE}/{podcast.id}/stream")
|
||||
|
||||
assert resp.status_code == 409
|
||||
|
||||
|
||||
async def test_stream_404_when_object_missing(
|
||||
client, db_search_space, make_podcast, fake_storage
|
||||
):
|
||||
podcast = await make_podcast(
|
||||
search_space_id=db_search_space.id, status=PodcastStatus.READY
|
||||
)
|
||||
|
||||
resp = await client.get(f"{BASE}/{podcast.id}/stream")
|
||||
|
||||
assert resp.status_code == 404
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue