"""Audible previews so users pick voices by sound, not by name.

A preview is a short sample sentence synthesised in the voice's own language.
Samples are served through the same content-addressed cache the renderer uses,
so each voice costs at most one synthesis per cache lifetime — repeat listens
while comparing voices are free.
"""

from __future__ import annotations

import tempfile
from pathlib import Path

from app.podcasts.rendering.cache import SegmentCache
from app.podcasts.tts import SynthesisRequest, TextToSpeech

from .voice import ANY_LANGUAGE, CatalogVoice

# Previews are user-independent, so one rendered sample serves everyone.
PREVIEW_CACHE_ROOT = Path(tempfile.gettempdir()) / "surfsense_podcasts" / "previews"

# Language used when a voice declares ANY_LANGUAGE, and the sample text used
# when a voice's language has no entry in _SAMPLE_TEXTS.
_FALLBACK_LANGUAGE = "en"

# A voice previews best speaking its own language.
# Keys are lowercase primary language codes, as produced by _sample_text().
_SAMPLE_TEXTS = {
    "en": "Hi there! This is how I sound when narrating your podcast.",
    "es": "¡Hola! Así sueno cuando narro tu pódcast.",
    "fr": "Bonjour ! Voici ma voix quand je raconte votre podcast.",
    "hi": "नमस्ते! आपका पॉडकास्ट सुनाते समय मेरी आवाज़ ऐसी होती है।",
    "it": "Ciao! Questa è la mia voce quando racconto il tuo podcast.",
    "ja": "こんにちは。ポッドキャストをお届けするときの私の声です。",
    "pt": "Olá! É assim que eu soo ao narrar o seu podcast.",
    "zh": "你好!这就是我为你播报播客时的声音。",  # noqa: RUF001
}

# Audio container name -> HTTP content type; see _content_type() for the
# default applied to containers not listed here.
_CONTENT_TYPES = {"mp3": "audio/mpeg", "wav": "audio/wav"}


async def render_voice_preview(
    voice: CatalogVoice, tts: TextToSpeech
) -> tuple[bytes, str]:
    """Return ``(audio_bytes, content_type)`` for a sample spoken by ``voice``.

    The sample is looked up in the preview cache under ``PREVIEW_CACHE_ROOT``
    first; only on a miss is it synthesised with ``tts`` and stored.

    Args:
        voice: Catalog entry to preview. Its ``language`` selects the sample
            sentence; ``ANY_LANGUAGE`` is previewed in the fallback language.
        tts: Engine used to synthesise the sample on a cache miss. Its
            ``container`` is the format looked up in the cache.

    Returns:
        The audio bytes and the content type matching their container.
    """
    language = _FALLBACK_LANGUAGE if voice.language == ANY_LANGUAGE else voice.language
    request = SynthesisRequest(
        text=_sample_text(language), voice=voice.native_ref, language=language
    )

    cache = SegmentCache(PREVIEW_CACHE_ROOT)
    key = cache.key(request)

    cached = cache.get(key, tts.container)
    if cached is not None:
        return cached.read_bytes(), _content_type(tts.container)

    audio = await tts.synthesize(request)
    # NOTE(review): the lookup above uses ``tts.container`` while the store uses
    # ``audio.container``; this presumably relies on the two always matching,
    # otherwise the entry would never be found again — confirm.
    cache.put(key, audio.container, audio.data)
    return audio.data, _content_type(audio.container)


def _sample_text(language: str) -> str:
    """Return the sample sentence for ``language``, defaulting to English.

    Only the primary subtag is considered, compared case-insensitively, so
    ``pt-BR`` resolves to the ``pt`` sample. Languages without their own
    sample get the ``_FALLBACK_LANGUAGE`` text.
    """
    # Treat ``_`` like ``-`` so an underscore-separated tag such as ``pt_BR``
    # also reduces to ``pt`` instead of silently getting the English sample.
    primary = language.replace("_", "-").split("-", 1)[0].strip().lower()
    return _SAMPLE_TEXTS.get(primary, _SAMPLE_TEXTS[_FALLBACK_LANGUAGE])


def _content_type(container: str) -> str:
    """Map an audio container name to its content type.

    Containers missing from ``_CONTENT_TYPES`` map to
    ``application/octet-stream``.
    """
    return _CONTENT_TYPES.get(container, "application/octet-stream")