feat(podcasts): add voice catalog

This commit is contained in:
CREDO23 2026-06-10 18:44:03 +02:00
parent 65b6c2d357
commit ee24925747
4 changed files with 154 additions and 0 deletions

View file

@ -0,0 +1,22 @@
"""Voices: the catalog of selectable TTS voices and the active provider.
Replaces the legacy hardcoded speaker-id voice maps. Callers obtain the
catalog via :func:`get_voice_catalog` and identify the configured provider via
:func:`provider_from_service`.
"""
from __future__ import annotations
from .catalog import VoiceCatalog, get_voice_catalog
from .provider import TtsProvider, provider_from_service
from .voice import ANY_LANGUAGE, CatalogVoice, VoiceGender
# Names exported by a star-import of this package; each one is imported above
# from the catalog, provider or voice submodule.
__all__ = [
"ANY_LANGUAGE",
"CatalogVoice",
"TtsProvider",
"VoiceCatalog",
"VoiceGender",
"get_voice_catalog",
"provider_from_service",
]

View file

@ -0,0 +1,55 @@
"""The voice catalog: look up and filter selectable voices.
A :class:`VoiceCatalog` is the single source of truth for which voices exist,
replacing the hardcoded speaker-id maps. Resolution uses it to pick defaults
for a brief, the API exposes it as picker options, and the renderer uses it to
turn a stored ``voice_id`` back into the provider-native reference.
"""
from __future__ import annotations
from collections.abc import Iterable
from functools import lru_cache
from .data import AZURE_VOICES, KOKORO_VOICES, OPENAI_VOICES, VERTEX_VOICES
from .provider import TtsProvider
from .voice import CatalogVoice
class VoiceCatalog:
    """Read-only index over a fixed set of :class:`CatalogVoice` entries.

    Voices can be fetched by ``voice_id`` or listed per provider; the
    per-provider listings keep the order in which the voices were supplied.
    """

    def __init__(self, voices: Iterable[CatalogVoice]) -> None:
        """Index ``voices`` by id and by provider.

        Raises:
            ValueError: if two voices carry the same ``voice_id``.
        """
        by_id: dict[str, CatalogVoice] = {}
        by_provider: dict[TtsProvider, list[CatalogVoice]] = {}
        for entry in voices:
            key = entry.voice_id
            if key in by_id:
                raise ValueError(f"duplicate voice_id: {key}")
            by_id[key] = entry
            by_provider.setdefault(entry.provider, []).append(entry)
        self._by_id = by_id
        self._by_provider = by_provider

    def get(self, voice_id: str) -> CatalogVoice:
        """Look up one voice by id; an unknown ``voice_id`` raises ``KeyError``."""
        return self._by_id[voice_id]

    def for_provider(self, provider: TtsProvider) -> list[CatalogVoice]:
        """Return a fresh list of ``provider``'s voices, in catalog order.

        The list is a copy, so callers may mutate it without affecting the
        catalog. A provider with no voices yields an empty list.
        """
        return [*self._by_provider.get(provider, ())]

    def for_language(
        self, provider: TtsProvider, language: str
    ) -> list[CatalogVoice]:
        """Return ``provider``'s voices whose ``speaks(language)`` is true.

        Catalog order is preserved.
        """
        offered = self.for_provider(provider)
        return [voice for voice in offered if voice.speaks(language)]

    def supports_language(self, provider: TtsProvider, language: str) -> bool:
        """Tell whether any voice of ``provider`` speaks ``language``."""
        # Stop at the first match; the remaining voices need not be checked.
        for voice in self.for_provider(provider):
            if voice.speaks(language):
                return True
        return False
@lru_cache(maxsize=1)
def get_voice_catalog() -> VoiceCatalog:
    """Return the shared catalog, building it on the first call only.

    The catalog concatenates the Kokoro, OpenAI, Azure and Vertex rosters in
    that order. ``lru_cache`` memoizes the result, so later calls get the same
    instance back.
    """
    rosters = (KOKORO_VOICES, OPENAI_VOICES, AZURE_VOICES, VERTEX_VOICES)
    return VoiceCatalog(voice for roster in rosters for voice in roster)

View file

@ -0,0 +1,27 @@
"""The TTS providers we carry voices for, and how to name one from config."""
from __future__ import annotations
from enum import StrEnum
class TtsProvider(StrEnum):
"""A speech provider whose voices the catalog enumerates."""
KOKORO = "kokoro"
OPENAI = "openai"
AZURE = "azure"
VERTEX_AI = "vertex_ai"
def provider_from_service(service: str) -> TtsProvider:
    """Resolve a ``TTS_SERVICE`` config string to its :class:`TtsProvider`.

    The value is a LiteLLM-style ``provider/model`` string such as
    ``openai/tts-1`` or ``vertex_ai/...``. Only the part before the first
    ``/`` is used, with surrounding whitespace removed and case ignored.
    Local Kokoro is the one special case: it is spelled ``local/kokoro``, so
    the ``local`` prefix resolves to :attr:`TtsProvider.KOKORO`.

    Raises:
        ValueError: if the prefix is neither ``local`` nor the value of a
            :class:`TtsProvider` member.
    """
    head, _, _ = service.partition("/")
    key = head.strip().lower()
    return TtsProvider.KOKORO if key == "local" else TtsProvider(key)

View file

@ -0,0 +1,50 @@
"""A catalog voice: a stable id paired with its provider-native reference."""
from __future__ import annotations
from dataclasses import dataclass
from enum import StrEnum
from app.podcasts.tts import VoiceRef
from .provider import TtsProvider
# Wildcard language for voices that speak whatever language the input text is
# in (e.g. OpenAI's voices); it matches every requested language.
ANY_LANGUAGE = "*"
class VoiceGender(StrEnum):
"""Perceived voice gender, used to pick distinct voices per speaker."""
MALE = "male"
FEMALE = "female"
NEUTRAL = "neutral"
@dataclass(frozen=True, slots=True)
class CatalogVoice:
"""One selectable voice.
``voice_id`` is the provider-prefixed, stable id stored on a speaker in the
brief (e.g. ``"kokoro:am_adam"``). ``native_ref`` is the untyped value the
TTS adapter passes to the provider a string for most, a mapping for
Vertex kept separate so renaming the catalog id never breaks synthesis.
"""
voice_id: str
provider: TtsProvider
language: str
display_name: str
gender: VoiceGender
native_ref: VoiceRef
def speaks(self, language: str) -> bool:
"""Whether this voice can render ``language`` (primary subtag match)."""
if self.language == ANY_LANGUAGE:
return True
return _primary(self.language) == _primary(language)
def _primary(language: str) -> str:
return language.split("-", 1)[0].strip().lower()