fix(gitignore): anchor data/ rule; track podcast voice catalogs

2026-07-26 23:51:14 +02:00 · 2026-06-12 00:06:37 +02:00 · 2026-06-12 00:06:37 +02:00 · 7b30a76856
commit 7b30a76856
parent 41f4a58663
6 changed files with 177 additions and 1 deletions
--- a/surfsense_backend/.gitignore
+++ b/surfsense_backend/.gitignore
@ -1,7 +1,7 @@
 .env
 .venv
 venv/
-data/
+/data/
 .local_object_store/
 __pycache__/
 .flashrank_cache
--- a/surfsense_backend/app/podcasts/voices/data/__init__.py
+++ b/surfsense_backend/app/podcasts/voices/data/__init__.py
@ -0,0 +1,10 @@
+"""Static per-provider voice rosters that compose the catalog."""
+
+from __future__ import annotations
+
+from .azure import AZURE_VOICES
+from .kokoro import KOKORO_VOICES
+from .openai import OPENAI_VOICES
+from .vertex import VERTEX_VOICES
+
+__all__ = ["AZURE_VOICES", "KOKORO_VOICES", "OPENAI_VOICES", "VERTEX_VOICES"]  # one roster per provider module imported above
--- a/surfsense_backend/app/podcasts/voices/data/azure.py
+++ b/surfsense_backend/app/podcasts/voices/data/azure.py
@ -0,0 +1,32 @@
+"""Azure TTS voices, routed through the OpenAI-compatible voice names.
+
+The deployment fronts Azure with OpenAI-style voice names (matching the legacy
+podcaster), so these mirror the OpenAI roster and, like it, speak any requested
+language.
+"""
+
+from __future__ import annotations
+
+from ..provider import TtsProvider
+from ..voice import ANY_LANGUAGE, CatalogVoice, VoiceGender
+
+
+def _voice(name: str, display: str, gender: VoiceGender) -> CatalogVoice:
+    return CatalogVoice(
+        voice_id=f"azure:{name}",  # catalog id = "azure:" prefix + voice name
+        provider=TtsProvider.AZURE,
+        language=ANY_LANGUAGE,  # every entry built here gets the any-language tag
+        display_name=display,
+        gender=gender,
+        native_ref=name,  # the bare OpenAI-style name is stored unchanged
+    )
+
+
+AZURE_VOICES: tuple[CatalogVoice, ...] = (  # args per entry: name, display, gender
+    _voice("alloy", "Alloy", VoiceGender.NEUTRAL),
+    _voice("echo", "Echo", VoiceGender.MALE),
+    _voice("fable", "Fable", VoiceGender.NEUTRAL),
+    _voice("onyx", "Onyx", VoiceGender.MALE),
+    _voice("nova", "Nova", VoiceGender.FEMALE),
+    _voice("shimmer", "Shimmer", VoiceGender.FEMALE),
+)
--- a/surfsense_backend/app/podcasts/voices/data/kokoro.py
+++ b/surfsense_backend/app/podcasts/voices/data/kokoro.py
@ -0,0 +1,63 @@
+"""Curated Kokoro voices, the local provider's multilingual roster.
+
+Kokoro voice names encode language and gender in their first two letters
+(``a``=American English, ``b``=British, ``e``=Spanish, ``f``=French,
+``h``=Hindi, ``i``=Italian, ``j``=Japanese, ``p``=Brazilian Portuguese,
+``z``=Mandarin; second letter ``f``/``m`` = female/male). Except for French
+(female ``ff_siwis`` only), each language has a male and a female voice so a
+two-speaker brief gets a distinct pair. ``native_ref`` is the bare name Kokoro expects.
+
+Reference: https://huggingface.co/hexgrad/Kokoro-82M/tree/main/voices
+"""
+
+from __future__ import annotations
+
+from ..provider import TtsProvider
+from ..voice import CatalogVoice, VoiceGender
+
+
+def _voice(name: str, language: str, display: str, gender: VoiceGender) -> CatalogVoice:
+    return CatalogVoice(
+        voice_id=f"kokoro:{name}",  # catalog id = "kokoro:" prefix + voice name
+        provider=TtsProvider.KOKORO,
+        language=language,  # language tag supplied per entry (e.g. "en-US", "fr")
+        display_name=display,
+        gender=gender,
+        native_ref=name,  # the bare voice name is stored unchanged
+    )
+
+
+KOKORO_VOICES: tuple[CatalogVoice, ...] = (  # args per entry: name, language, display, gender
+    # American English
+    _voice("am_adam", "en-US", "Adam (US)", VoiceGender.MALE),
+    _voice("am_michael", "en-US", "Michael (US)", VoiceGender.MALE),
+    _voice("af_bella", "en-US", "Bella (US)", VoiceGender.FEMALE),
+    _voice("af_heart", "en-US", "Heart (US)", VoiceGender.FEMALE),
+    _voice("af_nicole", "en-US", "Nicole (US)", VoiceGender.FEMALE),
+    _voice("af_sarah", "en-US", "Sarah (US)", VoiceGender.FEMALE),
+    # British English
+    _voice("bm_george", "en-GB", "George (UK)", VoiceGender.MALE),
+    _voice("bm_lewis", "en-GB", "Lewis (UK)", VoiceGender.MALE),
+    _voice("bf_emma", "en-GB", "Emma (UK)", VoiceGender.FEMALE),
+    _voice("bf_isabella", "en-GB", "Isabella (UK)", VoiceGender.FEMALE),
+    # Spanish
+    _voice("em_alex", "es", "Alex (ES)", VoiceGender.MALE),
+    _voice("ef_dora", "es", "Dora (ES)", VoiceGender.FEMALE),
+    # French (female only: this roster has no male French voice)
+    _voice("ff_siwis", "fr", "Siwis (FR)", VoiceGender.FEMALE),
+    # Hindi
+    _voice("hm_omega", "hi", "Omega (HI)", VoiceGender.MALE),
+    _voice("hf_alpha", "hi", "Alpha (HI)", VoiceGender.FEMALE),
+    # Italian
+    _voice("im_nicola", "it", "Nicola (IT)", VoiceGender.MALE),
+    _voice("if_sara", "it", "Sara (IT)", VoiceGender.FEMALE),
+    # Japanese
+    _voice("jm_kumo", "ja", "Kumo (JA)", VoiceGender.MALE),
+    _voice("jf_alpha", "ja", "Alpha (JA)", VoiceGender.FEMALE),
+    # Brazilian Portuguese
+    _voice("pm_alex", "pt-BR", "Alex (BR)", VoiceGender.MALE),
+    _voice("pf_dora", "pt-BR", "Dora (BR)", VoiceGender.FEMALE),
+    # Mandarin Chinese
+    _voice("zm_yunxi", "zh", "Yunxi (ZH)", VoiceGender.MALE),
+    _voice("zf_xiaoxiao", "zh", "Xiaoxiao (ZH)", VoiceGender.FEMALE),
+)
--- a/surfsense_backend/app/podcasts/voices/data/openai.py
+++ b/surfsense_backend/app/podcasts/voices/data/openai.py
@ -0,0 +1,32 @@
+"""OpenAI TTS voices: language-agnostic, so each speaks any requested language.
+
+OpenAI voices follow the language of the input text rather than being tied to a
+locale, so they are tagged :data:`ANY_LANGUAGE` and match every brief. The
+``native_ref`` is the plain voice name the API expects.
+"""
+
+from __future__ import annotations
+
+from ..provider import TtsProvider
+from ..voice import ANY_LANGUAGE, CatalogVoice, VoiceGender
+
+
+def _voice(name: str, display: str, gender: VoiceGender) -> CatalogVoice:
+    return CatalogVoice(
+        voice_id=f"openai:{name}",  # catalog id = "openai:" prefix + voice name
+        provider=TtsProvider.OPENAI,
+        language=ANY_LANGUAGE,  # every entry built here gets the any-language tag
+        display_name=display,
+        gender=gender,
+        native_ref=name,  # the plain voice name is stored unchanged
+    )
+
+
+OPENAI_VOICES: tuple[CatalogVoice, ...] = (  # args per entry: name, display, gender
+    _voice("alloy", "Alloy", VoiceGender.NEUTRAL),
+    _voice("echo", "Echo", VoiceGender.MALE),
+    _voice("fable", "Fable", VoiceGender.NEUTRAL),
+    _voice("onyx", "Onyx", VoiceGender.MALE),
+    _voice("nova", "Nova", VoiceGender.FEMALE),
+    _voice("shimmer", "Shimmer", VoiceGender.FEMALE),
+)
--- a/surfsense_backend/app/podcasts/voices/data/vertex.py
+++ b/surfsense_backend/app/podcasts/voices/data/vertex.py
@ -0,0 +1,39 @@
+"""Vertex AI Studio voices: locale-specific, referenced by a mapping.
+
+Vertex voices are tied to a locale and named via a ``{languageCode, name}``
+mapping, which is exactly the ``native_ref`` the LiteLLM adapter forwards. The
+values mirror the legacy podcaster's English Studio voices.
+"""
+
+from __future__ import annotations
+
+from ..provider import TtsProvider
+from ..voice import CatalogVoice, VoiceGender
+
+
+def _voice(
+    key: str,  # suffix of the catalog id, placed after "vertex_ai:"
+    language: str,  # catalog language tag stored on the entry
+    locale: str,  # value stored as native_ref["languageCode"]
+    name: str,  # value stored as native_ref["name"]
+    display: str,
+    gender: VoiceGender,
+) -> CatalogVoice:
+    return CatalogVoice(
+        voice_id=f"vertex_ai:{key}",
+        provider=TtsProvider.VERTEX_AI,
+        language=language,
+        display_name=display,
+        gender=gender,
+        native_ref={"languageCode": locale, "name": name},  # a mapping here, not a bare string
+    )
+
+
+VERTEX_VOICES: tuple[CatalogVoice, ...] = (  # args per entry: key, language, locale, name, display, gender
+    _voice("en-US-Studio-O", "en-US", "en-US", "en-US-Studio-O", "Studio O (US)", VoiceGender.FEMALE),
+    _voice("en-US-Studio-M", "en-US", "en-US", "en-US-Studio-M", "Studio M (US)", VoiceGender.MALE),
+    _voice("en-GB-Studio-A", "en-GB", "en-UK", "en-UK-Studio-A", "Studio A (UK)", VoiceGender.FEMALE),  # NOTE(review): native_ref uses en-UK while key/language use en-GB — confirm the API accepts en-UK
+    _voice("en-GB-Studio-B", "en-GB", "en-UK", "en-UK-Studio-B", "Studio B (UK)", VoiceGender.MALE),  # NOTE(review): same en-UK vs en-GB mismatch as the entry above — confirm
+    _voice("en-AU-Studio-A", "en-AU", "en-AU", "en-AU-Studio-A", "Studio A (AU)", VoiceGender.FEMALE),
+    _voice("en-AU-Studio-B", "en-AU", "en-AU", "en-AU-Studio-B", "Studio B (AU)", VoiceGender.MALE),
+)