From 7b30a768563a7170c98730c9d1620b33a23146c4 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Fri, 12 Jun 2026 00:06:37 +0200 Subject: [PATCH] fix(gitignore): anchor data/ rule; track podcast voice catalogs --- surfsense_backend/.gitignore | 2 +- .../app/podcasts/voices/data/__init__.py | 10 +++ .../app/podcasts/voices/data/azure.py | 32 ++++++++++ .../app/podcasts/voices/data/kokoro.py | 63 +++++++++++++++++++ .../app/podcasts/voices/data/openai.py | 32 ++++++++++ .../app/podcasts/voices/data/vertex.py | 39 ++++++++++++ 6 files changed, 177 insertions(+), 1 deletion(-) create mode 100644 surfsense_backend/app/podcasts/voices/data/__init__.py create mode 100644 surfsense_backend/app/podcasts/voices/data/azure.py create mode 100644 surfsense_backend/app/podcasts/voices/data/kokoro.py create mode 100644 surfsense_backend/app/podcasts/voices/data/openai.py create mode 100644 surfsense_backend/app/podcasts/voices/data/vertex.py diff --git a/surfsense_backend/.gitignore b/surfsense_backend/.gitignore index bd233e459..bda5961fe 100644 --- a/surfsense_backend/.gitignore +++ b/surfsense_backend/.gitignore @@ -1,7 +1,7 @@ .env .venv venv/ -data/ +/data/ .local_object_store/ __pycache__/ .flashrank_cache diff --git a/surfsense_backend/app/podcasts/voices/data/__init__.py b/surfsense_backend/app/podcasts/voices/data/__init__.py new file mode 100644 index 000000000..5316f10f6 --- /dev/null +++ b/surfsense_backend/app/podcasts/voices/data/__init__.py @@ -0,0 +1,10 @@ +"""Static per-provider voice rosters that compose the catalog.""" + +from __future__ import annotations + +from .azure import AZURE_VOICES +from .kokoro import KOKORO_VOICES +from .openai import OPENAI_VOICES +from .vertex import VERTEX_VOICES + +__all__ = ["AZURE_VOICES", "KOKORO_VOICES", "OPENAI_VOICES", "VERTEX_VOICES"] diff --git a/surfsense_backend/app/podcasts/voices/data/azure.py b/surfsense_backend/app/podcasts/voices/data/azure.py new file mode 100644 index 000000000..104ab766d --- /dev/null +++ 
b/surfsense_backend/app/podcasts/voices/data/azure.py
@@ -0,0 +1,32 @@
+"""Azure TTS voices, routed through the OpenAI-compatible voice names.
+
+The deployment fronts Azure with OpenAI-style voice names (matching the legacy
+podcaster), so these mirror the OpenAI roster and, like it, speak any requested
+language.
+"""
+
+from __future__ import annotations
+
+from ..provider import TtsProvider
+from ..voice import ANY_LANGUAGE, CatalogVoice, VoiceGender
+
+
+def _voice(name: str, display: str, gender: VoiceGender) -> CatalogVoice:
+    return CatalogVoice(
+        voice_id=f"azure:{name}",  # provider-prefixed catalog id
+        provider=TtsProvider.AZURE,
+        language=ANY_LANGUAGE,  # every Azure entry carries the same language tag
+        display_name=display,
+        gender=gender,
+        native_ref=name,  # bare voice name, without the "azure:" prefix
+    )
+
+
+AZURE_VOICES: tuple[CatalogVoice, ...] = (
+    _voice("alloy", "Alloy", VoiceGender.NEUTRAL),
+    _voice("echo", "Echo", VoiceGender.MALE),
+    _voice("fable", "Fable", VoiceGender.NEUTRAL),
+    _voice("onyx", "Onyx", VoiceGender.MALE),
+    _voice("nova", "Nova", VoiceGender.FEMALE),
+    _voice("shimmer", "Shimmer", VoiceGender.FEMALE),
+)
diff --git a/surfsense_backend/app/podcasts/voices/data/kokoro.py b/surfsense_backend/app/podcasts/voices/data/kokoro.py
new file mode 100644
index 000000000..732dced23
--- /dev/null
+++ b/surfsense_backend/app/podcasts/voices/data/kokoro.py
@@ -0,0 +1,63 @@
+"""Curated Kokoro voices, the local provider's multilingual roster.
+
+Kokoro voice names encode language and gender in their first two letters
+(``a``=American English, ``b``=British, ``e``=Spanish, ``f``=French,
+``h``=Hindi, ``i``=Italian, ``j``=Japanese, ``p``=Brazilian Portuguese,
+``z``=Mandarin; second letter ``f``/``m`` = female/male). Every language except
+French (female ``ff_siwis`` only) has a male and a female voice, so a two-speaker
+brief gets a distinct pair. ``native_ref`` is the bare voice name Kokoro expects.
+
+Reference: https://huggingface.co/hexgrad/Kokoro-82M/tree/main/voices
+"""
+
+from __future__ import annotations
+
+from ..provider import TtsProvider
+from ..voice import CatalogVoice, VoiceGender
+
+
+def _voice(name: str, language: str, display: str, gender: VoiceGender) -> CatalogVoice:
+    return CatalogVoice(
+        voice_id=f"kokoro:{name}",  # provider-prefixed catalog id
+        provider=TtsProvider.KOKORO,
+        language=language,  # per-voice language tag, e.g. "en-US" or "fr"
+        display_name=display,
+        gender=gender,
+        native_ref=name,  # bare voice name, without the "kokoro:" prefix
+    )
+
+
+KOKORO_VOICES: tuple[CatalogVoice, ...] = (
+    # American English
+    _voice("am_adam", "en-US", "Adam (US)", VoiceGender.MALE),
+    _voice("am_michael", "en-US", "Michael (US)", VoiceGender.MALE),
+    _voice("af_bella", "en-US", "Bella (US)", VoiceGender.FEMALE),
+    _voice("af_heart", "en-US", "Heart (US)", VoiceGender.FEMALE),
+    _voice("af_nicole", "en-US", "Nicole (US)", VoiceGender.FEMALE),
+    _voice("af_sarah", "en-US", "Sarah (US)", VoiceGender.FEMALE),
+    # British English
+    _voice("bm_george", "en-GB", "George (UK)", VoiceGender.MALE),
+    _voice("bm_lewis", "en-GB", "Lewis (UK)", VoiceGender.MALE),
+    _voice("bf_emma", "en-GB", "Emma (UK)", VoiceGender.FEMALE),
+    _voice("bf_isabella", "en-GB", "Isabella (UK)", VoiceGender.FEMALE),
+    # Spanish
+    _voice("em_alex", "es", "Alex (ES)", VoiceGender.MALE),
+    _voice("ef_dora", "es", "Dora (ES)", VoiceGender.FEMALE),
+    # French (female voice only; this roster has no male French entry)
+    _voice("ff_siwis", "fr", "Siwis (FR)", VoiceGender.FEMALE),
+    # Hindi
+    _voice("hm_omega", "hi", "Omega (HI)", VoiceGender.MALE),
+    _voice("hf_alpha", "hi", "Alpha (HI)", VoiceGender.FEMALE),
+    # Italian
+    _voice("im_nicola", "it", "Nicola (IT)", VoiceGender.MALE),
+    _voice("if_sara", "it", "Sara (IT)", VoiceGender.FEMALE),
+    # Japanese
+    _voice("jm_kumo", "ja", "Kumo (JA)", VoiceGender.MALE),
+    _voice("jf_alpha", "ja", "Alpha (JA)", VoiceGender.FEMALE),
+    # Brazilian Portuguese
+    _voice("pm_alex", "pt-BR", "Alex (BR)", VoiceGender.MALE),
+    _voice("pf_dora", "pt-BR", "Dora (BR)", VoiceGender.FEMALE),
+    # Mandarin Chinese
+    _voice("zm_yunxi", "zh", "Yunxi (ZH)", VoiceGender.MALE),
+    _voice("zf_xiaoxiao", "zh", "Xiaoxiao (ZH)", VoiceGender.FEMALE),
+)
diff --git a/surfsense_backend/app/podcasts/voices/data/openai.py b/surfsense_backend/app/podcasts/voices/data/openai.py
new file mode 100644
index 000000000..ce5c480c5
--- /dev/null
+++ b/surfsense_backend/app/podcasts/voices/data/openai.py
@@ -0,0 +1,32 @@
+"""OpenAI TTS voices: language-agnostic, so each speaks any requested language.
+
+OpenAI voices follow the language of the input text rather than being tied to a
+locale, so they are tagged :data:`ANY_LANGUAGE` and match every brief. The
+``native_ref`` is the plain voice name the API expects.
+"""
+
+from __future__ import annotations
+
+from ..provider import TtsProvider
+from ..voice import ANY_LANGUAGE, CatalogVoice, VoiceGender
+
+
+def _voice(name: str, display: str, gender: VoiceGender) -> CatalogVoice:
+    return CatalogVoice(
+        voice_id=f"openai:{name}",  # provider-prefixed catalog id
+        provider=TtsProvider.OPENAI,
+        language=ANY_LANGUAGE,  # every OpenAI entry carries the same language tag
+        display_name=display,
+        gender=gender,
+        native_ref=name,  # bare voice name, without the "openai:" prefix
+    )
+
+
+OPENAI_VOICES: tuple[CatalogVoice, ...] = (
+    _voice("alloy", "Alloy", VoiceGender.NEUTRAL),
+    _voice("echo", "Echo", VoiceGender.MALE),
+    _voice("fable", "Fable", VoiceGender.NEUTRAL),
+    _voice("onyx", "Onyx", VoiceGender.MALE),
+    _voice("nova", "Nova", VoiceGender.FEMALE),
+    _voice("shimmer", "Shimmer", VoiceGender.FEMALE),
+)
diff --git a/surfsense_backend/app/podcasts/voices/data/vertex.py b/surfsense_backend/app/podcasts/voices/data/vertex.py
new file mode 100644
index 000000000..8452a00ff
--- /dev/null
+++ b/surfsense_backend/app/podcasts/voices/data/vertex.py
@@ -0,0 +1,39 @@
+"""Vertex AI Studio voices: locale-specific, referenced by a mapping.
+
+Vertex voices are tied to a locale and named via a ``{languageCode, name}``
+mapping, which is exactly the ``native_ref`` the LiteLLM adapter forwards. The
+values mirror the legacy podcaster's English Studio voices.
+"""
+
+from __future__ import annotations
+
+from ..provider import TtsProvider
+from ..voice import CatalogVoice, VoiceGender
+
+
+def _voice(
+    key: str,  # suffix of the catalog id ("vertex_ai:<key>")
+    language: str,  # catalog language tag
+    locale: str,  # stored as native_ref["languageCode"]
+    name: str,  # stored as native_ref["name"]
+    display: str,
+    gender: VoiceGender,
+) -> CatalogVoice:
+    return CatalogVoice(
+        voice_id=f"vertex_ai:{key}",
+        provider=TtsProvider.VERTEX_AI,
+        language=language,
+        display_name=display,
+        gender=gender,
+        native_ref={"languageCode": locale, "name": name},  # mapping, not a bare string
+    )
+
+
+VERTEX_VOICES: tuple[CatalogVoice, ...] = (
+    _voice("en-US-Studio-O", "en-US", "en-US", "en-US-Studio-O", "Studio O (US)", VoiceGender.FEMALE),
+    _voice("en-US-Studio-M", "en-US", "en-US", "en-US-Studio-M", "Studio M (US)", VoiceGender.MALE),
+    _voice("en-GB-Studio-A", "en-GB", "en-UK", "en-UK-Studio-A", "Studio A (UK)", VoiceGender.FEMALE),  # NOTE(review): key/language say "en-GB" but locale/name say "en-UK" - confirm which the provider accepts
+    _voice("en-GB-Studio-B", "en-GB", "en-UK", "en-UK-Studio-B", "Studio B (UK)", VoiceGender.MALE),  # NOTE(review): "en-GB" key/language vs "en-UK" locale/name here too - confirm
+    _voice("en-AU-Studio-A", "en-AU", "en-AU", "en-AU-Studio-A", "Studio A (AU)", VoiceGender.FEMALE),
+    _voice("en-AU-Studio-B", "en-AU", "en-AU", "en-AU-Studio-B", "Studio B (AU)", VoiceGender.MALE),
+)