Merge remote-tracking branch 'upstream/dev' into features/documents-injestion-layered-cached

2026-07-02 22:01:05 +02:00 · 2026-06-12 19:35:34 +02:00 · 2026-06-12 19:35:34 +02:00 · dcebfc4756
commit dcebfc4756
parent 311570b4f0 4c28ba5295
10 changed files with 294 additions and 20 deletions
--- a/surfsense_backend/app/podcasts/api/routes.py
+++ b/surfsense_backend/app/podcasts/api/routes.py
@@ -47,6 +47,7 @@ from app.utils.rbac import check_permission

 from .schemas import (
    CreatePodcastRequest,
+    LanguageOptions,
    PodcastDetail,
    PodcastSummary,
    UpdateSpecRequest,
@@ -114,6 +115,20 @@ async def list_voices(language: str | None = None):
    ]


+@router.get("/podcasts/languages", response_model=LanguageOptions)
+async def list_languages():
+    """Languages the active TTS provider can offer the brief editor."""
+    if not app_config.TTS_SERVICE:
+        raise HTTPException(status_code=503, detail="No TTS provider configured")
+
+    provider = provider_from_service(app_config.TTS_SERVICE)
+    offering = get_voice_catalog().offerable_languages(provider)
+    return LanguageOptions(
+        languages=offering.languages,
+        allows_custom=offering.allows_custom,
+    )
+
+
 @router.get("/podcasts/voices/{voice_id}/preview")
 async def preview_voice(
    voice_id: str,
--- a/surfsense_backend/app/podcasts/api/schemas.py
+++ b/surfsense_backend/app/podcasts/api/schemas.py
@@ -51,6 +51,17 @@ class VoiceOption(BaseModel):
    gender: str


+class LanguageOptions(BaseModel):
+    """The languages the brief editor may offer for the active provider.
+
+    When ``allows_custom`` is true the list is a curated starting point and
+    the editor accepts any BCP-47 tag beyond it.
+    """
+
+    languages: list[str]
+    allows_custom: bool
+
+
 class PodcastSummary(BaseModel):
    """Lightweight list item."""

--- a/surfsense_backend/app/podcasts/voices/__init__.py
+++ b/surfsense_backend/app/podcasts/voices/__init__.py
@@ -6,7 +6,7 @@ configured provider via :func:`provider_from_service`.

 from __future__ import annotations

-from .catalog import VoiceCatalog, get_voice_catalog
+from .catalog import LanguageOffering, VoiceCatalog, get_voice_catalog
 from .preview import render_voice_preview
 from .provider import TtsProvider, provider_from_service
 from .voice import ANY_LANGUAGE, CatalogVoice, VoiceGender
@@ -14,6 +14,7 @@ from .voice import ANY_LANGUAGE, CatalogVoice, VoiceGender
 __all__ = [
    "ANY_LANGUAGE",
    "CatalogVoice",
+    "LanguageOffering",
    "TtsProvider",
    "VoiceCatalog",
    "VoiceGender",
--- a/surfsense_backend/app/podcasts/voices/catalog.py
+++ b/surfsense_backend/app/podcasts/voices/catalog.py
@@ -9,11 +9,26 @@ provider-native reference.
 from __future__ import annotations

 from collections.abc import Iterable
+from dataclasses import dataclass
 from functools import lru_cache

 from .data import AZURE_VOICES, KOKORO_VOICES, OPENAI_VOICES, VERTEX_VOICES
+from .data.languages import COMMON_LANGUAGES
 from .provider import TtsProvider
-from .voice import CatalogVoice
+from .voice import ANY_LANGUAGE, CatalogVoice
+
+
+@dataclass(frozen=True, slots=True)
+class LanguageOffering:
+    """The languages a provider's roster can offer the brief form.
+
+    ``allows_custom`` is true when the roster has wildcard voices: the listed
+    languages are then a curated starting point, not a limit, and any BCP-47
+    tag may be entered.
+    """
+
+    languages: list[str]
+    allows_custom: bool


 class VoiceCatalog:
@@ -44,6 +59,20 @@ class VoiceCatalog:
        """Whether ``provider`` has at least one voice for ``language``."""
        return any(v.speaks(language) for v in self.for_provider(provider))

+    def offerable_languages(self, provider: TtsProvider) -> LanguageOffering:
+        """The languages ``provider`` can offer up front.
+
+        Language-bound voices contribute their concrete tags; wildcard voices
+        cannot enumerate languages, so their presence merges in the curated
+        common list and opens free entry.
+        """
+        voices = self.for_provider(provider)
+        tags = {v.language for v in voices if v.language != ANY_LANGUAGE}
+        has_wildcard = any(v.language == ANY_LANGUAGE for v in voices)
+        if has_wildcard:
+            tags.update(COMMON_LANGUAGES)
+        return LanguageOffering(languages=sorted(tags), allows_custom=has_wildcard)
+

 @lru_cache(maxsize=1)
 def get_voice_catalog() -> VoiceCatalog:
--- a/surfsense_backend/app/podcasts/voices/data/languages.py
+++ b/surfsense_backend/app/podcasts/voices/data/languages.py
@@ -0,0 +1,33 @@
+"""Curated languages offered when a roster has wildcard (any-language) voices.
+
+OpenAI-style multilingual voices speak whatever language the text is in, so
+there is no provider list to enumerate. This is the set the brief form offers
+up front for such providers; it is an offering, not a limit — the API flags
+``allows_custom`` so users can enter any BCP-47 tag beyond it.
+"""
+
+from __future__ import annotations
+
+COMMON_LANGUAGES: tuple[str, ...] = (
+    "ar",
+    "bn",
+    "de",
+    "en",
+    "es",
+    "fr",
+    "hi",
+    "id",
+    "it",
+    "ja",
+    "ko",
+    "nl",
+    "pl",
+    "pt",
+    "ru",
+    "sw",
+    "th",
+    "tr",
+    "uk",
+    "vi",
+    "zh",
+)
--- a/surfsense_backend/tests/integration/podcasts/test_voices.py
+++ b/surfsense_backend/tests/integration/podcasts/test_voices.py
@@ -29,3 +29,23 @@ async def test_voices_503_when_no_tts_configured(client, monkeypatch):
    resp = await client.get(f"{BASE}/voices")

    assert resp.status_code == 503
+
+
+async def test_languages_returns_the_active_providers_offering(client):
+    """The brief form renders exactly what the backend offers — for a wildcard
+    provider (openai/tts-1) that is the curated list plus free entry."""
+    resp = await client.get(f"{BASE}/languages")
+
+    assert resp.status_code == 200
+    offering = resp.json()
+    assert "en" in offering["languages"]
+    assert "fr" in offering["languages"]
+    assert offering["allows_custom"] is True
+
+
+async def test_languages_503_when_no_tts_configured(client, monkeypatch):
+    monkeypatch.setattr(app_config, "TTS_SERVICE", "")
+
+    resp = await client.get(f"{BASE}/languages")
+
+    assert resp.status_code == 503
--- a/surfsense_backend/tests/unit/podcasts/test_voice_catalog.py
+++ b/surfsense_backend/tests/unit/podcasts/test_voice_catalog.py
@@ -75,6 +75,59 @@ def test_supports_language_reports_availability():
    assert not catalog.supports_language(TtsProvider.KOKORO, "de")


+def test_offerable_languages_for_a_concrete_roster_are_its_tags_only():
+    """A provider whose voices are language-bound offers exactly those tags."""
+    catalog = VoiceCatalog(
+        [
+            _voice("k1", language="en-US"),
+            _voice("k2", language="fr"),
+            _voice("k3", language="fr"),
+        ]
+    )
+
+    offering = catalog.offerable_languages(TtsProvider.KOKORO)
+
+    assert offering.languages == ["en-US", "fr"]
+    assert offering.allows_custom is False
+
+
+def test_a_wildcard_roster_offers_the_curated_languages_and_custom_entry():
+    """Voices that speak anything can't enumerate languages themselves, so the
+    catalog offers the curated common list and invites free entry."""
+    catalog = VoiceCatalog(
+        [_voice("o1", provider=TtsProvider.OPENAI, language=ANY_LANGUAGE)]
+    )
+
+    offering = catalog.offerable_languages(TtsProvider.OPENAI)
+
+    assert {"en", "fr", "sw", "hi", "zh"} <= set(offering.languages)
+    assert offering.allows_custom is True
+
+
+def test_a_mixed_roster_offers_the_union_of_concrete_and_curated():
+    catalog = VoiceCatalog(
+        [
+            _voice("v1", provider=TtsProvider.VERTEX_AI, language="en-GB"),
+            _voice("v2", provider=TtsProvider.VERTEX_AI, language=ANY_LANGUAGE),
+        ]
+    )
+
+    offering = catalog.offerable_languages(TtsProvider.VERTEX_AI)
+
+    assert "en-GB" in offering.languages
+    assert "fr" in offering.languages
+    assert offering.allows_custom is True
+
+
+def test_a_provider_with_no_voices_offers_nothing():
+    catalog = VoiceCatalog([_voice("k1")])
+
+    offering = catalog.offerable_languages(TtsProvider.OPENAI)
+
+    assert offering.languages == []
+    assert offering.allows_custom is False
+
+
 def test_get_raises_for_an_unknown_voice():
    catalog = VoiceCatalog([_voice("k1")])
    with pytest.raises(KeyError):