@router.get("/podcasts/languages", response_model=LanguageOptions)
async def list_languages():
    """Report which languages the brief editor may offer for the active TTS provider.

    Raises:
        HTTPException: 503 when ``app_config.TTS_SERVICE`` is unset or empty.
    """
    service = app_config.TTS_SERVICE
    if not service:
        raise HTTPException(status_code=503, detail="No TTS provider configured")

    # Resolve the provider before touching the catalog, then ask the catalog
    # what that provider's roster can offer.
    active_provider = provider_from_service(service)
    catalog = get_voice_catalog()
    offering = catalog.offerable_languages(active_provider)

    return LanguageOptions(
        languages=offering.languages,
        allows_custom=offering.allows_custom,
    )
+ """ + + languages: list[str] + allows_custom: bool + + class PodcastSummary(BaseModel): """Lightweight list item.""" diff --git a/surfsense_backend/app/podcasts/voices/__init__.py b/surfsense_backend/app/podcasts/voices/__init__.py index ab1f8bbbf..97874a655 100644 --- a/surfsense_backend/app/podcasts/voices/__init__.py +++ b/surfsense_backend/app/podcasts/voices/__init__.py @@ -6,7 +6,7 @@ configured provider via :func:`provider_from_service`. from __future__ import annotations -from .catalog import VoiceCatalog, get_voice_catalog +from .catalog import LanguageOffering, VoiceCatalog, get_voice_catalog from .preview import render_voice_preview from .provider import TtsProvider, provider_from_service from .voice import ANY_LANGUAGE, CatalogVoice, VoiceGender @@ -14,6 +14,7 @@ from .voice import ANY_LANGUAGE, CatalogVoice, VoiceGender __all__ = [ "ANY_LANGUAGE", "CatalogVoice", + "LanguageOffering", "TtsProvider", "VoiceCatalog", "VoiceGender", diff --git a/surfsense_backend/app/podcasts/voices/catalog.py b/surfsense_backend/app/podcasts/voices/catalog.py index c36313a0c..6bf39510a 100644 --- a/surfsense_backend/app/podcasts/voices/catalog.py +++ b/surfsense_backend/app/podcasts/voices/catalog.py @@ -9,11 +9,26 @@ provider-native reference. from __future__ import annotations from collections.abc import Iterable +from dataclasses import dataclass from functools import lru_cache from .data import AZURE_VOICES, KOKORO_VOICES, OPENAI_VOICES, VERTEX_VOICES +from .data.languages import COMMON_LANGUAGES from .provider import TtsProvider -from .voice import CatalogVoice +from .voice import ANY_LANGUAGE, CatalogVoice + + +@dataclass(frozen=True, slots=True) +class LanguageOffering: + """The languages a provider's roster can offer the brief form. + + ``allows_custom`` is true when the roster has wildcard voices: the listed + languages are then a curated starting point, not a limit, and any BCP-47 + tag may be entered. 
def offerable_languages(self, provider: TtsProvider) -> LanguageOffering:
    """Return the languages ``provider`` can offer up front.

    Language-bound voices contribute their concrete tags. Wildcard voices
    (``language == ANY_LANGUAGE``) cannot enumerate languages, so when at
    least one is present the curated ``COMMON_LANGUAGES`` are merged in and
    free entry is opened.

    Args:
        provider: The TTS provider whose roster is inspected.

    Returns:
        A ``LanguageOffering`` whose ``languages`` are de-duplicated and
        sorted, and whose ``allows_custom`` is true only when the roster
        holds a wildcard voice. A provider with no voices yields an empty
        list and ``allows_custom=False``.
    """
    tags: set[str] = set()
    has_wildcard = False
    # Walk the roster exactly once: this stays correct even if
    # ``for_provider`` hands back a one-shot iterable, which a second pass
    # would find already exhausted.
    for voice in self.for_provider(provider):
        if voice.language == ANY_LANGUAGE:
            has_wildcard = True
        else:
            tags.add(voice.language)
    if has_wildcard:
        tags.update(COMMON_LANGUAGES)
    return LanguageOffering(languages=sorted(tags), allows_custom=has_wildcard)
# Primary-language subtags only (no region or script), listed alphabetically.
COMMON_LANGUAGES: tuple[str, ...] = (
    "ar",  # Arabic
    "bn",  # Bengali
    "de",  # German
    "en",  # English
    "es",  # Spanish
    "fr",  # French
    "hi",  # Hindi
    "id",  # Indonesian
    "it",  # Italian
    "ja",  # Japanese
    "ko",  # Korean
    "nl",  # Dutch
    "pl",  # Polish
    "pt",  # Portuguese
    "ru",  # Russian
    "sw",  # Swahili
    "th",  # Thai
    "tr",  # Turkish
    "uk",  # Ukrainian
    "vi",  # Vietnamese
    "zh",  # Chinese
)
def test_a_mixed_roster_offers_the_union_of_concrete_and_curated():
    """A roster mixing a language-bound voice with a wildcard voice offers the
    bound voice's own tag alongside the curated list, and allows free entry."""
    bound_voice = _voice("v1", provider=TtsProvider.VERTEX_AI, language="en-GB")
    wildcard_voice = _voice("v2", provider=TtsProvider.VERTEX_AI, language=ANY_LANGUAGE)
    catalog = VoiceCatalog([bound_voice, wildcard_voice])

    offering = catalog.offerable_languages(TtsProvider.VERTEX_AI)

    # "en-GB" comes from the bound voice; "fr" comes from the curated list.
    assert {"en-GB", "fr"} <= set(offering.languages)
    assert offering.allows_custom is True
from "sonner"; import { Button } from "@/components/ui/button"; +import { + Command, + CommandEmpty, + CommandGroup, + CommandInput, + CommandItem, + CommandList, +} from "@/components/ui/command"; import { Input } from "@/components/ui/input"; import { Label } from "@/components/ui/label"; +import { Popover, PopoverContent, PopoverTrigger } from "@/components/ui/popover"; import { Select, SelectContent, @@ -15,6 +24,7 @@ import { } from "@/components/ui/select"; import { Textarea } from "@/components/ui/textarea"; import { + type LanguageOptions, MAX_SPEAKERS, type PodcastSpec, type PodcastStyle, @@ -56,6 +66,7 @@ interface BriefReviewProps { export function BriefReview({ podcast, spec }: BriefReviewProps) { const [draft, setDraft] = useState(spec); const [voices, setVoices] = useState(null); + const [offering, setOffering] = useState(null); const [isSubmitting, setIsSubmitting] = useState(false); // A pushed spec change (saved edit or concurrent editor) resets the form to @@ -75,19 +86,26 @@ export function BriefReview({ podcast, spec }: BriefReviewProps) { .catch(() => { if (!cancelled) setVoices([]); }); + podcastsApiService + .listLanguages() + .then((options) => { + if (!cancelled) setOffering(options); + }) + .catch(() => { + if (!cancelled) setOffering({ languages: [], allows_custom: false }); + }); return () => { cancelled = true; }; }, []); + // The backend owns the offering; the draft's language stays listed even + // when it falls outside it (e.g. a custom tag entered earlier). const languages = useMemo(() => { - const tags = new Set(); - for (const voice of voices ?? []) { - if (voice.language !== ANY_LANGUAGE) tags.add(voice.language); - } + const tags = new Set(offering?.languages ?? []); tags.add(draft.language); return [...tags].sort(); - }, [voices, draft.language]); + }, [offering, draft.language]); const voicesForLanguage = useMemo( () => (voices ?? 
[]).filter((voice) => speaks(voice, draft.language)), @@ -193,18 +211,22 @@ export function BriefReview({ podcast, spec }: BriefReviewProps) {
- + {offering?.allows_custom ? ( + + ) : ( + + )}
@@ -375,6 +397,80 @@ export function BriefReview({ podcast, spec }: BriefReviewProps) { ); } +/** A searchable language picker for providers whose voices speak anything: + * the offered list comes from the backend, and any BCP-47 tag may be typed + * when none of them fits. */ +function LanguageCombobox({ + value, + languages, + onSelect, +}: { + value: string; + languages: string[]; + onSelect: (language: string) => void; +}) { + const [open, setOpen] = useState(false); + const [query, setQuery] = useState(""); + + const pick = (tag: string) => { + onSelect(tag); + setOpen(false); + setQuery(""); + }; + + const customTag = query.trim(); + const isNewTag = + customTag.length > 0 && !languages.some((tag) => tag.toLowerCase() === customTag.toLowerCase()); + + return ( + + + + + + + + + No matching language. + + {languages.map((tag) => ( + pick(tag)} + > + + {languageLabel(tag)} + + ))} + {isNewTag ? ( + pick(customTag)}> + + Use “{customTag}” + + ) : null} + + + + + + ); +} + /** The current selection stays listed even when it no longer matches the * language filter, so the Select never renders an orphaned value. */ function voiceItems(candidates: VoiceOption[], selectedId: string): VoiceOption[] { diff --git a/surfsense_web/contracts/types/podcast.types.ts b/surfsense_web/contracts/types/podcast.types.ts index e6332d5b2..627cc6f58 100644 --- a/surfsense_web/contracts/types/podcast.types.ts +++ b/surfsense_web/contracts/types/podcast.types.ts @@ -103,6 +103,15 @@ export const voiceOption = z.object({ }); export type VoiceOption = z.infer; +// The languages the backend offers for the active TTS provider. When +// `allows_custom` is true the list is a starting point and any BCP-47 tag +// may be entered. 
// Fetches the language offering for the active TTS provider. The brief form
// builds its language picker from this list and only opens free entry when
// the response sets `allows_custom`. The `languageOptions` schema is handed
// to `baseApiService.get` together with the URL.
listLanguages = async () => baseApiService.get(`${BASE}/languages`, languageOptions);