Merge pull request #1487 from CREDO23/improvement-podcast-graph

[Feat] Podcast: Backend-owned language offering for the brief form
2026-06-12 20:45:20 +02:00 · 2026-06-12 00:58:02 -07:00 · 2026-06-12 00:58:02 -07:00 · 4c28ba5295
commit 4c28ba5295
parent c855be8ccd 8dd174d304
11 changed files with 295 additions and 22 deletions
--- a/surfsense_backend/app/podcasts/api/routes.py
+++ b/surfsense_backend/app/podcasts/api/routes.py
@@ -47,6 +47,7 @@ from app.utils.rbac import check_permission

 from .schemas import (
    CreatePodcastRequest,
+    LanguageOptions,
    PodcastDetail,
    PodcastSummary,
    UpdateSpecRequest,
@@ -114,6 +115,20 @@ async def list_voices(language: str | None = None):
    ]


+@router.get("/podcasts/languages", response_model=LanguageOptions)
+async def list_languages():
+    """Report the languages the brief editor may offer for the active TTS provider.
+
+    Returns:
+        The catalog's offering for the configured provider: the language tags
+        to list and whether tags outside that list may be entered.
+
+    Raises:
+        HTTPException: 503 when no TTS service is configured.
+    """
+    tts_service = app_config.TTS_SERVICE
+    if not tts_service:
+        raise HTTPException(status_code=503, detail="No TTS provider configured")
+
+    active_provider = provider_from_service(tts_service)
+    catalog = get_voice_catalog()
+    offered = catalog.offerable_languages(active_provider)
+    return LanguageOptions(
+        languages=offered.languages,
+        allows_custom=offered.allows_custom,
+    )
+
+
 @router.get("/podcasts/voices/{voice_id}/preview")
 async def preview_voice(
    voice_id: str,
--- a/surfsense_backend/app/podcasts/api/schemas.py
+++ b/surfsense_backend/app/podcasts/api/schemas.py
@@ -51,6 +51,17 @@ class VoiceOption(BaseModel):
    gender: str


+class LanguageOptions(BaseModel):
+    """The languages the brief editor may offer for the active provider.
+
+    When ``allows_custom`` is true the list is a curated starting point and
+    the editor accepts any BCP-47 tag beyond it.
+    """
+
+    # Language tags to list in the editor's picker.
+    languages: list[str]
+    # True when tags outside ``languages`` may also be entered.
+    allows_custom: bool
+
+
 class PodcastSummary(BaseModel):
    """Lightweight list item."""

--- a/surfsense_backend/app/podcasts/voices/__init__.py
+++ b/surfsense_backend/app/podcasts/voices/__init__.py
@@ -6,7 +6,7 @@ configured provider via :func:`provider_from_service`.

 from __future__ import annotations

-from .catalog import VoiceCatalog, get_voice_catalog
+from .catalog import LanguageOffering, VoiceCatalog, get_voice_catalog
 from .preview import render_voice_preview
 from .provider import TtsProvider, provider_from_service
 from .voice import ANY_LANGUAGE, CatalogVoice, VoiceGender
@@ -14,6 +14,7 @@ from .voice import ANY_LANGUAGE, CatalogVoice, VoiceGender
 __all__ = [
    "ANY_LANGUAGE",
    "CatalogVoice",
+    "LanguageOffering",
    "TtsProvider",
    "VoiceCatalog",
    "VoiceGender",
--- a/surfsense_backend/app/podcasts/voices/catalog.py
+++ b/surfsense_backend/app/podcasts/voices/catalog.py
@@ -9,11 +9,26 @@ provider-native reference.
 from __future__ import annotations

 from collections.abc import Iterable
+from dataclasses import dataclass
 from functools import lru_cache

 from .data import AZURE_VOICES, KOKORO_VOICES, OPENAI_VOICES, VERTEX_VOICES
+from .data.languages import COMMON_LANGUAGES
 from .provider import TtsProvider
-from .voice import CatalogVoice
+from .voice import ANY_LANGUAGE, CatalogVoice
+
+
+@dataclass(frozen=True, slots=True)
+class LanguageOffering:
+    """The languages a provider's roster can offer the brief form.
+
+    ``allows_custom`` is true when the roster has wildcard voices: the listed
+    languages are then a curated starting point, not a limit, and any BCP-47
+    tag may be entered.
+    """
+
+    # Language tags to offer up front.
+    languages: list[str]
+    # True when tags outside ``languages`` may also be entered.
+    allows_custom: bool


 class VoiceCatalog:
@@ -44,6 +59,20 @@ class VoiceCatalog:
        """Whether ``provider`` has at least one voice for ``language``."""
        return any(v.speaks(language) for v in self.for_provider(provider))

+    def offerable_languages(self, provider: TtsProvider) -> LanguageOffering:
+        """Work out which languages ``provider`` can list in the brief form.
+
+        Each language-bound voice contributes its own tag. A wildcard voice
+        (``ANY_LANGUAGE``) has no tag to contribute; if the roster contains
+        one, the curated ``COMMON_LANGUAGES`` are merged in and custom entry
+        is allowed.
+
+        Returns:
+            A ``LanguageOffering`` with the de-duplicated tags in sorted order
+            and ``allows_custom`` set when a wildcard voice was found.
+        """
+        concrete_tags = set()
+        wildcard_seen = False
+        # Walk the roster once, sorting each voice into "wildcard" or "tagged".
+        for voice in self.for_provider(provider):
+            if voice.language == ANY_LANGUAGE:
+                wildcard_seen = True
+            else:
+                concrete_tags.add(voice.language)
+
+        if wildcard_seen:
+            concrete_tags.update(COMMON_LANGUAGES)
+        return LanguageOffering(
+            languages=sorted(concrete_tags),
+            allows_custom=wildcard_seen,
+        )
+

 @lru_cache(maxsize=1)
 def get_voice_catalog() -> VoiceCatalog:
--- a/surfsense_backend/app/podcasts/voices/data/languages.py
+++ b/surfsense_backend/app/podcasts/voices/data/languages.py
@@ -0,0 +1,33 @@
+"""Curated languages offered when a roster has wildcard (any-language) voices.
+
+OpenAI-style multilingual voices speak whatever language the text is in, so
+there is no provider list to enumerate. This is the set the brief form offers
+up front for such providers; it is an offering, not a limit — the API flags
+``allows_custom`` so users can enter any BCP-47 tag beyond it.
+"""
+
+from __future__ import annotations
+
+# Two-letter language tags, listed in alphabetical order.
+COMMON_LANGUAGES: tuple[str, ...] = (
+    "ar",
+    "bn",
+    "de",
+    "en",
+    "es",
+    "fr",
+    "hi",
+    "id",
+    "it",
+    "ja",
+    "ko",
+    "nl",
+    "pl",
+    "pt",
+    "ru",
+    "sw",
+    "th",
+    "tr",
+    "uk",
+    "vi",
+    "zh",
+)
--- a/surfsense_backend/tests/integration/podcasts/test_draft_task.py
+++ b/surfsense_backend/tests/integration/podcasts/test_draft_task.py
@@ -76,8 +76,7 @@ async def test_quota_denial_fails_the_podcast_without_a_transcript(
    async def _deny(**_kwargs):
        raise QuotaInsufficientError(
            usage_type="podcast_generation",
-            used_micros=5_000_000,
-            limit_micros=5_000_000,
+            balance_micros=0,
            remaining_micros=0,
        )
        yield  # pragma: no cover - unreachable, satisfies the CM protocol
--- a/surfsense_backend/tests/integration/podcasts/test_voices.py
+++ b/surfsense_backend/tests/integration/podcasts/test_voices.py
@@ -29,3 +29,23 @@ async def test_voices_503_when_no_tts_configured(client, monkeypatch):
    resp = await client.get(f"{BASE}/voices")

    assert resp.status_code == 503
+
+
+async def test_languages_returns_the_active_providers_offering(client):
+    """With a wildcard provider (openai/tts-1) active, the endpoint serves the
+    curated list and flags free entry, which is what the brief form renders."""
+    response = await client.get(f"{BASE}/languages")
+
+    assert response.status_code == 200
+    payload = response.json()
+    assert {"en", "fr"} <= set(payload["languages"])
+    assert payload["allows_custom"] is True
+
+
+async def test_languages_503_when_no_tts_configured(client, monkeypatch):
+    """An empty ``TTS_SERVICE`` setting makes the languages endpoint answer 503."""
+    monkeypatch.setattr(app_config, "TTS_SERVICE", "")
+
+    response = await client.get(f"{BASE}/languages")
+
+    assert response.status_code == 503
--- a/surfsense_backend/tests/unit/podcasts/test_voice_catalog.py
+++ b/surfsense_backend/tests/unit/podcasts/test_voice_catalog.py
@@ -75,6 +75,59 @@ def test_supports_language_reports_availability():
    assert not catalog.supports_language(TtsProvider.KOKORO, "de")


+def test_offerable_languages_for_a_concrete_roster_are_its_tags_only():
+    """Language-bound voices yield exactly their own tags, de-duplicated and
+    sorted, with no free entry."""
+    roster = [
+        _voice("k1", language="en-US"),
+        _voice("k2", language="fr"),
+        _voice("k3", language="fr"),
+    ]
+
+    result = VoiceCatalog(roster).offerable_languages(TtsProvider.KOKORO)
+
+    assert result.languages == ["en-US", "fr"]
+    assert result.allows_custom is False
+
+
+def test_a_wildcard_roster_offers_the_curated_languages_and_custom_entry():
+    """An any-language voice has no tags of its own to list, so the catalog
+    falls back to the curated common list and allows free entry."""
+    wildcard_voice = _voice("o1", provider=TtsProvider.OPENAI, language=ANY_LANGUAGE)
+
+    result = VoiceCatalog([wildcard_voice]).offerable_languages(TtsProvider.OPENAI)
+
+    assert set(result.languages) >= {"en", "fr", "sw", "hi", "zh"}
+    assert result.allows_custom is True
+
+
+def test_a_mixed_roster_offers_the_union_of_concrete_and_curated():
+    """A roster with both a tagged and a wildcard voice lists the concrete tag
+    alongside the curated ones and allows free entry."""
+    tagged = _voice("v1", provider=TtsProvider.VERTEX_AI, language="en-GB")
+    wildcard = _voice("v2", provider=TtsProvider.VERTEX_AI, language=ANY_LANGUAGE)
+
+    result = VoiceCatalog([tagged, wildcard]).offerable_languages(
+        TtsProvider.VERTEX_AI
+    )
+
+    assert {"en-GB", "fr"} <= set(result.languages)
+    assert result.allows_custom is True
+
+
+def test_a_provider_with_no_voices_offers_nothing():
+    """Asking for a provider that has no voices in the catalog yields an empty
+    list and no free entry."""
+    roster = [_voice("k1")]
+
+    result = VoiceCatalog(roster).offerable_languages(TtsProvider.OPENAI)
+
+    assert result.languages == []
+    assert result.allows_custom is False
+
+
 def test_get_raises_for_an_unknown_voice():
    catalog = VoiceCatalog([_voice("k1")])
    with pytest.raises(KeyError):
--- a/surfsense_web/components/tool-ui/podcast/brief-review.tsx
+++ b/surfsense_web/components/tool-ui/podcast/brief-review.tsx
@@ -1,11 +1,20 @@
 "use client";

-import { Loader2, Plus, Trash2 } from "lucide-react";
+import { Check, ChevronDown, Loader2, Plus, Trash2 } from "lucide-react";
 import { useEffect, useMemo, useState } from "react";
 import { toast } from "sonner";
 import { Button } from "@/components/ui/button";
+import {
+	Command,
+	CommandEmpty,
+	CommandGroup,
+	CommandInput,
+	CommandItem,
+	CommandList,
+} from "@/components/ui/command";
 import { Input } from "@/components/ui/input";
 import { Label } from "@/components/ui/label";
+import { Popover, PopoverContent, PopoverTrigger } from "@/components/ui/popover";
 import {
 	Select,
 	SelectContent,
@@ -15,6 +24,7 @@ import {
 } from "@/components/ui/select";
 import { Textarea } from "@/components/ui/textarea";
 import {
+	type LanguageOptions,
 	MAX_SPEAKERS,
 	type PodcastSpec,
 	type PodcastStyle,
@@ -56,6 +66,7 @@ interface BriefReviewProps {
 export function BriefReview({ podcast, spec }: BriefReviewProps) {
 	const [draft, setDraft] = useState<PodcastSpec>(spec);
 	const [voices, setVoices] = useState<VoiceOption[] | null>(null);
+	const [offering, setOffering] = useState<LanguageOptions | null>(null);
 	const [isSubmitting, setIsSubmitting] = useState(false);

 	// A pushed spec change (saved edit or concurrent editor) resets the form to
@@ -75,19 +86,26 @@ export function BriefReview({ podcast, spec }: BriefReviewProps) {
 			.catch(() => {
 				if (!cancelled) setVoices([]);
 			});
+		podcastsApiService
+			.listLanguages()
+			.then((options) => {
+				if (!cancelled) setOffering(options);
+			})
+			.catch(() => {
+				if (!cancelled) setOffering({ languages: [], allows_custom: false });
+			});
 		return () => {
 			cancelled = true;
 		};
 	}, []);

+	// The backend owns the offering; the draft's language stays listed even
+	// when it falls outside it (e.g. a custom tag entered earlier).
 	const languages = useMemo(() => {
-		const tags = new Set<string>();
-		for (const voice of voices ?? []) {
-			if (voice.language !== ANY_LANGUAGE) tags.add(voice.language);
-		}
+		const tags = new Set(offering?.languages ?? []);
 		tags.add(draft.language);
 		return [...tags].sort();
-	}, [voices, draft.language]);
+	}, [offering, draft.language]);

 	const voicesForLanguage = useMemo(
 		() => (voices ?? []).filter((voice) => speaks(voice, draft.language)),
@@ -193,18 +211,22 @@ export function BriefReview({ podcast, spec }: BriefReviewProps) {
 			<div className="grid grid-cols-2 gap-4">
 				<div className="flex flex-col gap-2">
 					<Label htmlFor="podcast-language">Language</Label>
-					<Select value={draft.language} onValueChange={setLanguage}>
-						<SelectTrigger id="podcast-language">
-							<SelectValue placeholder="Language" />
-						</SelectTrigger>
-						<SelectContent>
-							{languages.map((tag) => (
-								<SelectItem key={tag} value={tag}>
-									{languageLabel(tag)}
-								</SelectItem>
-							))}
-						</SelectContent>
-					</Select>
+					{offering?.allows_custom ? (
+						<LanguageCombobox value={draft.language} languages={languages} onSelect={setLanguage} />
+					) : (
+						<Select value={draft.language} onValueChange={setLanguage}>
+							<SelectTrigger id="podcast-language">
+								<SelectValue placeholder="Language" />
+							</SelectTrigger>
+							<SelectContent>
+								{languages.map((tag) => (
+									<SelectItem key={tag} value={tag}>
+										{languageLabel(tag)}
+									</SelectItem>
+								))}
+							</SelectContent>
+						</Select>
+					)}
 				</div>
 				<div className="flex flex-col gap-2">
 					<Label htmlFor="podcast-style">Style</Label>
@@ -375,6 +397,80 @@ export function BriefReview({ podcast, spec }: BriefReviewProps) {
 	);
 }

+/** A searchable language picker for providers whose voices speak anything:
+ * the offered list comes from the backend, and any BCP-47 tag may be typed
+ * when none of them fits.
+ *
+ * `value` is the selected tag shown on the trigger, `languages` the tags to
+ * list, and `onSelect` receives the tag the user picks or types. Typed text
+ * is trimmed but not otherwise validated here. */
+function LanguageCombobox({
+	value,
+	languages,
+	onSelect,
+}: {
+	value: string;
+	languages: string[];
+	onSelect: (language: string) => void;
+}) {
+	const [open, setOpen] = useState(false);
+	const [query, setQuery] = useState("");
+
+	// Commit a choice, then close the popover and clear the search box.
+	const pick = (tag: string) => {
+		onSelect(tag);
+		setOpen(false);
+		setQuery("");
+	};
+
+	// The typed text is offered as a custom tag only when it is non-empty and
+	// matches none of the listed tags (compared case-insensitively).
+	const customTag = query.trim();
+	const isNewTag =
+		customTag.length > 0 && !languages.some((tag) => tag.toLowerCase() === customTag.toLowerCase());
+
+	return (
+		<Popover open={open} onOpenChange={setOpen}>
+			<PopoverTrigger asChild>
+				<button
+					type="button"
+					role="combobox"
+					aria-expanded={open}
+					id="podcast-language"
+					className="border-popover-border flex h-9 w-full items-center justify-between gap-2 rounded-md border bg-transparent px-3 py-2 text-sm whitespace-nowrap shadow-xs outline-none transition-[color,box-shadow] disabled:cursor-not-allowed disabled:opacity-50"
+				>
+					<span className="line-clamp-1 text-left">{languageLabel(value)}</span>
+					<ChevronDown className="size-4 shrink-0 opacity-50" />
+				</button>
+			</PopoverTrigger>
+			<PopoverContent className="w-[var(--radix-popover-trigger-width)] p-0" align="start">
+				<Command>
+					<CommandInput
+						placeholder="Search or type a language tag…"
+						value={query}
+						onValueChange={setQuery}
+					/>
+					<CommandList>
+						<CommandEmpty>No matching language.</CommandEmpty>
+						<CommandGroup>
+							{languages.map((tag) => (
+								<CommandItem
+									key={tag}
+									value={tag}
+									keywords={[languageLabel(tag)]}
+									onSelect={() => pick(tag)}
+								>
+									<Check className={tag === value ? "size-4" : "size-4 opacity-0"} />
+									{languageLabel(tag)}
+								</CommandItem>
+							))}
+							{isNewTag ? (
+								<CommandItem value={customTag} onSelect={() => pick(customTag)}>
+									<Plus className="size-4" />
+									Use “{customTag}”
+								</CommandItem>
+							) : null}
+						</CommandGroup>
+					</CommandList>
+				</Command>
+			</PopoverContent>
+		</Popover>
+	);
+}
+
 /** The current selection stays listed even when it no longer matches the
 * language filter, so the Select never renders an orphaned value. */
 function voiceItems(candidates: VoiceOption[], selectedId: string): VoiceOption[] {
--- a/surfsense_web/contracts/types/podcast.types.ts
+++ b/surfsense_web/contracts/types/podcast.types.ts
@@ -103,6 +103,15 @@ export const voiceOption = z.object({
 });
 export type VoiceOption = z.infer<typeof voiceOption>;

+// The languages the backend offers for the active TTS provider. When
+// `allows_custom` is true the list is a starting point and any BCP-47 tag
+// may be entered. The field names match the backend's `LanguageOptions`
+// response model.
+export const languageOptions = z.object({
+	languages: z.array(z.string()),
+	allows_custom: z.boolean(),
+});
+export type LanguageOptions = z.infer<typeof languageOptions>;
+
 export const updateSpecRequest = z.object({
 	spec: podcastSpec,
 	expected_version: z.number().int().min(1),
--- a/surfsense_web/lib/apis/podcasts-api.service.ts
+++ b/surfsense_web/lib/apis/podcasts-api.service.ts
@@ -1,5 +1,6 @@
 import { z } from "zod";
 import {
+	languageOptions,
 	type PodcastSpec,
 	podcastDetail,
 	updateSpecRequest,
@@ -60,6 +61,12 @@ class PodcastsApiService {
 		return baseApiService.get(`${BASE}/voices${qs}`, voiceOptionList);
 	};

+	// The languages the active provider can offer; the brief form renders
+	// exactly this list and only opens free entry when the backend allows it.
+	// Issues a GET to `${BASE}/languages`, passing the `languageOptions`
+	// schema for the response.
+	listLanguages = async () => {
+		return baseApiService.get(`${BASE}/languages`, languageOptions);
+	};
+
 	// A short audio sample of a voice, cached server-side per voice.
 	previewVoice = async (voiceId: string) => {
 		return baseApiService.getBlob(`${BASE}/voices/${encodeURIComponent(voiceId)}/preview`);