Merge pull request #1487 from CREDO23/improvement-podcast-graph

[Feat] Podcast: Backend-owned language offering for the brief form
This commit is contained in:
Rohan Verma 2026-06-12 00:58:02 -07:00 committed by GitHub
commit 4c28ba5295
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 295 additions and 22 deletions

View file

@ -47,6 +47,7 @@ from app.utils.rbac import check_permission
from .schemas import (
CreatePodcastRequest,
LanguageOptions,
PodcastDetail,
PodcastSummary,
UpdateSpecRequest,
@ -114,6 +115,20 @@ async def list_voices(language: str | None = None):
]
@router.get("/podcasts/languages", response_model=LanguageOptions)
async def list_languages():
    """Report the languages the brief editor may offer for the active TTS provider.

    Returns:
        A ``LanguageOptions`` built from the voice catalog's offering for the
        configured provider: the language tags to list and whether tags
        outside that list may be entered.

    Raises:
        HTTPException: 503 when ``app_config.TTS_SERVICE`` is empty or unset.
    """
    tts_service = app_config.TTS_SERVICE
    if not tts_service:
        raise HTTPException(status_code=503, detail="No TTS provider configured")

    # Resolve the provider first, then ask the catalog what it can offer.
    active_provider = provider_from_service(tts_service)
    offered = get_voice_catalog().offerable_languages(active_provider)
    return LanguageOptions(
        languages=offered.languages,
        allows_custom=offered.allows_custom,
    )
@router.get("/podcasts/voices/{voice_id}/preview")
async def preview_voice(
voice_id: str,

View file

@ -51,6 +51,17 @@ class VoiceOption(BaseModel):
gender: str
class LanguageOptions(BaseModel):
    """Language choices the brief editor may present for the active provider.

    If ``allows_custom`` is true, ``languages`` is only a curated starting
    point: the editor also accepts any other BCP-47 tag.
    """

    # Language tags to list in the editor.
    languages: list[str]
    # True when tags beyond ``languages`` may be entered.
    allows_custom: bool
class PodcastSummary(BaseModel):
"""Lightweight list item."""

View file

@ -6,7 +6,7 @@ configured provider via :func:`provider_from_service`.
from __future__ import annotations
from .catalog import VoiceCatalog, get_voice_catalog
from .catalog import LanguageOffering, VoiceCatalog, get_voice_catalog
from .preview import render_voice_preview
from .provider import TtsProvider, provider_from_service
from .voice import ANY_LANGUAGE, CatalogVoice, VoiceGender
@ -14,6 +14,7 @@ from .voice import ANY_LANGUAGE, CatalogVoice, VoiceGender
__all__ = [
"ANY_LANGUAGE",
"CatalogVoice",
"LanguageOffering",
"TtsProvider",
"VoiceCatalog",
"VoiceGender",

View file

@ -9,11 +9,26 @@ provider-native reference.
from __future__ import annotations
from collections.abc import Iterable
from dataclasses import dataclass
from functools import lru_cache
from .data import AZURE_VOICES, KOKORO_VOICES, OPENAI_VOICES, VERTEX_VOICES
from .data.languages import COMMON_LANGUAGES
from .provider import TtsProvider
from .voice import CatalogVoice
from .voice import ANY_LANGUAGE, CatalogVoice
@dataclass(frozen=True, slots=True)
class LanguageOffering:
"""The languages a provider's roster can offer the brief form.
``allows_custom`` is true when the roster has wildcard voices: the listed
languages are then a curated starting point, not a limit, and any BCP-47
tag may be entered.
"""
languages: list[str]
allows_custom: bool
class VoiceCatalog:
@ -44,6 +59,20 @@ class VoiceCatalog:
"""Whether ``provider`` has at least one voice for ``language``."""
return any(v.speaks(language) for v in self.for_provider(provider))
def offerable_languages(self, provider: TtsProvider) -> LanguageOffering:
    """Work out which languages ``provider`` can present up front.

    Each language-bound voice contributes its own tag. A wildcard voice
    cannot name the languages it speaks, so finding one adds the curated
    common list and marks the offering as open to free entry.

    Returns:
        A ``LanguageOffering`` whose tags are de-duplicated and sorted, and
        whose ``allows_custom`` is true only when a wildcard voice exists.
    """
    offered: set[str] = set()
    wildcard_found = False
    for voice in self.for_provider(provider):
        if voice.language == ANY_LANGUAGE:
            wildcard_found = True
        else:
            offered.add(voice.language)

    if wildcard_found:
        offered.update(COMMON_LANGUAGES)

    return LanguageOffering(languages=sorted(offered), allows_custom=wildcard_found)
@lru_cache(maxsize=1)
def get_voice_catalog() -> VoiceCatalog:

View file

@ -0,0 +1,33 @@
"""Curated languages offered when a roster has wildcard (any-language) voices.
OpenAI-style multilingual voices speak whatever language the text is in, so
there is no provider list to enumerate. This is the set the brief form offers
up front for such providers; it is an offering, not a limit: the API flags
``allows_custom`` so users can enter any BCP-47 tag beyond it.
"""
from __future__ import annotations
# Bare two-letter language tags (no region part), listed alphabetically.
COMMON_LANGUAGES: tuple[str, ...] = (
    "ar", "bn", "de", "en", "es", "fr", "hi",
    "id", "it", "ja", "ko", "nl", "pl", "pt",
    "ru", "sw", "th", "tr", "uk", "vi", "zh",
)

View file

@ -76,8 +76,7 @@ async def test_quota_denial_fails_the_podcast_without_a_transcript(
async def _deny(**_kwargs):
raise QuotaInsufficientError(
usage_type="podcast_generation",
used_micros=5_000_000,
limit_micros=5_000_000,
balance_micros=0,
remaining_micros=0,
)
yield # pragma: no cover - unreachable, satisfies the CM protocol

View file

@ -29,3 +29,23 @@ async def test_voices_503_when_no_tts_configured(client, monkeypatch):
resp = await client.get(f"{BASE}/voices")
assert resp.status_code == 503
async def test_languages_returns_the_active_providers_offering(client):
    """The endpoint hands the brief form exactly what the backend offers.

    For a wildcard provider (openai/tts-1) that means the curated list plus
    free entry.
    """
    response = await client.get(f"{BASE}/languages")
    assert response.status_code == 200

    payload = response.json()
    offered_tags = payload["languages"]
    assert "en" in offered_tags
    assert "fr" in offered_tags
    assert payload["allows_custom"] is True
async def test_languages_503_when_no_tts_configured(client, monkeypatch):
    """With an empty ``TTS_SERVICE`` the languages endpoint answers 503."""
    monkeypatch.setattr(app_config, "TTS_SERVICE", "")

    response = await client.get(f"{BASE}/languages")

    assert response.status_code == 503

View file

@ -75,6 +75,59 @@ def test_supports_language_reports_availability():
assert not catalog.supports_language(TtsProvider.KOKORO, "de")
def test_offerable_languages_for_a_concrete_roster_are_its_tags_only():
    """Language-bound voices yield exactly their own tags, de-duplicated."""
    roster = [
        _voice("k1", language="en-US"),
        _voice("k2", language="fr"),
        _voice("k3", language="fr"),
    ]

    result = VoiceCatalog(roster).offerable_languages(TtsProvider.KOKORO)

    assert result.languages == ["en-US", "fr"]
    assert result.allows_custom is False
def test_a_wildcard_roster_offers_the_curated_languages_and_custom_entry():
    """A speak-anything voice cannot list its languages, so the catalog falls
    back to the curated common list and allows free entry."""
    wildcard_voice = _voice("o1", provider=TtsProvider.OPENAI, language=ANY_LANGUAGE)

    result = VoiceCatalog([wildcard_voice]).offerable_languages(TtsProvider.OPENAI)

    assert set(result.languages) >= {"en", "fr", "sw", "hi", "zh"}
    assert result.allows_custom is True
def test_a_mixed_roster_offers_the_union_of_concrete_and_curated():
    """One bound voice plus one wildcard voice: both sources appear."""
    roster = [
        _voice("v1", provider=TtsProvider.VERTEX_AI, language="en-GB"),
        _voice("v2", provider=TtsProvider.VERTEX_AI, language=ANY_LANGUAGE),
    ]

    result = VoiceCatalog(roster).offerable_languages(TtsProvider.VERTEX_AI)

    # "en-GB" comes from the bound voice, "fr" from the curated list.
    assert "en-GB" in result.languages
    assert "fr" in result.languages
    assert result.allows_custom is True
def test_a_provider_with_no_voices_offers_nothing():
    """Asking about a provider absent from the roster yields an empty, closed offering."""
    single_voice_catalog = VoiceCatalog([_voice("k1")])

    result = single_voice_catalog.offerable_languages(TtsProvider.OPENAI)

    assert result.languages == []
    assert result.allows_custom is False
def test_get_raises_for_an_unknown_voice():
catalog = VoiceCatalog([_voice("k1")])
with pytest.raises(KeyError):

View file

@ -1,11 +1,20 @@
"use client";
import { Loader2, Plus, Trash2 } from "lucide-react";
import { Check, ChevronDown, Loader2, Plus, Trash2 } from "lucide-react";
import { useEffect, useMemo, useState } from "react";
import { toast } from "sonner";
import { Button } from "@/components/ui/button";
import {
Command,
CommandEmpty,
CommandGroup,
CommandInput,
CommandItem,
CommandList,
} from "@/components/ui/command";
import { Input } from "@/components/ui/input";
import { Label } from "@/components/ui/label";
import { Popover, PopoverContent, PopoverTrigger } from "@/components/ui/popover";
import {
Select,
SelectContent,
@ -15,6 +24,7 @@ import {
} from "@/components/ui/select";
import { Textarea } from "@/components/ui/textarea";
import {
type LanguageOptions,
MAX_SPEAKERS,
type PodcastSpec,
type PodcastStyle,
@ -56,6 +66,7 @@ interface BriefReviewProps {
export function BriefReview({ podcast, spec }: BriefReviewProps) {
const [draft, setDraft] = useState<PodcastSpec>(spec);
const [voices, setVoices] = useState<VoiceOption[] | null>(null);
const [offering, setOffering] = useState<LanguageOptions | null>(null);
const [isSubmitting, setIsSubmitting] = useState(false);
// A pushed spec change (saved edit or concurrent editor) resets the form to
@ -75,19 +86,26 @@ export function BriefReview({ podcast, spec }: BriefReviewProps) {
.catch(() => {
if (!cancelled) setVoices([]);
});
podcastsApiService
.listLanguages()
.then((options) => {
if (!cancelled) setOffering(options);
})
.catch(() => {
if (!cancelled) setOffering({ languages: [], allows_custom: false });
});
return () => {
cancelled = true;
};
}, []);
// The backend owns the offering; the draft's language stays listed even
// when it falls outside it (e.g. a custom tag entered earlier).
const languages = useMemo(() => {
const tags = new Set<string>();
for (const voice of voices ?? []) {
if (voice.language !== ANY_LANGUAGE) tags.add(voice.language);
}
const tags = new Set(offering?.languages ?? []);
tags.add(draft.language);
return [...tags].sort();
}, [voices, draft.language]);
}, [offering, draft.language]);
const voicesForLanguage = useMemo(
() => (voices ?? []).filter((voice) => speaks(voice, draft.language)),
@ -193,18 +211,22 @@ export function BriefReview({ podcast, spec }: BriefReviewProps) {
<div className="grid grid-cols-2 gap-4">
<div className="flex flex-col gap-2">
<Label htmlFor="podcast-language">Language</Label>
<Select value={draft.language} onValueChange={setLanguage}>
<SelectTrigger id="podcast-language">
<SelectValue placeholder="Language" />
</SelectTrigger>
<SelectContent>
{languages.map((tag) => (
<SelectItem key={tag} value={tag}>
{languageLabel(tag)}
</SelectItem>
))}
</SelectContent>
</Select>
{offering?.allows_custom ? (
<LanguageCombobox value={draft.language} languages={languages} onSelect={setLanguage} />
) : (
<Select value={draft.language} onValueChange={setLanguage}>
<SelectTrigger id="podcast-language">
<SelectValue placeholder="Language" />
</SelectTrigger>
<SelectContent>
{languages.map((tag) => (
<SelectItem key={tag} value={tag}>
{languageLabel(tag)}
</SelectItem>
))}
</SelectContent>
</Select>
)}
</div>
<div className="flex flex-col gap-2">
<Label htmlFor="podcast-style">Style</Label>
@ -375,6 +397,80 @@ export function BriefReview({ podcast, spec }: BriefReviewProps) {
);
}
/** Language picker with a search box, for providers whose voices speak
 * anything. It lists the tags it is given and, when the typed text matches
 * none of them, offers to use that text as the language tag. */
function LanguageCombobox({
	value,
	languages,
	onSelect,
}: {
	value: string;
	languages: string[];
	onSelect: (language: string) => void;
}) {
	const [isOpen, setIsOpen] = useState(false);
	const [search, setSearch] = useState("");

	// The trimmed search text doubles as the candidate custom tag.
	const typedTag = search.trim();
	const typedTagLower = typedTag.toLowerCase();
	// Case-insensitive check: only offer "Use …" when no listed tag already matches.
	const offersTypedTag =
		typedTag.length > 0 && !languages.some((tag) => tag.toLowerCase() === typedTagLower);

	// Commit a choice, then close the popover and clear the search box.
	function choose(tag: string) {
		onSelect(tag);
		setIsOpen(false);
		setSearch("");
	}

	return (
		<Popover open={isOpen} onOpenChange={setIsOpen}>
			<PopoverTrigger asChild>
				<button
					type="button"
					role="combobox"
					aria-expanded={isOpen}
					id="podcast-language"
					className="border-popover-border flex h-9 w-full items-center justify-between gap-2 rounded-md border bg-transparent px-3 py-2 text-sm whitespace-nowrap shadow-xs outline-none transition-[color,box-shadow] disabled:cursor-not-allowed disabled:opacity-50"
				>
					<span className="line-clamp-1 text-left">{languageLabel(value)}</span>
					<ChevronDown className="size-4 shrink-0 opacity-50" />
				</button>
			</PopoverTrigger>
			<PopoverContent className="w-[var(--radix-popover-trigger-width)] p-0" align="start">
				<Command>
					<CommandInput
						placeholder="Search or type a language tag…"
						value={search}
						onValueChange={setSearch}
					/>
					<CommandList>
						<CommandEmpty>No matching language.</CommandEmpty>
						<CommandGroup>
							{languages.map((tag) => (
								<CommandItem
									key={tag}
									value={tag}
									keywords={[languageLabel(tag)]}
									onSelect={() => choose(tag)}
								>
									<Check className={tag === value ? "size-4" : "size-4 opacity-0"} />
									{languageLabel(tag)}
								</CommandItem>
							))}
							{offersTypedTag && (
								<CommandItem value={typedTag} onSelect={() => choose(typedTag)}>
									<Plus className="size-4" />
									Use {typedTag}
								</CommandItem>
							)}
						</CommandGroup>
					</CommandList>
				</Command>
			</PopoverContent>
		</Popover>
	);
}
/** The current selection stays listed even when it no longer matches the
* language filter, so the Select never renders an orphaned value. */
function voiceItems(candidates: VoiceOption[], selectedId: string): VoiceOption[] {

View file

@ -103,6 +103,15 @@ export const voiceOption = z.object({
});
export type VoiceOption = z.infer<typeof voiceOption>;
// Schema for the language offering the backend returns for the active TTS
// provider. When `allows_custom` is true the list is a starting point and
// any BCP-47 tag may be entered.
export const languageOptions = z.object({
// Language tags offered up front.
languages: z.array(z.string()),
// Whether tags outside `languages` are accepted.
allows_custom: z.boolean(),
});
// Static type inferred from the schema.
export type LanguageOptions = z.infer<typeof languageOptions>;
export const updateSpecRequest = z.object({
spec: podcastSpec,
expected_version: z.number().int().min(1),

View file

@ -1,5 +1,6 @@
import { z } from "zod";
import {
languageOptions,
type PodcastSpec,
podcastDetail,
updateSpecRequest,
@ -60,6 +61,12 @@ class PodcastsApiService {
return baseApiService.get(`${BASE}/voices${qs}`, voiceOptionList);
};
// Fetches the language offering for the active provider. The brief form shows
// exactly this list and enables free entry only when the backend allows it.
listLanguages = async () => baseApiService.get(`${BASE}/languages`, languageOptions);
// A short audio sample of a voice, cached server-side per voice.
previewVoice = async (voiceId: string) => {
return baseApiService.getBlob(`${BASE}/voices/${encodeURIComponent(voiceId)}/preview`);