mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-12 20:45:20 +02:00
Merge pull request #1487 from CREDO23/improvement-podcast-graph
[Feat] Podcast: Backend-owned language offering for the brief form
This commit is contained in:
commit
4c28ba5295
11 changed files with 295 additions and 22 deletions
|
|
@ -47,6 +47,7 @@ from app.utils.rbac import check_permission
|
|||
|
||||
from .schemas import (
|
||||
CreatePodcastRequest,
|
||||
LanguageOptions,
|
||||
PodcastDetail,
|
||||
PodcastSummary,
|
||||
UpdateSpecRequest,
|
||||
|
|
@ -114,6 +115,20 @@ async def list_voices(language: str | None = None):
|
|||
]
|
||||
|
||||
|
||||
@router.get("/podcasts/languages", response_model=LanguageOptions)
async def list_languages():
    """Return the language offering of the configured TTS provider.

    The provider is derived from ``app_config.TTS_SERVICE`` and the offering
    is read from the voice catalog, then copied into the response model.

    Raises:
        HTTPException: 503 when ``app_config.TTS_SERVICE`` is falsy.
    """
    tts_service = app_config.TTS_SERVICE
    if not tts_service:
        raise HTTPException(status_code=503, detail="No TTS provider configured")

    active_provider = provider_from_service(tts_service)
    catalog = get_voice_catalog()
    offered = catalog.offerable_languages(active_provider)
    return LanguageOptions(languages=offered.languages, allows_custom=offered.allows_custom)
|
||||
|
||||
|
||||
@router.get("/podcasts/voices/{voice_id}/preview")
|
||||
async def preview_voice(
|
||||
voice_id: str,
|
||||
|
|
|
|||
|
|
@ -51,6 +51,17 @@ class VoiceOption(BaseModel):
|
|||
gender: str
|
||||
|
||||
|
||||
class LanguageOptions(BaseModel):
    """Response body listing the languages the brief editor may offer.

    ``languages`` holds the tags offered for the active provider. If
    ``allows_custom`` is true, that list is only a curated starting point:
    the editor also accepts any other BCP-47 tag.
    """

    # Language tags to present in the editor.
    languages: list[str]
    # Whether tags outside ``languages`` may be entered.
    allows_custom: bool
|
||||
|
||||
|
||||
class PodcastSummary(BaseModel):
|
||||
"""Lightweight list item."""
|
||||
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ configured provider via :func:`provider_from_service`.
|
|||
|
||||
from __future__ import annotations
|
||||
|
||||
from .catalog import VoiceCatalog, get_voice_catalog
|
||||
from .catalog import LanguageOffering, VoiceCatalog, get_voice_catalog
|
||||
from .preview import render_voice_preview
|
||||
from .provider import TtsProvider, provider_from_service
|
||||
from .voice import ANY_LANGUAGE, CatalogVoice, VoiceGender
|
||||
|
|
@ -14,6 +14,7 @@ from .voice import ANY_LANGUAGE, CatalogVoice, VoiceGender
|
|||
__all__ = [
|
||||
"ANY_LANGUAGE",
|
||||
"CatalogVoice",
|
||||
"LanguageOffering",
|
||||
"TtsProvider",
|
||||
"VoiceCatalog",
|
||||
"VoiceGender",
|
||||
|
|
|
|||
|
|
@ -9,11 +9,26 @@ provider-native reference.
|
|||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Iterable
|
||||
from dataclasses import dataclass
|
||||
from functools import lru_cache
|
||||
|
||||
from .data import AZURE_VOICES, KOKORO_VOICES, OPENAI_VOICES, VERTEX_VOICES
|
||||
from .data.languages import COMMON_LANGUAGES
|
||||
from .provider import TtsProvider
|
||||
from .voice import CatalogVoice
|
||||
from .voice import ANY_LANGUAGE, CatalogVoice
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class LanguageOffering:
|
||||
"""The languages a provider's roster can offer the brief form.
|
||||
|
||||
``allows_custom`` is true when the roster has wildcard voices: the listed
|
||||
languages are then a curated starting point, not a limit, and any BCP-47
|
||||
tag may be entered.
|
||||
"""
|
||||
|
||||
languages: list[str]
|
||||
allows_custom: bool
|
||||
|
||||
|
||||
class VoiceCatalog:
|
||||
|
|
@ -44,6 +59,20 @@ class VoiceCatalog:
|
|||
"""Whether ``provider`` has at least one voice for ``language``."""
|
||||
return any(v.speaks(language) for v in self.for_provider(provider))
|
||||
|
||||
def offerable_languages(self, provider: TtsProvider) -> LanguageOffering:
    """Work out which languages ``provider`` can offer up front.

    Every voice bound to a language contributes its tag. A wildcard voice
    (language ``ANY_LANGUAGE``) has no tag to contribute; if at least one
    is present, the curated ``COMMON_LANGUAGES`` are merged in and custom
    entry is allowed.

    Returns:
        A ``LanguageOffering`` with the tags sorted and ``allows_custom``
        set to whether a wildcard voice was found.
    """
    concrete_tags: set[str] = set()
    wildcard_seen = False
    for voice in self.for_provider(provider):
        if voice.language == ANY_LANGUAGE:
            wildcard_seen = True
        else:
            concrete_tags.add(voice.language)

    if wildcard_seen:
        concrete_tags.update(COMMON_LANGUAGES)

    return LanguageOffering(
        languages=sorted(concrete_tags),
        allows_custom=wildcard_seen,
    )
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
|
||||
def get_voice_catalog() -> VoiceCatalog:
|
||||
|
|
|
|||
33
surfsense_backend/app/podcasts/voices/data/languages.py
Normal file
33
surfsense_backend/app/podcasts/voices/data/languages.py
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
"""Curated languages offered when a roster has wildcard (any-language) voices.
|
||||
|
||||
OpenAI-style multilingual voices speak whatever language the text is in, so
|
||||
there is no provider list to enumerate. This is the set the brief form offers
|
||||
up front for such providers; it is an offering, not a limit — the API flags
|
||||
``allows_custom`` so users can enter any BCP-47 tag beyond it.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
# Curated language tags, kept in alphabetical order.
COMMON_LANGUAGES: tuple[str, ...] = (
    "ar", "bn", "de", "en", "es", "fr", "hi",
    "id", "it", "ja", "ko", "nl", "pl", "pt",
    "ru", "sw", "th", "tr", "uk", "vi", "zh",
)
|
||||
|
|
@ -76,8 +76,7 @@ async def test_quota_denial_fails_the_podcast_without_a_transcript(
|
|||
async def _deny(**_kwargs):
|
||||
raise QuotaInsufficientError(
|
||||
usage_type="podcast_generation",
|
||||
used_micros=5_000_000,
|
||||
limit_micros=5_000_000,
|
||||
balance_micros=0,
|
||||
remaining_micros=0,
|
||||
)
|
||||
yield # pragma: no cover - unreachable, satisfies the CM protocol
|
||||
|
|
|
|||
|
|
@ -29,3 +29,23 @@ async def test_voices_503_when_no_tts_configured(client, monkeypatch):
|
|||
resp = await client.get(f"{BASE}/voices")
|
||||
|
||||
assert resp.status_code == 503
|
||||
|
||||
|
||||
async def test_languages_returns_the_active_providers_offering(client):
    """The languages endpoint reports the backend's offering; for a wildcard
    provider (openai/tts-1) that means the curated list with free entry on."""
    response = await client.get(f"{BASE}/languages")
    assert response.status_code == 200

    payload = response.json()
    offered = payload["languages"]
    assert "en" in offered
    assert "fr" in offered
    assert payload["allows_custom"] is True
|
||||
|
||||
|
||||
async def test_languages_503_when_no_tts_configured(client, monkeypatch):
    """With ``TTS_SERVICE`` blanked out, the languages endpoint answers 503."""
    monkeypatch.setattr(app_config, "TTS_SERVICE", "")

    response = await client.get(f"{BASE}/languages")

    assert response.status_code == 503
|
||||
|
|
|
|||
|
|
@ -75,6 +75,59 @@ def test_supports_language_reports_availability():
|
|||
assert not catalog.supports_language(TtsProvider.KOKORO, "de")
|
||||
|
||||
|
||||
def test_offerable_languages_for_a_concrete_roster_are_its_tags_only():
    """Language-bound voices yield exactly their own tags, deduplicated and
    sorted, with custom entry switched off."""
    roster = [
        _voice("k1", language="en-US"),
        _voice("k2", language="fr"),
        _voice("k3", language="fr"),
    ]
    catalog = VoiceCatalog(roster)

    result = catalog.offerable_languages(TtsProvider.KOKORO)

    assert result.languages == ["en-US", "fr"]
    assert result.allows_custom is False
|
||||
|
||||
|
||||
def test_a_wildcard_roster_offers_the_curated_languages_and_custom_entry():
    """An any-language voice has no tags of its own to list, so the catalog
    falls back to the curated common list and allows free entry."""
    wildcard_voice = _voice("o1", provider=TtsProvider.OPENAI, language=ANY_LANGUAGE)
    catalog = VoiceCatalog([wildcard_voice])

    result = catalog.offerable_languages(TtsProvider.OPENAI)

    expected_subset = {"en", "fr", "sw", "hi", "zh"}
    assert expected_subset <= set(result.languages)
    assert result.allows_custom is True
|
||||
|
||||
|
||||
def test_a_mixed_roster_offers_the_union_of_concrete_and_curated():
    """A roster with both a language-bound and a wildcard voice offers the
    concrete tag alongside the curated list, with custom entry allowed."""
    bound_voice = _voice("v1", provider=TtsProvider.VERTEX_AI, language="en-GB")
    wildcard_voice = _voice("v2", provider=TtsProvider.VERTEX_AI, language=ANY_LANGUAGE)
    catalog = VoiceCatalog([bound_voice, wildcard_voice])

    result = catalog.offerable_languages(TtsProvider.VERTEX_AI)

    assert "en-GB" in result.languages
    assert "fr" in result.languages
    assert result.allows_custom is True
|
||||
|
||||
|
||||
def test_a_provider_with_no_voices_offers_nothing():
    """Asking for a provider absent from the roster yields an empty list and
    no custom entry."""
    catalog = VoiceCatalog([_voice("k1")])

    result = catalog.offerable_languages(TtsProvider.OPENAI)

    assert result.languages == []
    assert result.allows_custom is False
|
||||
|
||||
|
||||
def test_get_raises_for_an_unknown_voice():
|
||||
catalog = VoiceCatalog([_voice("k1")])
|
||||
with pytest.raises(KeyError):
|
||||
|
|
|
|||
|
|
@ -1,11 +1,20 @@
|
|||
"use client";
|
||||
|
||||
import { Loader2, Plus, Trash2 } from "lucide-react";
|
||||
import { Check, ChevronDown, Loader2, Plus, Trash2 } from "lucide-react";
|
||||
import { useEffect, useMemo, useState } from "react";
|
||||
import { toast } from "sonner";
|
||||
import { Button } from "@/components/ui/button";
|
||||
import {
|
||||
Command,
|
||||
CommandEmpty,
|
||||
CommandGroup,
|
||||
CommandInput,
|
||||
CommandItem,
|
||||
CommandList,
|
||||
} from "@/components/ui/command";
|
||||
import { Input } from "@/components/ui/input";
|
||||
import { Label } from "@/components/ui/label";
|
||||
import { Popover, PopoverContent, PopoverTrigger } from "@/components/ui/popover";
|
||||
import {
|
||||
Select,
|
||||
SelectContent,
|
||||
|
|
@ -15,6 +24,7 @@ import {
|
|||
} from "@/components/ui/select";
|
||||
import { Textarea } from "@/components/ui/textarea";
|
||||
import {
|
||||
type LanguageOptions,
|
||||
MAX_SPEAKERS,
|
||||
type PodcastSpec,
|
||||
type PodcastStyle,
|
||||
|
|
@ -56,6 +66,7 @@ interface BriefReviewProps {
|
|||
export function BriefReview({ podcast, spec }: BriefReviewProps) {
|
||||
const [draft, setDraft] = useState<PodcastSpec>(spec);
|
||||
const [voices, setVoices] = useState<VoiceOption[] | null>(null);
|
||||
const [offering, setOffering] = useState<LanguageOptions | null>(null);
|
||||
const [isSubmitting, setIsSubmitting] = useState(false);
|
||||
|
||||
// A pushed spec change (saved edit or concurrent editor) resets the form to
|
||||
|
|
@ -75,19 +86,26 @@ export function BriefReview({ podcast, spec }: BriefReviewProps) {
|
|||
.catch(() => {
|
||||
if (!cancelled) setVoices([]);
|
||||
});
|
||||
podcastsApiService
|
||||
.listLanguages()
|
||||
.then((options) => {
|
||||
if (!cancelled) setOffering(options);
|
||||
})
|
||||
.catch(() => {
|
||||
if (!cancelled) setOffering({ languages: [], allows_custom: false });
|
||||
});
|
||||
return () => {
|
||||
cancelled = true;
|
||||
};
|
||||
}, []);
|
||||
|
||||
// The backend owns the offering; the draft's language stays listed even
|
||||
// when it falls outside it (e.g. a custom tag entered earlier).
|
||||
const languages = useMemo(() => {
|
||||
const tags = new Set<string>();
|
||||
for (const voice of voices ?? []) {
|
||||
if (voice.language !== ANY_LANGUAGE) tags.add(voice.language);
|
||||
}
|
||||
const tags = new Set(offering?.languages ?? []);
|
||||
tags.add(draft.language);
|
||||
return [...tags].sort();
|
||||
}, [voices, draft.language]);
|
||||
}, [offering, draft.language]);
|
||||
|
||||
const voicesForLanguage = useMemo(
|
||||
() => (voices ?? []).filter((voice) => speaks(voice, draft.language)),
|
||||
|
|
@ -193,18 +211,22 @@ export function BriefReview({ podcast, spec }: BriefReviewProps) {
|
|||
<div className="grid grid-cols-2 gap-4">
|
||||
<div className="flex flex-col gap-2">
|
||||
<Label htmlFor="podcast-language">Language</Label>
|
||||
<Select value={draft.language} onValueChange={setLanguage}>
|
||||
<SelectTrigger id="podcast-language">
|
||||
<SelectValue placeholder="Language" />
|
||||
</SelectTrigger>
|
||||
<SelectContent>
|
||||
{languages.map((tag) => (
|
||||
<SelectItem key={tag} value={tag}>
|
||||
{languageLabel(tag)}
|
||||
</SelectItem>
|
||||
))}
|
||||
</SelectContent>
|
||||
</Select>
|
||||
{offering?.allows_custom ? (
|
||||
<LanguageCombobox value={draft.language} languages={languages} onSelect={setLanguage} />
|
||||
) : (
|
||||
<Select value={draft.language} onValueChange={setLanguage}>
|
||||
<SelectTrigger id="podcast-language">
|
||||
<SelectValue placeholder="Language" />
|
||||
</SelectTrigger>
|
||||
<SelectContent>
|
||||
{languages.map((tag) => (
|
||||
<SelectItem key={tag} value={tag}>
|
||||
{languageLabel(tag)}
|
||||
</SelectItem>
|
||||
))}
|
||||
</SelectContent>
|
||||
</Select>
|
||||
)}
|
||||
</div>
|
||||
<div className="flex flex-col gap-2">
|
||||
<Label htmlFor="podcast-style">Style</Label>
|
||||
|
|
@ -375,6 +397,80 @@ export function BriefReview({ podcast, spec }: BriefReviewProps) {
|
|||
);
|
||||
}
|
||||
|
||||
/** Searchable picker for the brief's language. It lists the tags it is given
 * and, when the typed text matches none of them (ignoring case), adds a
 * "Use …" entry so that exact text can be chosen as the language tag. */
function LanguageCombobox({
	value,
	languages,
	onSelect,
}: {
	value: string;
	languages: string[];
	onSelect: (language: string) => void;
}) {
	const [open, setOpen] = useState(false);
	const [query, setQuery] = useState("");

	// Report the choice, then close the popover and clear the search text.
	const choose = (tag: string) => {
		onSelect(tag);
		setOpen(false);
		setQuery("");
	};

	// The typed text is offered as a new tag only when it is non-empty and
	// differs (case-insensitively) from every listed tag.
	const typedTag = query.trim();
	const typedTagLower = typedTag.toLowerCase();
	const canUseTypedTag =
		typedTag.length > 0 && languages.every((tag) => tag.toLowerCase() !== typedTagLower);

	const listedItems = languages.map((tag) => {
		const checkClassName = tag === value ? "size-4" : "size-4 opacity-0";
		return (
			<CommandItem
				key={tag}
				value={tag}
				keywords={[languageLabel(tag)]}
				onSelect={() => choose(tag)}
			>
				<Check className={checkClassName} />
				{languageLabel(tag)}
			</CommandItem>
		);
	});

	return (
		<Popover open={open} onOpenChange={setOpen}>
			<PopoverTrigger asChild>
				<button
					type="button"
					role="combobox"
					aria-expanded={open}
					id="podcast-language"
					className="border-popover-border flex h-9 w-full items-center justify-between gap-2 rounded-md border bg-transparent px-3 py-2 text-sm whitespace-nowrap shadow-xs outline-none transition-[color,box-shadow] disabled:cursor-not-allowed disabled:opacity-50"
				>
					<span className="line-clamp-1 text-left">{languageLabel(value)}</span>
					<ChevronDown className="size-4 shrink-0 opacity-50" />
				</button>
			</PopoverTrigger>
			<PopoverContent className="w-[var(--radix-popover-trigger-width)] p-0" align="start">
				<Command>
					<CommandInput
						placeholder="Search or type a language tag…"
						value={query}
						onValueChange={setQuery}
					/>
					<CommandList>
						<CommandEmpty>No matching language.</CommandEmpty>
						<CommandGroup>
							{listedItems}
							{canUseTypedTag && (
								<CommandItem value={typedTag} onSelect={() => choose(typedTag)}>
									<Plus className="size-4" />
									Use “{typedTag}”
								</CommandItem>
							)}
						</CommandGroup>
					</CommandList>
				</Command>
			</PopoverContent>
		</Popover>
	);
}
|
||||
|
||||
/** The current selection stays listed even when it no longer matches the
|
||||
* language filter, so the Select never renders an orphaned value. */
|
||||
function voiceItems(candidates: VoiceOption[], selectedId: string): VoiceOption[] {
|
||||
|
|
|
|||
|
|
@ -103,6 +103,15 @@ export const voiceOption = z.object({
|
|||
});
|
||||
export type VoiceOption = z.infer<typeof voiceOption>;
|
||||
|
||||
// Schema for the backend's language offering for the active TTS provider.
// `languages` is the list to show; when `allows_custom` is true that list is
// only a starting point and any BCP-47 tag may be entered.
export const languageOptions = z.object({
	languages: z.string().array(),
	allows_custom: z.boolean(),
});
export type LanguageOptions = z.infer<typeof languageOptions>;
|
||||
|
||||
export const updateSpecRequest = z.object({
|
||||
spec: podcastSpec,
|
||||
expected_version: z.number().int().min(1),
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
import { z } from "zod";
|
||||
import {
|
||||
languageOptions,
|
||||
type PodcastSpec,
|
||||
podcastDetail,
|
||||
updateSpecRequest,
|
||||
|
|
@ -60,6 +61,12 @@ class PodcastsApiService {
|
|||
return baseApiService.get(`${BASE}/voices${qs}`, voiceOptionList);
|
||||
};
|
||||
|
||||
// Fetches the language offering of the active provider from
// `${BASE}/languages`, validated against the `languageOptions` schema. The
// brief form shows this list and opens free entry only when the backend
// allows it.
listLanguages = async () => baseApiService.get(`${BASE}/languages`, languageOptions);
|
||||
|
||||
// A short audio sample of a voice, cached server-side per voice.
|
||||
previewVoice = async (voiceId: string) => {
|
||||
return baseApiService.getBlob(`${BASE}/voices/${encodeURIComponent(voiceId)}/preview`);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue