feat(podcasts): add language and voice resolution

This commit is contained in:
CREDO23 2026-06-10 18:44:03 +02:00
parent 75287020e1
commit bd6d079030
3 changed files with 180 additions and 0 deletions

View file

@ -0,0 +1,27 @@
"""Resolution: deterministic default chains for a fresh brief.
Turns weak signals (detected language, last-used preferences) into concrete
language and voice defaults, so the brief gate opens pre-filled and most users
approve without editing.
"""
from __future__ import annotations
from .language import (
DEFAULT_LANGUAGE,
DEFAULT_LANGUAGE_CHAIN,
LanguageContext,
LanguageResolver,
resolve_language,
)
from .voices import VoiceResolutionError, resolve_voices
# Public API of the resolution package: the names re-exported from the
# ``.language`` and ``.voices`` submodules imported above.
__all__ = [
"DEFAULT_LANGUAGE",
"DEFAULT_LANGUAGE_CHAIN",
"LanguageContext",
"LanguageResolver",
"VoiceResolutionError",
"resolve_language",
"resolve_voices",
]

View file

@ -0,0 +1,74 @@
"""Resolve the brief's language without spending tokens at the gate.
The chain mirrors the agreed policy: prefer a language detected from the source,
fall back to what the user last chose, and finally default to English (which the
user can still override in the brief). Detection itself is performed upstream
where an LLM is available and passed in as :attr:`LanguageContext.detected`, so
this layer stays pure and deterministic.
"""
from __future__ import annotations
from abc import ABC, abstractmethod
from dataclasses import dataclass
# Fallback language tag: what a brand-new user with no signal gets, and what
# every chain ends on. Both ``DefaultLanguage.resolve`` and the safety-net
# return at the bottom of ``resolve_language`` yield this value.
DEFAULT_LANGUAGE = "en"
@dataclass(frozen=True, slots=True)
class LanguageContext:
"""Signals available when proposing a language for a fresh podcast."""
detected: str | None = None
last_used: str | None = None
class LanguageResolver(ABC):
    """A single link in the language fallback chain.

    A concrete resolver inspects the context and either commits to a language
    or passes by returning ``None``.
    """

    @abstractmethod
    def resolve(self, context: LanguageContext) -> str | None:
        """Produce a language tag, or ``None`` to hand over to the next link."""
class DetectedLanguage(LanguageResolver):
    """Propose the language that was detected from the source.

    The value is passed through exactly as found on the context; this step
    applies no confidence threshold of its own.
    """

    def resolve(self, context: LanguageContext) -> str | None:
        """Return ``context.detected`` (``None`` when nothing was detected)."""
        return context.detected
class LastUsedLanguage(LanguageResolver):
    """Propose whichever language the user chose for their previous podcast."""

    def resolve(self, context: LanguageContext) -> str | None:
        """Return ``context.last_used`` (``None`` when there is no history)."""
        return context.last_used
class DefaultLanguage(LanguageResolver):
    """Final link of the chain: unconditionally answers with the default.

    Because it never returns ``None``, a chain ending in this resolver always
    produces a language.
    """

    def resolve(self, context: LanguageContext) -> str | None:
        """Return ``DEFAULT_LANGUAGE`` regardless of ``context``."""
        return DEFAULT_LANGUAGE
# Order encodes the policy: detected language first, then the user's last
# choice, then the hard default. Prepend stronger signals here as they appear.
# ``DefaultLanguage`` always yields a value, so it must stay last; anything
# placed after it would never be consulted.
DEFAULT_LANGUAGE_CHAIN: tuple[LanguageResolver, ...] = (
DetectedLanguage(),
LastUsedLanguage(),
DefaultLanguage(),
)
def resolve_language(
    context: LanguageContext,
    chain: tuple[LanguageResolver, ...] = DEFAULT_LANGUAGE_CHAIN,
) -> str:
    """Walk ``chain`` and return the first usable language a resolver yields.

    Args:
        context: Signals (detected / last-used language) handed to each
            resolver in turn.
        chain: Resolvers to consult, strongest signal first.

    Returns:
        The first non-blank language tag, with surrounding whitespace
        removed. Falls back to ``DEFAULT_LANGUAGE`` when no resolver in the
        chain yields one.
    """
    for resolver in chain:
        candidate = resolver.resolve(context)
        if candidate is None:
            continue
        # Strip *before* testing: a whitespace-only tag is "no answer" and
        # must defer to the next resolver rather than be returned as "".
        language = candidate.strip()
        if language:
            return language
    # The default resolver guarantees a value; this guards a misconfigured chain.
    return DEFAULT_LANGUAGE

View file

@ -0,0 +1,79 @@
"""Assign a default voice to each speaker for the resolved language.
The default chain reuses the user's previously chosen voices where they are
still valid for the new language/provider, then fills any remaining speakers
with distinct catalog voices (preferring an unused gender so a two-speaker
episode sounds like two people). The user can override any of these in the
brief; this only seeds sensible defaults so most briefs need no edits.
"""
from __future__ import annotations
from collections.abc import Sequence
from app.podcasts.voices import CatalogVoice, TtsProvider, VoiceCatalog
class VoiceResolutionError(RuntimeError):
    """Raised when the catalog offers no voice for a provider/language pair."""
def resolve_voices(
    *,
    catalog: VoiceCatalog,
    provider: TtsProvider,
    language: str,
    speaker_count: int,
    preferred: Sequence[str] | None = None,
) -> list[CatalogVoice]:
    """Return one :class:`CatalogVoice` per speaker, in slot order.

    ``preferred`` is the user's last-used voice ids (by slot); any that no
    longer fit the provider/language are silently dropped and replaced.

    Resolution runs in two passes so that a preference is never displaced by
    a fallback pick made for an earlier slot:

    1. Every preferred id that is still in the catalog (and not already
       claimed by an earlier slot) is pinned to its own slot.
    2. The remaining slots are filled with distinct catalog voices,
       preferring a gender not used by any voice assigned so far.

    Args:
        catalog: Source of voices; queried once via ``for_language``.
        provider: TTS provider whose voices are eligible.
        language: Language the voices must support.
        speaker_count: Number of speaker slots to fill; must be at least 1.
        preferred: Previously chosen voice ids, indexed by slot. Entries
            beyond ``speaker_count`` are ignored.

    Returns:
        A list of exactly ``speaker_count`` voices.

    Raises:
        ValueError: If ``speaker_count`` is less than 1.
        VoiceResolutionError: If the catalog has no voice for the
            provider/language pair.
    """
    if speaker_count < 1:
        raise ValueError("speaker_count must be >= 1")
    available = catalog.for_language(provider, language)
    if not available:
        raise VoiceResolutionError(
            f"{provider.value} has no voice for language {language!r}"
        )
    preferred = preferred or ()
    by_id = {voice.voice_id: voice for voice in available}
    used_ids: set[str] = set()
    used_genders: set[object] = set()

    # Pass 1: pin still-valid preferences to their slots before any filling,
    # so a fallback for slot 0 cannot take the voice slot 1 asked for.
    pinned: dict[int, CatalogVoice] = {}
    for slot, reuse_id in zip(range(speaker_count), preferred):
        if reuse_id and reuse_id in by_id and reuse_id not in used_ids:
            voice = by_id[reuse_id]
            pinned[slot] = voice
            used_ids.add(voice.voice_id)
            used_genders.add(voice.gender)

    # Pass 2: fill the gaps, now aware of every reused voice and gender.
    assignment: list[CatalogVoice] = []
    for slot in range(speaker_count):
        voice = pinned.get(slot)
        if voice is None:
            voice = _pick_distinct(available, used_ids, used_genders)
            used_ids.add(voice.voice_id)
            used_genders.add(voice.gender)
        assignment.append(voice)
    return assignment


def _pick_distinct(
    available: list[CatalogVoice],
    used_ids: set[str],
    used_genders: set[object],
) -> CatalogVoice:
    """Pick a fresh voice, preferring an unused gender, then any unused voice.

    Falls back to the first catalog voice when speakers outnumber distinct
    voices, so resolution always assigns every speaker rather than failing.
    ``available`` must be non-empty.
    """
    fresh = [voice for voice in available if voice.voice_id not in used_ids]
    if not fresh:
        # Every voice is already taken: repeat the first one rather than fail.
        return available[0]
    for voice in fresh:
        if voice.gender not in used_genders:
            return voice
    return fresh[0]