diff --git a/surfsense_backend/app/podcasts/resolution/__init__.py b/surfsense_backend/app/podcasts/resolution/__init__.py new file mode 100644 index 000000000..ebfd3153a --- /dev/null +++ b/surfsense_backend/app/podcasts/resolution/__init__.py @@ -0,0 +1,27 @@ +"""Resolution: deterministic default chains for a fresh brief. + +Turns weak signals (detected language, last-used preferences) into concrete +language and voice defaults, so the brief gate opens pre-filled and most users +approve without editing. +""" + +from __future__ import annotations + +from .language import ( + DEFAULT_LANGUAGE, + DEFAULT_LANGUAGE_CHAIN, + LanguageContext, + LanguageResolver, + resolve_language, +) +from .voices import VoiceResolutionError, resolve_voices + +__all__ = [ + "DEFAULT_LANGUAGE", + "DEFAULT_LANGUAGE_CHAIN", + "LanguageContext", + "LanguageResolver", + "VoiceResolutionError", + "resolve_language", + "resolve_voices", +] diff --git a/surfsense_backend/app/podcasts/resolution/language.py b/surfsense_backend/app/podcasts/resolution/language.py new file mode 100644 index 000000000..2da90ef37 --- /dev/null +++ b/surfsense_backend/app/podcasts/resolution/language.py @@ -0,0 +1,74 @@ +"""Resolve the brief's language without spending tokens at the gate. + +The chain mirrors the agreed policy: prefer a language detected from the source, +fall back to what the user last chose, and finally default to English (which the +user can still override in the brief). Detection itself is performed upstream +where an LLM is available and passed in as :attr:`LanguageContext.detected`, so +this layer stays pure and deterministic. +""" + +from __future__ import annotations + +from abc import ABC, abstractmethod +from dataclasses import dataclass + +# What a brand-new user with no signal gets, and what every chain ends on. +DEFAULT_LANGUAGE = "en" + + +@dataclass(frozen=True, slots=True) +class LanguageContext: + """Signals available when proposing a language for a fresh podcast.""" + + detected: str | None = None + last_used: str | None = None + + +class LanguageResolver(ABC): + """One step in the language fallback chain.""" + + @abstractmethod + def resolve(self, context: LanguageContext) -> str | None: + """Return a language tag, or ``None`` to defer to the next resolver.""" + + +class DetectedLanguage(LanguageResolver): + """Use the language detected from the source, when confident enough.""" + + def resolve(self, context: LanguageContext) -> str | None: + return context.detected + + +class LastUsedLanguage(LanguageResolver): + """Reuse the language from the user's previous podcast.""" + + def resolve(self, context: LanguageContext) -> str | None: + return context.last_used + + +class DefaultLanguage(LanguageResolver): + """Terminal step: always yields the default so the chain never fails.""" + + def resolve(self, context: LanguageContext) -> str | None: + return DEFAULT_LANGUAGE + + +# Order encodes the policy; prepend stronger signals here as they appear. +DEFAULT_LANGUAGE_CHAIN: tuple[LanguageResolver, ...] = ( + DetectedLanguage(), + LastUsedLanguage(), + DefaultLanguage(), +) + + +def resolve_language( + context: LanguageContext, + chain: tuple[LanguageResolver, ...] = DEFAULT_LANGUAGE_CHAIN, +) -> str: + """Walk ``chain`` and return the first language a resolver yields.""" + for resolver in chain: + language = resolver.resolve(context) + if language: + return language.strip() + # The default resolver guarantees a value; this guards a misconfigured chain. + return DEFAULT_LANGUAGE diff --git a/surfsense_backend/app/podcasts/resolution/voices.py b/surfsense_backend/app/podcasts/resolution/voices.py new file mode 100644 index 000000000..8d865fbaa --- /dev/null +++ b/surfsense_backend/app/podcasts/resolution/voices.py @@ -0,0 +1,79 @@ +"""Assign a default voice to each speaker for the resolved language. + +The default chain reuses the user's previously chosen voices where they are +still valid for the new language/provider, then fills any remaining speakers +with distinct catalog voices (preferring an unused gender so a two-speaker +episode sounds like two people). The user can override any of these in the +brief; this only seeds sensible defaults so most briefs need no edits. +""" + +from __future__ import annotations + +from collections.abc import Sequence + +from app.podcasts.voices import CatalogVoice, TtsProvider, VoiceCatalog + + +class VoiceResolutionError(RuntimeError): + """No catalog voice exists for the requested provider and language.""" + + +def resolve_voices( + *, + catalog: VoiceCatalog, + provider: TtsProvider, + language: str, + speaker_count: int, + preferred: Sequence[str] | None = None, +) -> list[CatalogVoice]: + """Return one :class:`CatalogVoice` per speaker, in slot order. + + ``preferred`` is the user's last-used voice ids (by slot); any that no + longer fit the provider/language are silently dropped and replaced. + """ + if speaker_count < 1: + raise ValueError("speaker_count must be >= 1") + + available = catalog.for_language(provider, language) + if not available: + raise VoiceResolutionError( + f"{provider.value} has no voice for language {language!r}" + ) + + preferred = preferred or () + by_id = {voice.voice_id: voice for voice in available} + + assignment: list[CatalogVoice] = [] + used_ids: set[str] = set() + used_genders: set = set() + + for slot in range(speaker_count): + reuse_id = preferred[slot] if slot < len(preferred) else None + if reuse_id and reuse_id in by_id and reuse_id not in used_ids: + voice = by_id[reuse_id] + else: + voice = _pick_distinct(available, used_ids, used_genders) + assignment.append(voice) + used_ids.add(voice.voice_id) + used_genders.add(voice.gender) + + return assignment + + +def _pick_distinct( + available: list[CatalogVoice], + used_ids: set[str], + used_genders: set, +) -> CatalogVoice: + """Pick a fresh voice, preferring an unused gender, then any unused voice. + + Falls back to the first catalog voice when speakers outnumber distinct + voices, so resolution always assigns every speaker rather than failing. + """ + fresh = [v for v in available if v.voice_id not in used_ids] + if fresh: + for voice in fresh: + if voice.gender not in used_genders: + return voice + return fresh[0] + return available[0]