mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-12 20:45:20 +02:00
feat(podcasts): add language and voice resolution
This commit is contained in:
parent
75287020e1
commit
bd6d079030
3 changed files with 180 additions and 0 deletions
27
surfsense_backend/app/podcasts/resolution/__init__.py
Normal file
27
surfsense_backend/app/podcasts/resolution/__init__.py
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
"""Resolution: deterministic default chains for a fresh brief.
|
||||
|
||||
Turns weak signals (detected language, last-used preferences) into concrete
|
||||
language and voice defaults, so the brief gate opens pre-filled and most users
|
||||
approve without editing.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from .language import (
|
||||
DEFAULT_LANGUAGE,
|
||||
DEFAULT_LANGUAGE_CHAIN,
|
||||
LanguageContext,
|
||||
LanguageResolver,
|
||||
resolve_language,
|
||||
)
|
||||
from .voices import VoiceResolutionError, resolve_voices
|
||||
|
||||
# Public API re-exported by this package, listed in case-sensitive sorted order.
__all__ = [
    "DEFAULT_LANGUAGE",
    "DEFAULT_LANGUAGE_CHAIN",
    "LanguageContext",
    "LanguageResolver",
    "VoiceResolutionError",
    "resolve_language",
    "resolve_voices",
]
|
||||
74
surfsense_backend/app/podcasts/resolution/language.py
Normal file
74
surfsense_backend/app/podcasts/resolution/language.py
Normal file
|
|
@ -0,0 +1,74 @@
|
|||
"""Resolve the brief's language without spending tokens at the gate.
|
||||
|
||||
The chain mirrors the agreed policy: prefer a language detected from the source,
|
||||
fall back to what the user last chose, and finally default to English (which the
|
||||
user can still override in the brief). Detection itself is performed upstream
|
||||
where an LLM is available and passed in as :attr:`LanguageContext.detected`, so
|
||||
this layer stays pure and deterministic.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass
|
||||
|
||||
# Fallback language tag: the value a user with no signal at all receives, and
# the value every resolver chain terminates on.
DEFAULT_LANGUAGE = "en"
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class LanguageContext:
|
||||
"""Signals available when proposing a language for a fresh podcast."""
|
||||
|
||||
detected: str | None = None
|
||||
last_used: str | None = None
|
||||
|
||||
|
||||
class LanguageResolver(ABC):
    """A single link in the language fallback chain.

    Concrete resolvers implement :meth:`resolve` and either propose a language
    or decline so that the next link gets a chance.
    """

    @abstractmethod
    def resolve(self, context: LanguageContext) -> str | None:
        """Propose a language tag for ``context``.

        Returning ``None`` defers the decision to the next resolver.
        """
|
||||
|
||||
|
||||
class DetectedLanguage(LanguageResolver):
    """Propose the language that was detected from the source, if any.

    No confidence check happens here: the value stored in
    ``context.detected`` is passed through unchanged, and ``None`` defers to
    the next resolver.
    """

    def resolve(self, context: LanguageContext) -> str | None:
        detected = context.detected
        return detected
|
||||
|
||||
|
||||
class LastUsedLanguage(LanguageResolver):
    """Propose the language the user chose for their previous podcast.

    Yields ``context.last_used`` unchanged; ``None`` defers to the next
    resolver.
    """

    def resolve(self, context: LanguageContext) -> str | None:
        previous = context.last_used
        return previous
|
||||
|
||||
|
||||
class DefaultLanguage(LanguageResolver):
    """Final link of the chain: ignores the context and yields the default.

    Because it never returns ``None``, a chain that ends with this resolver
    always produces a language.
    """

    def resolve(self, context: LanguageContext) -> str | None:
        # The context is deliberately unused; the answer is constant.
        return DEFAULT_LANGUAGE
|
||||
|
||||
|
||||
# The tuple order is the policy: earlier resolvers win. Stronger signals, when
# they become available, belong at the front.
DEFAULT_LANGUAGE_CHAIN: tuple[LanguageResolver, ...] = (
    DetectedLanguage(),  # 1. language detected from the source
    LastUsedLanguage(),  # 2. language the user picked last time
    DefaultLanguage(),  # 3. constant fallback, never declines
)
|
||||
|
||||
|
||||
def resolve_language(
    context: LanguageContext,
    chain: tuple[LanguageResolver, ...] = DEFAULT_LANGUAGE_CHAIN,
) -> str:
    """Walk ``chain`` and return the first usable language a resolver yields.

    Each resolver is asked in order. A resolver's answer is stripped of
    surrounding whitespace; an answer that is ``None``, empty, or made only of
    whitespace counts as "no answer" and the next resolver is consulted.

    ``context`` carries the signals handed to every resolver. ``chain`` is the
    ordered sequence of resolvers to consult and defaults to
    ``DEFAULT_LANGUAGE_CHAIN``.

    Returns a non-empty language tag. If no resolver in ``chain`` produces a
    usable answer, ``DEFAULT_LANGUAGE`` is returned.
    """
    for resolver in chain:
        candidate = resolver.resolve(context)
        if not candidate:
            continue
        # Strip *before* deciding the value is usable: a whitespace-only tag
        # is truthy but would otherwise be returned as an empty string.
        candidate = candidate.strip()
        if candidate:
            return candidate
    # Reached only when the chain has no terminal resolver that always answers.
    return DEFAULT_LANGUAGE
|
||||
79
surfsense_backend/app/podcasts/resolution/voices.py
Normal file
79
surfsense_backend/app/podcasts/resolution/voices.py
Normal file
|
|
@ -0,0 +1,79 @@
|
|||
"""Assign a default voice to each speaker for the resolved language.
|
||||
|
||||
The default chain reuses the user's previously chosen voices where they are
|
||||
still valid for the new language/provider, then fills any remaining speakers
|
||||
with distinct catalog voices (preferring an unused gender so a two-speaker
|
||||
episode sounds like two people). The user can override any of these in the
|
||||
brief; this only seeds sensible defaults so most briefs need no edits.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Sequence
|
||||
|
||||
from app.podcasts.voices import CatalogVoice, TtsProvider, VoiceCatalog
|
||||
|
||||
|
||||
class VoiceResolutionError(RuntimeError):
    """Raised when the catalog offers no voice for a provider/language pair."""
|
||||
|
||||
|
||||
def resolve_voices(
    *,
    catalog: VoiceCatalog,
    provider: TtsProvider,
    language: str,
    speaker_count: int,
    preferred: Sequence[str] | None = None,
) -> list[CatalogVoice]:
    """Return one :class:`CatalogVoice` per speaker, in slot order.

    Resolution runs in two passes so that a fallback pick can never displace
    a preference that is still valid:

    1. Every slot whose ``preferred`` voice id exists in the catalog for this
       provider/language (and has not already been claimed by an earlier
       slot) keeps that voice.
    2. The remaining slots are filled, in slot order, with voices that are
       not yet in use, preferring a gender no other speaker has.

    ``catalog`` is queried once through ``for_language(provider, language)``.
    ``preferred`` is the user's last-used voice ids (by slot); entries that no
    longer fit the provider/language, are empty, or repeat an earlier slot's
    voice are silently dropped and replaced. Entries beyond ``speaker_count``
    are ignored.

    Raises :class:`ValueError` when ``speaker_count`` is below 1 and
    :class:`VoiceResolutionError` when the catalog has no voice for the
    requested provider and language.
    """
    if speaker_count < 1:
        raise ValueError("speaker_count must be >= 1")

    available = catalog.for_language(provider, language)
    if not available:
        raise VoiceResolutionError(
            f"{provider.value} has no voice for language {language!r}"
        )

    by_id = {voice.voice_id: voice for voice in available}
    used_ids: set[str] = set()
    used_genders: set = set()

    # Pass 1: lock in every still-valid preference before any fallback pick,
    # so a gap in an early slot cannot steal a later slot's chosen voice.
    reused: dict[int, CatalogVoice] = {}
    for slot, reuse_id in zip(range(speaker_count), preferred or ()):
        if reuse_id and reuse_id in by_id and reuse_id not in used_ids:
            voice = by_id[reuse_id]
            reused[slot] = voice
            used_ids.add(voice.voice_id)
            used_genders.add(voice.gender)

    # Pass 2: fill the gaps in slot order; the picker already knows about all
    # reused voices and their genders.
    assignment: list[CatalogVoice] = []
    for slot in range(speaker_count):
        voice = reused.get(slot)
        if voice is None:
            voice = _pick_distinct(available, used_ids, used_genders)
            used_ids.add(voice.voice_id)
            used_genders.add(voice.gender)
        assignment.append(voice)

    return assignment


def _pick_distinct(
    available: list[CatalogVoice],
    used_ids: set[str],
    used_genders: set,
) -> CatalogVoice:
    """Pick a fresh voice, preferring an unused gender, then any unused voice.

    ``available`` must be non-empty. Neither ``used_ids`` nor ``used_genders``
    is modified; the caller records the pick.

    Falls back to the first catalog voice when speakers outnumber distinct
    voices, so resolution always assigns every speaker rather than failing.
    """
    fresh = [voice for voice in available if voice.voice_id not in used_ids]
    if not fresh:
        return available[0]
    # First unused voice with a new gender, else simply the first unused voice.
    return next(
        (voice for voice in fresh if voice.gender not in used_genders),
        fresh[0],
    )
|
||||
Loading…
Add table
Add a link
Reference in a new issue