mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-12 20:45:20 +02:00
refactor(podcasts): drop language detection from brief
This commit is contained in:
parent
e61308387c
commit
aa7aa81c16
11 changed files with 22 additions and 130 deletions
|
|
@ -16,7 +16,6 @@ DEFAULT_MAX_MINUTES = 20
|
|||
class BriefConfig:
|
||||
"""Signals used to propose a brief; everything here is non-LLM context."""
|
||||
|
||||
search_space_id: int
|
||||
speaker_count: int = DEFAULT_SPEAKER_COUNT
|
||||
min_minutes: int = DEFAULT_MIN_MINUTES
|
||||
max_minutes: int = DEFAULT_MAX_MINUTES
|
||||
|
|
|
|||
|
|
@ -1,28 +0,0 @@
|
|||
"""The language-detection reply shape, normalised to a safe tag or ``None``."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pydantic import BaseModel, field_validator
|
||||
|
||||
from app.podcasts.schemas import normalize_language_tag
|
||||
|
||||
|
||||
class DetectedLanguage(BaseModel):
    """Language-detection reply: a normalised tag, or ``None`` when unusable.

    The ``language`` field is passed through ``normalize_language_tag``. A value
    that function rejects with ``ValueError`` becomes ``None`` instead of
    failing validation, so a bad reply never carries an invalid tag onward.
    """

    language: str | None = None

    @field_validator("language")
    @classmethod
    def _normalise(cls, value: str | None) -> str | None:
        """Return the canonical form of ``value``, or ``None`` if it has none."""
        # Nothing was reported: leave the field unset.
        if value is None:
            return None
        try:
            normalised = normalize_language_tag(value)
        except ValueError:
            # Rejected by the normaliser: treat it as "no detection".
            return None
        else:
            return normalised
|
||||
|
|
@ -1,22 +1,20 @@
|
|||
"""The brief-planning graph: detect language, then propose a spec."""
|
||||
"""The brief-planning graph: propose a reviewable spec from defaults."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from langgraph.graph import StateGraph
|
||||
|
||||
from .config import BriefConfig
|
||||
from .nodes import detect_language, propose_spec
|
||||
from .nodes import propose_spec
|
||||
from .state import BriefState
|
||||
|
||||
|
||||
def build_brief_graph():
|
||||
workflow = StateGraph(BriefState, config_schema=BriefConfig)
|
||||
|
||||
workflow.add_node("detect_language", detect_language)
|
||||
workflow.add_node("propose_spec", propose_spec)
|
||||
|
||||
workflow.add_edge("__start__", "detect_language")
|
||||
workflow.add_edge("detect_language", "propose_spec")
|
||||
workflow.add_edge("__start__", "propose_spec")
|
||||
workflow.add_edge("propose_spec", "__end__")
|
||||
|
||||
graph = workflow.compile()
|
||||
|
|
|
|||
|
|
@ -1,15 +1,14 @@
|
|||
"""Brief-planning nodes: detect the language, then propose a full spec.
|
||||
"""Brief-planning node: propose a full spec from deterministic defaults.
|
||||
|
||||
Only ``detect_language`` spends tokens, and only a small sample of source text;
|
||||
``propose_spec`` is pure resolution. Together they open the brief gate pre-filled
|
||||
so the common case needs no edits.
|
||||
``propose_spec`` is pure resolution — it never spends tokens. It reuses the
|
||||
user's last-used language/voices when available and otherwise falls back to
|
||||
English, so the brief gate opens pre-filled and the common case needs no edits.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from langchain_core.messages import HumanMessage, SystemMessage
|
||||
from langchain_core.runnables import RunnableConfig
|
||||
|
||||
from app.config import config as app_config
|
||||
|
|
@ -28,22 +27,15 @@ from app.podcasts.schemas import (
|
|||
normalize_language_tag,
|
||||
)
|
||||
from app.podcasts.voices import (
|
||||
VoiceCatalog,
|
||||
TtsProvider,
|
||||
VoiceCatalog,
|
||||
get_voice_catalog,
|
||||
provider_from_service,
|
||||
)
|
||||
from app.services.llm_service import get_agent_llm
|
||||
|
||||
from ..prompts import detect_language_prompt
|
||||
from ..structured import StructuredOutputError, invoke_json
|
||||
from .config import BriefConfig
|
||||
from .detection import DetectedLanguage
|
||||
from .state import BriefState
|
||||
|
||||
# Only the head of the source is needed to judge language; this caps tokens.
|
||||
_DETECTION_SAMPLE_CHARS = 4000
|
||||
|
||||
# Default role per speaker slot; extra speakers beyond the list fall back to guest.
|
||||
_ROLE_BY_SLOT = (
|
||||
SpeakerRole.HOST,
|
||||
|
|
@ -54,30 +46,6 @@ _ROLE_BY_SLOT = (
|
|||
)
|
||||
|
||||
|
||||
async def detect_language(
    state: BriefState, config: RunnableConfig
) -> dict[str, Any]:
    """Detect the source language; defer (``None``) on any uncertainty.

    Args:
        state: Graph state; ``source_content`` and ``db_session`` are read.
        config: Runnable config from which the :class:`BriefConfig` is built.

    Returns:
        A one-key state update, ``{"detected_language": <tag or None>}``.
        The value is ``None`` when there is no text to sample, when no LLM is
        available, or when the structured call raises
        ``StructuredOutputError``.
    """
    brief = BriefConfig.from_runnable_config(config)

    # Check for usable text before acquiring the LLM, so an empty source
    # never costs an awaited lookup it cannot use.
    sample = (state.source_content or "")[:_DETECTION_SAMPLE_CHARS].strip()
    if not sample:
        return {"detected_language": None}

    llm = await get_agent_llm(state.db_session, brief.search_space_id)
    if llm is None:
        return {"detected_language": None}

    messages = [
        SystemMessage(content=detect_language_prompt()),
        HumanMessage(content=f"<source_content>{sample}</source_content>"),
    ]
    try:
        detected = await invoke_json(llm, messages, DetectedLanguage)
    except StructuredOutputError:
        # An unparseable reply is treated the same as "not detected".
        return {"detected_language": None}
    return {"detected_language": detected.language}
|
||||
|
||||
|
||||
def propose_spec(state: BriefState, config: RunnableConfig) -> dict[str, Any]:
|
||||
"""Build a complete :class:`PodcastSpec` from the resolved defaults."""
|
||||
brief = BriefConfig.from_runnable_config(config)
|
||||
|
|
@ -85,7 +53,6 @@ def propose_spec(state: BriefState, config: RunnableConfig) -> dict[str, Any]:
|
|||
catalog = get_voice_catalog()
|
||||
|
||||
language = _supported_language(
|
||||
detected=state.detected_language,
|
||||
last_used=brief.last_used_language,
|
||||
provider=provider,
|
||||
catalog=catalog,
|
||||
|
|
@ -128,12 +95,11 @@ def _active_provider() -> TtsProvider:
|
|||
|
||||
def _supported_language(
|
||||
*,
|
||||
detected: str | None,
|
||||
last_used: str | None,
|
||||
provider: TtsProvider,
|
||||
catalog: VoiceCatalog,
|
||||
) -> str:
|
||||
raw = resolve_language(LanguageContext(detected=detected, last_used=last_used))
|
||||
raw = resolve_language(LanguageContext(last_used=last_used))
|
||||
try:
|
||||
language = normalize_language_tag(raw)
|
||||
except ValueError:
|
||||
|
|
|
|||
|
|
@ -4,16 +4,11 @@ from __future__ import annotations
|
|||
|
||||
from dataclasses import dataclass
|
||||
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.podcasts.schemas import PodcastSpec
|
||||
|
||||
|
||||
@dataclass
|
||||
class BriefState:
|
||||
"""Runtime inputs and the proposed spec the graph produces."""
|
||||
"""The proposed spec the graph produces; inputs arrive via the config."""
|
||||
|
||||
db_session: AsyncSession
|
||||
source_content: str
|
||||
detected_language: str | None = None
|
||||
spec: PodcastSpec | None = None
|
||||
|
|
|
|||
|
|
@ -2,13 +2,11 @@
|
|||
|
||||
from __future__ import annotations
|
||||
|
||||
from .detect_language import detect_language_prompt
|
||||
from .draft_segment import draft_segment_prompt
|
||||
from .plan_outline import plan_outline_prompt
|
||||
from .speakers import render_speaker_roster
|
||||
|
||||
__all__ = [
|
||||
"detect_language_prompt",
|
||||
"draft_segment_prompt",
|
||||
"plan_outline_prompt",
|
||||
"render_speaker_roster",
|
||||
|
|
|
|||
|
|
@ -1,22 +0,0 @@
|
|||
"""Prompt for detecting the dominant natural language of source content."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
# System prompt for the language detector. Adjacent literals are joined at
# compile time, so each wrapped sentence stays on one logical prompt line.
_SYSTEM = (
    "You identify the dominant natural language of a piece of source content for a "
    "podcast that will be generated from it.\n"
    "\n"
    "Rules:\n"
    "- Report the language the listener-facing podcast should be spoken in, i.e. the "
    "language most of the meaningful prose is written in.\n"
    "- Ignore code, markup, URLs, numbers, and proper nouns when judging.\n"
    "- If the content is too short, ambiguous, mixed without a clear majority, or not "
    "natural-language prose, return null rather than guessing.\n"
    "\n"
    "Respond with strict JSON and nothing else:\n"
    '{"language": "<BCP-47 tag like en, en-US, fr, pt-BR>"} or {"language": null}\n'
)


def detect_language_prompt() -> str:
    """Return the system prompt used for language detection."""
    return _SYSTEM
|
||||
|
|
@ -1,8 +1,8 @@
|
|||
"""Resolution: deterministic default chains for a fresh brief.
|
||||
|
||||
Turns weak signals (detected language, last-used preferences) into concrete
|
||||
language and voice defaults, so the brief gate opens pre-filled and most users
|
||||
approve without editing.
|
||||
Turns the user's last-used preferences into concrete language and voice
|
||||
defaults, so the brief gate opens pre-filled and most users approve without
|
||||
editing.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
|
|
|||
|
|
@ -1,10 +1,9 @@
|
|||
"""Resolve the brief's language without spending tokens at the gate.
|
||||
|
||||
The chain mirrors the agreed policy: prefer a language detected from the source,
|
||||
fall back to what the user last chose, and finally default to English (which the
|
||||
user can still override in the brief). Detection itself is performed upstream
|
||||
where an LLM is available and passed in as :attr:`LanguageContext.detected`, so
|
||||
this layer stays pure and deterministic.
|
||||
The chain mirrors the agreed policy: reuse the language the user last chose, and
|
||||
otherwise default to English (which the user can still override in the brief). We
|
||||
deliberately never guess the language from the source content — proposing a
|
||||
language the user did not ask for is worse than a predictable default.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
|
@ -20,7 +19,6 @@ DEFAULT_LANGUAGE = "en"
|
|||
class LanguageContext:
|
||||
"""Signals available when proposing a language for a fresh podcast."""
|
||||
|
||||
detected: str | None = None
|
||||
last_used: str | None = None
|
||||
|
||||
|
||||
|
|
@ -32,13 +30,6 @@ class LanguageResolver(ABC):
|
|||
"""Return a language tag, or ``None`` to defer to the next resolver."""
|
||||
|
||||
|
||||
class DetectedLanguage(LanguageResolver):
    """Resolver that reports the context's ``detected`` language unchanged."""

    def resolve(self, context: LanguageContext) -> str | None:
        """Return ``context.detected``, which may be ``None``."""
        detected = context.detected
        return detected
|
||||
|
||||
|
||||
class LastUsedLanguage(LanguageResolver):
|
||||
"""Reuse the language from the user's previous podcast."""
|
||||
|
||||
|
|
@ -55,7 +46,6 @@ class DefaultLanguage(LanguageResolver):
|
|||
|
||||
# Order encodes the policy; prepend stronger signals here as they appear.
|
||||
DEFAULT_LANGUAGE_CHAIN: tuple[LanguageResolver, ...] = (
|
||||
DetectedLanguage(),
|
||||
LastUsedLanguage(),
|
||||
DefaultLanguage(),
|
||||
)
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ _LANGUAGE_TAG = re.compile(r"^[A-Za-z]{2,3}(-[A-Za-z0-9]{2,8})*$")
|
|||
def normalize_language_tag(value: str) -> str:
|
||||
"""Validate and canonicalise a BCP-47 tag (lowercased primary subtag).
|
||||
|
||||
Shared with the generation layer so detected and user-entered languages are
|
||||
Shared with the generation layer so resolved and user-entered languages are
|
||||
normalised identically before they reach a :class:`PodcastSpec`.
|
||||
"""
|
||||
cleaned = value.strip()
|
||||
|
|
|
|||
|
|
@ -2,8 +2,9 @@
|
|||
|
||||
Resolution is what lets most briefs need no edits: it proposes a sensible
|
||||
language and a distinct voice per speaker. These tests state the policy
|
||||
("detected wins, else last-used, else English"; "two speakers should sound
|
||||
("reuse what the user last chose, else English"; "two speakers should sound
|
||||
like two people") through the public resolver functions and the real catalog.
|
||||
We never guess the language from source content.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
|
@ -22,13 +23,8 @@ from app.podcasts.voices import TtsProvider, get_voice_catalog
|
|||
pytestmark = pytest.mark.unit
|
||||
|
||||
|
||||
def test_detected_language_is_preferred_over_everything():
    """A detected language outranks the last-used one."""
    # Both signals are present and disagree, so the result shows which wins.
    resolved = resolve_language(LanguageContext(detected="es", last_used="fr"))
    assert resolved == "es"
|
||||
|
||||
|
||||
def test_falls_back_to_last_used_when_nothing_detected():
|
||||
context = LanguageContext(detected=None, last_used="fr")
|
||||
def test_last_used_language_is_reused():
|
||||
context = LanguageContext(last_used="fr")
|
||||
assert resolve_language(context) == "fr"
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue