refactor(podcasts): drop language detection from brief

2026-06-12 20:45:20 +02:00 · 2026-06-10 20:51:38 +02:00 · 2026-06-10 20:51:38 +02:00 · aa7aa81c16
commit aa7aa81c16
parent e61308387c
11 changed files with 22 additions and 130 deletions
--- a/surfsense_backend/app/podcasts/generation/brief/config.py
+++ b/surfsense_backend/app/podcasts/generation/brief/config.py
@@ -16,7 +16,6 @@ DEFAULT_MAX_MINUTES = 20
 class BriefConfig:
    """Signals used to propose a brief; everything here is non-LLM context."""

-    search_space_id: int
    speaker_count: int = DEFAULT_SPEAKER_COUNT
    min_minutes: int = DEFAULT_MIN_MINUTES
    max_minutes: int = DEFAULT_MAX_MINUTES
--- a/surfsense_backend/app/podcasts/generation/brief/detection.py
+++ b/surfsense_backend/app/podcasts/generation/brief/detection.py
@@ -1,28 +0,0 @@
-"""The language-detection reply shape, normalised to a safe tag or ``None``."""
-
-from __future__ import annotations
-
-from pydantic import BaseModel, field_validator
-
-from app.podcasts.schemas import normalize_language_tag
-
-
-class DetectedLanguage(BaseModel):
-    """What the detector returns: a usable BCP-47 tag, or ``None`` when unsure.
-
-    A malformed or non-language reply is coerced to ``None`` so a bad detection
-    quietly defers to the rest of the resolution chain rather than poisoning the
-    spec with an invalid tag.
-    """
-
-    language: str | None = None
-
-    @field_validator("language")
-    @classmethod
-    def _normalise(cls, value: str | None) -> str | None:
-        if value is None:
-            return None
-        try:
-            return normalize_language_tag(value)
-        except ValueError:
-            return None
--- a/surfsense_backend/app/podcasts/generation/brief/graph.py
+++ b/surfsense_backend/app/podcasts/generation/brief/graph.py
@@ -1,22 +1,20 @@
-"""The brief-planning graph: detect language, then propose a spec."""
+"""The brief-planning graph: propose a reviewable spec from defaults."""

 from __future__ import annotations

 from langgraph.graph import StateGraph

 from .config import BriefConfig
-from .nodes import detect_language, propose_spec
+from .nodes import propose_spec
 from .state import BriefState


 def build_brief_graph():
    workflow = StateGraph(BriefState, config_schema=BriefConfig)

-    workflow.add_node("detect_language", detect_language)
    workflow.add_node("propose_spec", propose_spec)

-    workflow.add_edge("__start__", "detect_language")
-    workflow.add_edge("detect_language", "propose_spec")
+    workflow.add_edge("__start__", "propose_spec")
    workflow.add_edge("propose_spec", "__end__")

    graph = workflow.compile()
--- a/surfsense_backend/app/podcasts/generation/brief/nodes.py
+++ b/surfsense_backend/app/podcasts/generation/brief/nodes.py
@@ -1,15 +1,14 @@
-"""Brief-planning nodes: detect the language, then propose a full spec.
+"""Brief-planning node: propose a full spec from deterministic defaults.

-Only ``detect_language`` spends tokens, and only a small sample of source text;
-``propose_spec`` is pure resolution. Together they open the brief gate pre-filled
-so the common case needs no edits.
+``propose_spec`` is pure resolution — it never spends tokens. It reuses the
+user's last-used language/voices when available and otherwise falls back to
+English, so the brief gate opens pre-filled and the common case needs no edits.
 """

 from __future__ import annotations

 from typing import Any

-from langchain_core.messages import HumanMessage, SystemMessage
 from langchain_core.runnables import RunnableConfig

 from app.config import config as app_config
@@ -28,22 +27,15 @@ from app.podcasts.schemas import (
    normalize_language_tag,
 )
 from app.podcasts.voices import (
-    VoiceCatalog,
    TtsProvider,
+    VoiceCatalog,
    get_voice_catalog,
    provider_from_service,
 )
-from app.services.llm_service import get_agent_llm

-from ..prompts import detect_language_prompt
-from ..structured import StructuredOutputError, invoke_json
 from .config import BriefConfig
-from .detection import DetectedLanguage
 from .state import BriefState

-# Only the head of the source is needed to judge language; this caps tokens.
-_DETECTION_SAMPLE_CHARS = 4000
-
 # Default role per speaker slot; extra speakers beyond the list fall back to guest.
 _ROLE_BY_SLOT = (
    SpeakerRole.HOST,
@@ -54,30 +46,6 @@ _ROLE_BY_SLOT = (
 )


-async def detect_language(
-    state: BriefState, config: RunnableConfig
-) -> dict[str, Any]:
-    """Detect the source language; defer (``None``) on any uncertainty."""
-    brief = BriefConfig.from_runnable_config(config)
-    llm = await get_agent_llm(state.db_session, brief.search_space_id)
-    if llm is None:
-        return {"detected_language": None}
-
-    sample = (state.source_content or "")[:_DETECTION_SAMPLE_CHARS].strip()
-    if not sample:
-        return {"detected_language": None}
-
-    messages = [
-        SystemMessage(content=detect_language_prompt()),
-        HumanMessage(content=f"<source_content>{sample}</source_content>"),
-    ]
-    try:
-        detected = await invoke_json(llm, messages, DetectedLanguage)
-    except StructuredOutputError:
-        return {"detected_language": None}
-    return {"detected_language": detected.language}
-
-
 def propose_spec(state: BriefState, config: RunnableConfig) -> dict[str, Any]:
    """Build a complete :class:`PodcastSpec` from the resolved defaults."""
    brief = BriefConfig.from_runnable_config(config)
@@ -85,7 +53,6 @@ def propose_spec(state: BriefState, config: RunnableConfig) -> dict[str, Any]:
    catalog = get_voice_catalog()

    language = _supported_language(
-        detected=state.detected_language,
        last_used=brief.last_used_language,
        provider=provider,
        catalog=catalog,
@@ -128,12 +95,11 @@ def _active_provider() -> TtsProvider:

 def _supported_language(
    *,
-    detected: str | None,
    last_used: str | None,
    provider: TtsProvider,
    catalog: VoiceCatalog,
 ) -> str:
-    raw = resolve_language(LanguageContext(detected=detected, last_used=last_used))
+    raw = resolve_language(LanguageContext(last_used=last_used))
    try:
        language = normalize_language_tag(raw)
    except ValueError:
--- a/surfsense_backend/app/podcasts/generation/brief/state.py
+++ b/surfsense_backend/app/podcasts/generation/brief/state.py
@@ -4,16 +4,11 @@ from __future__ import annotations

 from dataclasses import dataclass

-from sqlalchemy.ext.asyncio import AsyncSession
-
 from app.podcasts.schemas import PodcastSpec


 @dataclass
 class BriefState:
-    """Runtime inputs and the proposed spec the graph produces."""
+    """The proposed spec the graph produces; inputs arrive via the config."""

-    db_session: AsyncSession
-    source_content: str
-    detected_language: str | None = None
    spec: PodcastSpec | None = None
--- a/surfsense_backend/app/podcasts/generation/prompts/__init__.py
+++ b/surfsense_backend/app/podcasts/generation/prompts/__init__.py
@@ -2,13 +2,11 @@

 from __future__ import annotations

-from .detect_language import detect_language_prompt
 from .draft_segment import draft_segment_prompt
 from .plan_outline import plan_outline_prompt
 from .speakers import render_speaker_roster

 __all__ = [
-    "detect_language_prompt",
    "draft_segment_prompt",
    "plan_outline_prompt",
    "render_speaker_roster",
--- a/surfsense_backend/app/podcasts/generation/prompts/detect_language.py
+++ b/surfsense_backend/app/podcasts/generation/prompts/detect_language.py
@@ -1,22 +0,0 @@
-"""Prompt for detecting the dominant natural language of source content."""
-
-from __future__ import annotations
-
-_SYSTEM = """\
-You identify the dominant natural language of a piece of source content for a \
-podcast that will be generated from it.
-
-Rules:
-- Report the language the listener-facing podcast should be spoken in, i.e. the \
-language most of the meaningful prose is written in.
-- Ignore code, markup, URLs, numbers, and proper nouns when judging.
-- If the content is too short, ambiguous, mixed without a clear majority, or not \
-natural-language prose, return null rather than guessing.
-
-Respond with strict JSON and nothing else:
-{"language": "<BCP-47 tag like en, en-US, fr, pt-BR>"}  or  {"language": null}
-"""
-
-
-def detect_language_prompt() -> str:
-    return _SYSTEM
--- a/surfsense_backend/app/podcasts/resolution/__init__.py
+++ b/surfsense_backend/app/podcasts/resolution/__init__.py
@@ -1,8 +1,8 @@
 """Resolution: deterministic default chains for a fresh brief.

-Turns weak signals (detected language, last-used preferences) into concrete
-language and voice defaults, so the brief gate opens pre-filled and most users
-approve without editing.
+Turns the user's last-used preferences into concrete language and voice
+defaults, so the brief gate opens pre-filled and most users approve without
+editing.
 """

 from __future__ import annotations
--- a/surfsense_backend/app/podcasts/resolution/language.py
+++ b/surfsense_backend/app/podcasts/resolution/language.py
@@ -1,10 +1,9 @@
 """Resolve the brief's language without spending tokens at the gate.

-The chain mirrors the agreed policy: prefer a language detected from the source,
-fall back to what the user last chose, and finally default to English (which the
-user can still override in the brief). Detection itself is performed upstream
-where an LLM is available and passed in as :attr:`LanguageContext.detected`, so
-this layer stays pure and deterministic.
+The chain mirrors the agreed policy: reuse the language the user last chose, and
+otherwise default to English (which the user can still override in the brief). We
+deliberately never guess the language from the source content — proposing a
+language the user did not ask for is worse than a predictable default.
 """

 from __future__ import annotations
@@ -20,7 +19,6 @@ DEFAULT_LANGUAGE = "en"
 class LanguageContext:
    """Signals available when proposing a language for a fresh podcast."""

-    detected: str | None = None
    last_used: str | None = None


@@ -32,13 +30,6 @@ class LanguageResolver(ABC):
        """Return a language tag, or ``None`` to defer to the next resolver."""


-class DetectedLanguage(LanguageResolver):
-    """Use the language detected from the source, when confident enough."""
-
-    def resolve(self, context: LanguageContext) -> str | None:
-        return context.detected
-
-
 class LastUsedLanguage(LanguageResolver):
    """Reuse the language from the user's previous podcast."""

@@ -55,7 +46,6 @@ class DefaultLanguage(LanguageResolver):

 # Order encodes the policy; prepend stronger signals here as they appear.
 DEFAULT_LANGUAGE_CHAIN: tuple[LanguageResolver, ...] = (
-    DetectedLanguage(),
    LastUsedLanguage(),
    DefaultLanguage(),
 )
--- a/surfsense_backend/app/podcasts/schemas/spec.py
+++ b/surfsense_backend/app/podcasts/schemas/spec.py
@@ -30,7 +30,7 @@ _LANGUAGE_TAG = re.compile(r"^[A-Za-z]{2,3}(-[A-Za-z0-9]{2,8})*$")
 def normalize_language_tag(value: str) -> str:
    """Validate and canonicalise a BCP-47 tag (lowercased primary subtag).

-    Shared with the generation layer so detected and user-entered languages are
+    Shared with the generation layer so resolved and user-entered languages are
    normalised identically before they reach a :class:`PodcastSpec`.
    """
    cleaned = value.strip()
--- a/surfsense_backend/tests/unit/podcasts/test_resolution.py
+++ b/surfsense_backend/tests/unit/podcasts/test_resolution.py
@@ -2,8 +2,9 @@

 Resolution is what lets most briefs need no edits: it proposes a sensible
 language and a distinct voice per speaker. These tests state the policy
-("detected wins, else last-used, else English"; "two speakers should sound
+("reuse what the user last chose, else English"; "two speakers should sound
 like two people") through the public resolver functions and the real catalog.
+We never guess the language from source content.
 """

 from __future__ import annotations
@@ -22,13 +23,8 @@ from app.podcasts.voices import TtsProvider, get_voice_catalog
 pytestmark = pytest.mark.unit


-def test_detected_language_is_preferred_over_everything():
-    context = LanguageContext(detected="es", last_used="fr")
-    assert resolve_language(context) == "es"
-
-
-def test_falls_back_to_last_used_when_nothing_detected():
-    context = LanguageContext(detected=None, last_used="fr")
+def test_last_used_language_is_reused():
+    context = LanguageContext(last_used="fr")
    assert resolve_language(context) == "fr"