From aa7aa81c1677bc410fd658fccd0370e49242e712 Mon Sep 17 00:00:00 2001 From: CREDO23 Date: Wed, 10 Jun 2026 20:51:38 +0200 Subject: [PATCH] refactor(podcasts): drop language detection from brief --- .../app/podcasts/generation/brief/config.py | 1 - .../podcasts/generation/brief/detection.py | 28 ----------- .../app/podcasts/generation/brief/graph.py | 8 ++-- .../app/podcasts/generation/brief/nodes.py | 46 +++---------------- .../app/podcasts/generation/brief/state.py | 7 +-- .../podcasts/generation/prompts/__init__.py | 2 - .../generation/prompts/detect_language.py | 22 --------- .../app/podcasts/resolution/__init__.py | 6 +-- .../app/podcasts/resolution/language.py | 18 ++------ .../app/podcasts/schemas/spec.py | 2 +- .../tests/unit/podcasts/test_resolution.py | 12 ++--- 11 files changed, 22 insertions(+), 130 deletions(-) delete mode 100644 surfsense_backend/app/podcasts/generation/brief/detection.py delete mode 100644 surfsense_backend/app/podcasts/generation/prompts/detect_language.py diff --git a/surfsense_backend/app/podcasts/generation/brief/config.py b/surfsense_backend/app/podcasts/generation/brief/config.py index a9f2f9dec..4f92585ae 100644 --- a/surfsense_backend/app/podcasts/generation/brief/config.py +++ b/surfsense_backend/app/podcasts/generation/brief/config.py @@ -16,7 +16,6 @@ DEFAULT_MAX_MINUTES = 20 class BriefConfig: """Signals used to propose a brief; everything here is non-LLM context.""" - search_space_id: int speaker_count: int = DEFAULT_SPEAKER_COUNT min_minutes: int = DEFAULT_MIN_MINUTES max_minutes: int = DEFAULT_MAX_MINUTES diff --git a/surfsense_backend/app/podcasts/generation/brief/detection.py b/surfsense_backend/app/podcasts/generation/brief/detection.py deleted file mode 100644 index d505d4993..000000000 --- a/surfsense_backend/app/podcasts/generation/brief/detection.py +++ /dev/null @@ -1,28 +0,0 @@ -"""The language-detection reply shape, normalised to a safe tag or ``None``.""" - -from __future__ import annotations - -from pydantic import BaseModel, field_validator - -from app.podcasts.schemas import normalize_language_tag - - -class DetectedLanguage(BaseModel): - """What the detector returns: a usable BCP-47 tag, or ``None`` when unsure. - - A malformed or non-language reply is coerced to ``None`` so a bad detection - quietly defers to the rest of the resolution chain rather than poisoning the - spec with an invalid tag. - """ - - language: str | None = None - - @field_validator("language") - @classmethod - def _normalise(cls, value: str | None) -> str | None: - if value is None: - return None - try: - return normalize_language_tag(value) - except ValueError: - return None diff --git a/surfsense_backend/app/podcasts/generation/brief/graph.py b/surfsense_backend/app/podcasts/generation/brief/graph.py index 328529e59..a643bdbb4 100644 --- a/surfsense_backend/app/podcasts/generation/brief/graph.py +++ b/surfsense_backend/app/podcasts/generation/brief/graph.py @@ -1,22 +1,20 @@ -"""The brief-planning graph: detect language, then propose a spec.""" +"""The brief-planning graph: propose a reviewable spec from defaults.""" from __future__ import annotations from langgraph.graph import StateGraph from .config import BriefConfig -from .nodes import detect_language, propose_spec +from .nodes import propose_spec from .state import BriefState def build_brief_graph(): workflow = StateGraph(BriefState, config_schema=BriefConfig) - workflow.add_node("detect_language", detect_language) workflow.add_node("propose_spec", propose_spec) - workflow.add_edge("__start__", "detect_language") - workflow.add_edge("detect_language", "propose_spec") + workflow.add_edge("__start__", "propose_spec") workflow.add_edge("propose_spec", "__end__") graph = workflow.compile() diff --git a/surfsense_backend/app/podcasts/generation/brief/nodes.py b/surfsense_backend/app/podcasts/generation/brief/nodes.py index e0477940c..c0a6f1ae1 100644 --- a/surfsense_backend/app/podcasts/generation/brief/nodes.py +++ b/surfsense_backend/app/podcasts/generation/brief/nodes.py @@ -1,15 +1,14 @@ -"""Brief-planning nodes: detect the language, then propose a full spec. +"""Brief-planning node: propose a full spec from deterministic defaults. -Only ``detect_language`` spends tokens, and only a small sample of source text; -``propose_spec`` is pure resolution. Together they open the brief gate pre-filled -so the common case needs no edits. +``propose_spec`` is pure resolution — it never spends tokens. It reuses the +user's last-used language/voices when available and otherwise falls back to +English, so the brief gate opens pre-filled and the common case needs no edits. """ from __future__ import annotations from typing import Any -from langchain_core.messages import HumanMessage, SystemMessage from langchain_core.runnables import RunnableConfig from app.config import config as app_config @@ -28,22 +27,15 @@ from app.podcasts.schemas import ( normalize_language_tag, ) from app.podcasts.voices import ( - VoiceCatalog, TtsProvider, + VoiceCatalog, get_voice_catalog, provider_from_service, ) -from app.services.llm_service import get_agent_llm -from ..prompts import detect_language_prompt -from ..structured import StructuredOutputError, invoke_json from .config import BriefConfig -from .detection import DetectedLanguage from .state import BriefState -# Only the head of the source is needed to judge language; this caps tokens. -_DETECTION_SAMPLE_CHARS = 4000 - # Default role per speaker slot; extra speakers beyond the list fall back to guest. _ROLE_BY_SLOT = ( SpeakerRole.HOST, @@ -54,30 +46,6 @@ _ROLE_BY_SLOT = ( ) -async def detect_language( - state: BriefState, config: RunnableConfig -) -> dict[str, Any]: - """Detect the source language; defer (``None``) on any uncertainty.""" - brief = BriefConfig.from_runnable_config(config) - llm = await get_agent_llm(state.db_session, brief.search_space_id) - if llm is None: - return {"detected_language": None} - - sample = (state.source_content or "")[:_DETECTION_SAMPLE_CHARS].strip() - if not sample: - return {"detected_language": None} - - messages = [ - SystemMessage(content=detect_language_prompt()), - HumanMessage(content=f"{sample}"), - ] - try: - detected = await invoke_json(llm, messages, DetectedLanguage) - except StructuredOutputError: - return {"detected_language": None} - return {"detected_language": detected.language} - - def propose_spec(state: BriefState, config: RunnableConfig) -> dict[str, Any]: """Build a complete :class:`PodcastSpec` from the resolved defaults.""" brief = BriefConfig.from_runnable_config(config) @@ -85,7 +53,6 @@ def propose_spec(state: BriefState, config: RunnableConfig) -> dict[str, Any]: catalog = get_voice_catalog() language = _supported_language( - detected=state.detected_language, last_used=brief.last_used_language, provider=provider, catalog=catalog, @@ -128,12 +95,11 @@ def _active_provider() -> TtsProvider: def _supported_language( *, - detected: str | None, last_used: str | None, provider: TtsProvider, catalog: VoiceCatalog, ) -> str: - raw = resolve_language(LanguageContext(detected=detected, last_used=last_used)) + raw = resolve_language(LanguageContext(last_used=last_used)) try: language = normalize_language_tag(raw) except ValueError: diff --git a/surfsense_backend/app/podcasts/generation/brief/state.py b/surfsense_backend/app/podcasts/generation/brief/state.py index 976a72df5..418fb6fa9 100644 --- a/surfsense_backend/app/podcasts/generation/brief/state.py +++ b/surfsense_backend/app/podcasts/generation/brief/state.py @@ -4,16 +4,11 @@ from __future__ import annotations from dataclasses import dataclass -from sqlalchemy.ext.asyncio import AsyncSession - from app.podcasts.schemas import PodcastSpec @dataclass class BriefState: - """Runtime inputs and the proposed spec the graph produces.""" + """The proposed spec the graph produces; inputs arrive via the config.""" - db_session: AsyncSession - source_content: str - detected_language: str | None = None spec: PodcastSpec | None = None diff --git a/surfsense_backend/app/podcasts/generation/prompts/__init__.py b/surfsense_backend/app/podcasts/generation/prompts/__init__.py index 1f6d3993b..041dd4e6d 100644 --- a/surfsense_backend/app/podcasts/generation/prompts/__init__.py +++ b/surfsense_backend/app/podcasts/generation/prompts/__init__.py @@ -2,13 +2,11 @@ from __future__ import annotations -from .detect_language import detect_language_prompt from .draft_segment import draft_segment_prompt from .plan_outline import plan_outline_prompt from .speakers import render_speaker_roster __all__ = [ - "detect_language_prompt", "draft_segment_prompt", "plan_outline_prompt", "render_speaker_roster", diff --git a/surfsense_backend/app/podcasts/generation/prompts/detect_language.py b/surfsense_backend/app/podcasts/generation/prompts/detect_language.py deleted file mode 100644 index a5ab4da5c..000000000 --- a/surfsense_backend/app/podcasts/generation/prompts/detect_language.py +++ /dev/null @@ -1,22 +0,0 @@ -"""Prompt for detecting the dominant natural language of source content.""" - -from __future__ import annotations - -_SYSTEM = """\ -You identify the dominant natural language of a piece of source content for a \ -podcast that will be generated from it. - -Rules: -- Report the language the listener-facing podcast should be spoken in, i.e. the \ -language most of the meaningful prose is written in. -- Ignore code, markup, URLs, numbers, and proper nouns when judging. -- If the content is too short, ambiguous, mixed without a clear majority, or not \ -natural-language prose, return null rather than guessing. - -Respond with strict JSON and nothing else: -{"language": ""} or {"language": null} -""" - - -def detect_language_prompt() -> str: - return _SYSTEM diff --git a/surfsense_backend/app/podcasts/resolution/__init__.py b/surfsense_backend/app/podcasts/resolution/__init__.py index ebfd3153a..19a7edfb3 100644 --- a/surfsense_backend/app/podcasts/resolution/__init__.py +++ b/surfsense_backend/app/podcasts/resolution/__init__.py @@ -1,8 +1,8 @@ """Resolution: deterministic default chains for a fresh brief. -Turns weak signals (detected language, last-used preferences) into concrete -language and voice defaults, so the brief gate opens pre-filled and most users -approve without editing. +Turns the user's last-used preferences into concrete language and voice +defaults, so the brief gate opens pre-filled and most users approve without +editing. """ from __future__ import annotations diff --git a/surfsense_backend/app/podcasts/resolution/language.py b/surfsense_backend/app/podcasts/resolution/language.py index 2da90ef37..336d9036b 100644 --- a/surfsense_backend/app/podcasts/resolution/language.py +++ b/surfsense_backend/app/podcasts/resolution/language.py @@ -1,10 +1,9 @@ """Resolve the brief's language without spending tokens at the gate. -The chain mirrors the agreed policy: prefer a language detected from the source, -fall back to what the user last chose, and finally default to English (which the -user can still override in the brief). Detection itself is performed upstream -where an LLM is available and passed in as :attr:`LanguageContext.detected`, so -this layer stays pure and deterministic. +The chain mirrors the agreed policy: reuse the language the user last chose, and +otherwise default to English (which the user can still override in the brief). We +deliberately never guess the language from the source content — proposing a +language the user did not ask for is worse than a predictable default. """ from __future__ import annotations @@ -20,7 +19,6 @@ DEFAULT_LANGUAGE = "en" class LanguageContext: """Signals available when proposing a language for a fresh podcast.""" - detected: str | None = None last_used: str | None = None @@ -32,13 +30,6 @@ class LanguageResolver(ABC): """Return a language tag, or ``None`` to defer to the next resolver.""" -class DetectedLanguage(LanguageResolver): - """Use the language detected from the source, when confident enough.""" - - def resolve(self, context: LanguageContext) -> str | None: - return context.detected - - class LastUsedLanguage(LanguageResolver): """Reuse the language from the user's previous podcast.""" @@ -55,7 +46,6 @@ class DefaultLanguage(LanguageResolver): # Order encodes the policy; prepend stronger signals here as they appear. DEFAULT_LANGUAGE_CHAIN: tuple[LanguageResolver, ...] = ( - DetectedLanguage(), LastUsedLanguage(), DefaultLanguage(), ) diff --git a/surfsense_backend/app/podcasts/schemas/spec.py b/surfsense_backend/app/podcasts/schemas/spec.py index 2d3b3c74e..973e26167 100644 --- a/surfsense_backend/app/podcasts/schemas/spec.py +++ b/surfsense_backend/app/podcasts/schemas/spec.py @@ -30,7 +30,7 @@ _LANGUAGE_TAG = re.compile(r"^[A-Za-z]{2,3}(-[A-Za-z0-9]{2,8})*$") def normalize_language_tag(value: str) -> str: """Validate and canonicalise a BCP-47 tag (lowercased primary subtag). - Shared with the generation layer so detected and user-entered languages are + Shared with the generation layer so resolved and user-entered languages are normalised identically before they reach a :class:`PodcastSpec`. """ cleaned = value.strip() diff --git a/surfsense_backend/tests/unit/podcasts/test_resolution.py b/surfsense_backend/tests/unit/podcasts/test_resolution.py index 4fe3df5cd..48e834096 100644 --- a/surfsense_backend/tests/unit/podcasts/test_resolution.py +++ b/surfsense_backend/tests/unit/podcasts/test_resolution.py @@ -2,8 +2,9 @@ Resolution is what lets most briefs need no edits: it proposes a sensible language and a distinct voice per speaker. These tests state the policy -("detected wins, else last-used, else English"; "two speakers should sound +("reuse what the user last chose, else English"; "two speakers should sound like two people") through the public resolver functions and the real catalog. +We never guess the language from source content. """ from __future__ import annotations @@ -22,13 +23,8 @@ from app.podcasts.voices import TtsProvider, get_voice_catalog pytestmark = pytest.mark.unit -def test_detected_language_is_preferred_over_everything(): - context = LanguageContext(detected="es", last_used="fr") - assert resolve_language(context) == "es" - - -def test_falls_back_to_last_used_when_nothing_detected(): - context = LanguageContext(detected=None, last_used="fr") +def test_last_used_language_is_reused(): + context = LanguageContext(last_used="fr") assert resolve_language(context) == "fr"