diff --git a/surfsense_backend/app/podcasts/generation/brief/config.py b/surfsense_backend/app/podcasts/generation/brief/config.py
index a9f2f9dec..4f92585ae 100644
--- a/surfsense_backend/app/podcasts/generation/brief/config.py
+++ b/surfsense_backend/app/podcasts/generation/brief/config.py
@@ -16,7 +16,6 @@ DEFAULT_MAX_MINUTES = 20
class BriefConfig:
"""Signals used to propose a brief; everything here is non-LLM context."""
- search_space_id: int
speaker_count: int = DEFAULT_SPEAKER_COUNT
min_minutes: int = DEFAULT_MIN_MINUTES
max_minutes: int = DEFAULT_MAX_MINUTES
diff --git a/surfsense_backend/app/podcasts/generation/brief/detection.py b/surfsense_backend/app/podcasts/generation/brief/detection.py
deleted file mode 100644
index d505d4993..000000000
--- a/surfsense_backend/app/podcasts/generation/brief/detection.py
+++ /dev/null
@@ -1,28 +0,0 @@
-"""The language-detection reply shape, normalised to a safe tag or ``None``."""
-
-from __future__ import annotations
-
-from pydantic import BaseModel, field_validator
-
-from app.podcasts.schemas import normalize_language_tag
-
-
-class DetectedLanguage(BaseModel):
- """What the detector returns: a usable BCP-47 tag, or ``None`` when unsure.
-
- A malformed or non-language reply is coerced to ``None`` so a bad detection
- quietly defers to the rest of the resolution chain rather than poisoning the
- spec with an invalid tag.
- """
-
- language: str | None = None
-
- @field_validator("language")
- @classmethod
- def _normalise(cls, value: str | None) -> str | None:
- if value is None:
- return None
- try:
- return normalize_language_tag(value)
- except ValueError:
- return None
diff --git a/surfsense_backend/app/podcasts/generation/brief/graph.py b/surfsense_backend/app/podcasts/generation/brief/graph.py
index 328529e59..a643bdbb4 100644
--- a/surfsense_backend/app/podcasts/generation/brief/graph.py
+++ b/surfsense_backend/app/podcasts/generation/brief/graph.py
@@ -1,22 +1,20 @@
-"""The brief-planning graph: detect language, then propose a spec."""
+"""The brief-planning graph: propose a reviewable spec from defaults."""
from __future__ import annotations
from langgraph.graph import StateGraph
from .config import BriefConfig
-from .nodes import detect_language, propose_spec
+from .nodes import propose_spec
from .state import BriefState
def build_brief_graph():
workflow = StateGraph(BriefState, config_schema=BriefConfig)
- workflow.add_node("detect_language", detect_language)
workflow.add_node("propose_spec", propose_spec)
- workflow.add_edge("__start__", "detect_language")
- workflow.add_edge("detect_language", "propose_spec")
+ workflow.add_edge("__start__", "propose_spec")
workflow.add_edge("propose_spec", "__end__")
graph = workflow.compile()
diff --git a/surfsense_backend/app/podcasts/generation/brief/nodes.py b/surfsense_backend/app/podcasts/generation/brief/nodes.py
index e0477940c..c0a6f1ae1 100644
--- a/surfsense_backend/app/podcasts/generation/brief/nodes.py
+++ b/surfsense_backend/app/podcasts/generation/brief/nodes.py
@@ -1,15 +1,14 @@
-"""Brief-planning nodes: detect the language, then propose a full spec.
+"""Brief-planning node: propose a full spec from deterministic defaults.
-Only ``detect_language`` spends tokens, and only a small sample of source text;
-``propose_spec`` is pure resolution. Together they open the brief gate pre-filled
-so the common case needs no edits.
+``propose_spec`` is pure resolution — it never spends tokens. It reuses the
+user's last-used language and voices when available; language otherwise falls back
+to English, so the brief gate opens pre-filled and the common case needs no edits.
"""
from __future__ import annotations
from typing import Any
-from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.runnables import RunnableConfig
from app.config import config as app_config
@@ -28,22 +27,15 @@ from app.podcasts.schemas import (
normalize_language_tag,
)
from app.podcasts.voices import (
- VoiceCatalog,
TtsProvider,
+ VoiceCatalog,
get_voice_catalog,
provider_from_service,
)
-from app.services.llm_service import get_agent_llm
-from ..prompts import detect_language_prompt
-from ..structured import StructuredOutputError, invoke_json
from .config import BriefConfig
-from .detection import DetectedLanguage
from .state import BriefState
-# Only the head of the source is needed to judge language; this caps tokens.
-_DETECTION_SAMPLE_CHARS = 4000
-
# Default role per speaker slot; extra speakers beyond the list fall back to guest.
_ROLE_BY_SLOT = (
SpeakerRole.HOST,
@@ -54,30 +46,6 @@ _ROLE_BY_SLOT = (
)
-async def detect_language(
- state: BriefState, config: RunnableConfig
-) -> dict[str, Any]:
- """Detect the source language; defer (``None``) on any uncertainty."""
- brief = BriefConfig.from_runnable_config(config)
- llm = await get_agent_llm(state.db_session, brief.search_space_id)
- if llm is None:
- return {"detected_language": None}
-
- sample = (state.source_content or "")[:_DETECTION_SAMPLE_CHARS].strip()
- if not sample:
- return {"detected_language": None}
-
- messages = [
- SystemMessage(content=detect_language_prompt()),
- HumanMessage(content=f"{sample}"),
- ]
- try:
- detected = await invoke_json(llm, messages, DetectedLanguage)
- except StructuredOutputError:
- return {"detected_language": None}
- return {"detected_language": detected.language}
-
-
def propose_spec(state: BriefState, config: RunnableConfig) -> dict[str, Any]:
"""Build a complete :class:`PodcastSpec` from the resolved defaults."""
brief = BriefConfig.from_runnable_config(config)
@@ -85,7 +53,6 @@ def propose_spec(state: BriefState, config: RunnableConfig) -> dict[str, Any]:
catalog = get_voice_catalog()
language = _supported_language(
- detected=state.detected_language,
last_used=brief.last_used_language,
provider=provider,
catalog=catalog,
@@ -128,12 +95,11 @@ def _active_provider() -> TtsProvider:
def _supported_language(
*,
- detected: str | None,
last_used: str | None,
provider: TtsProvider,
catalog: VoiceCatalog,
) -> str:
- raw = resolve_language(LanguageContext(detected=detected, last_used=last_used))
+ raw = resolve_language(LanguageContext(last_used=last_used))
try:
language = normalize_language_tag(raw)
except ValueError:
diff --git a/surfsense_backend/app/podcasts/generation/brief/state.py b/surfsense_backend/app/podcasts/generation/brief/state.py
index 976a72df5..418fb6fa9 100644
--- a/surfsense_backend/app/podcasts/generation/brief/state.py
+++ b/surfsense_backend/app/podcasts/generation/brief/state.py
@@ -4,16 +4,11 @@ from __future__ import annotations
from dataclasses import dataclass
-from sqlalchemy.ext.asyncio import AsyncSession
-
from app.podcasts.schemas import PodcastSpec
@dataclass
class BriefState:
- """Runtime inputs and the proposed spec the graph produces."""
+ """The proposed spec the graph produces; inputs arrive via the config."""
- db_session: AsyncSession
- source_content: str
- detected_language: str | None = None
spec: PodcastSpec | None = None
diff --git a/surfsense_backend/app/podcasts/generation/prompts/__init__.py b/surfsense_backend/app/podcasts/generation/prompts/__init__.py
index 1f6d3993b..041dd4e6d 100644
--- a/surfsense_backend/app/podcasts/generation/prompts/__init__.py
+++ b/surfsense_backend/app/podcasts/generation/prompts/__init__.py
@@ -2,13 +2,11 @@
from __future__ import annotations
-from .detect_language import detect_language_prompt
from .draft_segment import draft_segment_prompt
from .plan_outline import plan_outline_prompt
from .speakers import render_speaker_roster
__all__ = [
- "detect_language_prompt",
"draft_segment_prompt",
"plan_outline_prompt",
"render_speaker_roster",
diff --git a/surfsense_backend/app/podcasts/generation/prompts/detect_language.py b/surfsense_backend/app/podcasts/generation/prompts/detect_language.py
deleted file mode 100644
index a5ab4da5c..000000000
--- a/surfsense_backend/app/podcasts/generation/prompts/detect_language.py
+++ /dev/null
@@ -1,22 +0,0 @@
-"""Prompt for detecting the dominant natural language of source content."""
-
-from __future__ import annotations
-
-_SYSTEM = """\
-You identify the dominant natural language of a piece of source content for a \
-podcast that will be generated from it.
-
-Rules:
-- Report the language the listener-facing podcast should be spoken in, i.e. the \
-language most of the meaningful prose is written in.
-- Ignore code, markup, URLs, numbers, and proper nouns when judging.
-- If the content is too short, ambiguous, mixed without a clear majority, or not \
-natural-language prose, return null rather than guessing.
-
-Respond with strict JSON and nothing else:
-{"language": ""} or {"language": null}
-"""
-
-
-def detect_language_prompt() -> str:
- return _SYSTEM
diff --git a/surfsense_backend/app/podcasts/resolution/__init__.py b/surfsense_backend/app/podcasts/resolution/__init__.py
index ebfd3153a..19a7edfb3 100644
--- a/surfsense_backend/app/podcasts/resolution/__init__.py
+++ b/surfsense_backend/app/podcasts/resolution/__init__.py
@@ -1,8 +1,8 @@
"""Resolution: deterministic default chains for a fresh brief.
-Turns weak signals (detected language, last-used preferences) into concrete
-language and voice defaults, so the brief gate opens pre-filled and most users
-approve without editing.
+Turns the user's last-used preferences into concrete language and voice
+defaults, so the brief gate opens pre-filled and most users approve without
+editing.
"""
from __future__ import annotations
diff --git a/surfsense_backend/app/podcasts/resolution/language.py b/surfsense_backend/app/podcasts/resolution/language.py
index 2da90ef37..336d9036b 100644
--- a/surfsense_backend/app/podcasts/resolution/language.py
+++ b/surfsense_backend/app/podcasts/resolution/language.py
@@ -1,10 +1,9 @@
"""Resolve the brief's language without spending tokens at the gate.
-The chain mirrors the agreed policy: prefer a language detected from the source,
-fall back to what the user last chose, and finally default to English (which the
-user can still override in the brief). Detection itself is performed upstream
-where an LLM is available and passed in as :attr:`LanguageContext.detected`, so
-this layer stays pure and deterministic.
+The chain mirrors the agreed policy: reuse the language the user last chose, and
+otherwise default to English (which the user can still override in the brief). We
+deliberately never guess the language from the source content — proposing a
+language the user did not ask for is worse than a predictable default.
"""
from __future__ import annotations
@@ -20,7 +19,6 @@ DEFAULT_LANGUAGE = "en"
class LanguageContext:
"""Signals available when proposing a language for a fresh podcast."""
- detected: str | None = None
last_used: str | None = None
@@ -32,13 +30,6 @@ class LanguageResolver(ABC):
"""Return a language tag, or ``None`` to defer to the next resolver."""
-class DetectedLanguage(LanguageResolver):
- """Use the language detected from the source, when confident enough."""
-
- def resolve(self, context: LanguageContext) -> str | None:
- return context.detected
-
-
class LastUsedLanguage(LanguageResolver):
"""Reuse the language from the user's previous podcast."""
@@ -55,7 +46,6 @@ class DefaultLanguage(LanguageResolver):
# Order encodes the policy; prepend stronger signals here as they appear.
DEFAULT_LANGUAGE_CHAIN: tuple[LanguageResolver, ...] = (
- DetectedLanguage(),
LastUsedLanguage(),
DefaultLanguage(),
)
diff --git a/surfsense_backend/app/podcasts/schemas/spec.py b/surfsense_backend/app/podcasts/schemas/spec.py
index 2d3b3c74e..973e26167 100644
--- a/surfsense_backend/app/podcasts/schemas/spec.py
+++ b/surfsense_backend/app/podcasts/schemas/spec.py
@@ -30,7 +30,7 @@ _LANGUAGE_TAG = re.compile(r"^[A-Za-z]{2,3}(-[A-Za-z0-9]{2,8})*$")
def normalize_language_tag(value: str) -> str:
"""Validate and canonicalise a BCP-47 tag (lowercased primary subtag).
- Shared with the generation layer so detected and user-entered languages are
+ Shared with the generation layer so resolved and user-entered languages are
normalised identically before they reach a :class:`PodcastSpec`.
"""
cleaned = value.strip()
diff --git a/surfsense_backend/tests/unit/podcasts/test_resolution.py b/surfsense_backend/tests/unit/podcasts/test_resolution.py
index 4fe3df5cd..48e834096 100644
--- a/surfsense_backend/tests/unit/podcasts/test_resolution.py
+++ b/surfsense_backend/tests/unit/podcasts/test_resolution.py
@@ -2,8 +2,9 @@
Resolution is what lets most briefs need no edits: it proposes a sensible
language and a distinct voice per speaker. These tests state the policy
-("detected wins, else last-used, else English"; "two speakers should sound
+("reuse what the user last chose, else English"; "two speakers should sound
like two people") through the public resolver functions and the real catalog.
+We never guess the language from source content.
"""
from __future__ import annotations
@@ -22,13 +23,8 @@ from app.podcasts.voices import TtsProvider, get_voice_catalog
pytestmark = pytest.mark.unit
-def test_detected_language_is_preferred_over_everything():
- context = LanguageContext(detected="es", last_used="fr")
- assert resolve_language(context) == "es"
-
-
-def test_falls_back_to_last_used_when_nothing_detected():
- context = LanguageContext(detected=None, last_used="fr")
+def test_last_used_language_is_reused():
+ context = LanguageContext(last_used="fr")
assert resolve_language(context) == "fr"