refactor(podcasts): drop language detection from brief

This commit is contained in:
CREDO23 2026-06-10 20:51:38 +02:00
parent e61308387c
commit aa7aa81c16
11 changed files with 22 additions and 130 deletions

View file

@ -16,7 +16,6 @@ DEFAULT_MAX_MINUTES = 20
class BriefConfig:
"""Signals used to propose a brief; everything here is non-LLM context."""
search_space_id: int
speaker_count: int = DEFAULT_SPEAKER_COUNT
min_minutes: int = DEFAULT_MIN_MINUTES
max_minutes: int = DEFAULT_MAX_MINUTES

View file

@ -1,28 +0,0 @@
"""The language-detection reply shape, normalised to a safe tag or ``None``."""
from __future__ import annotations
from pydantic import BaseModel, field_validator
from app.podcasts.schemas import normalize_language_tag
class DetectedLanguage(BaseModel):
    """Reply shape of the language detector: a usable BCP-47 tag, or ``None``.

    A reply that ``normalize_language_tag`` rejects with ``ValueError`` is
    coerced to ``None``, so a bad detection defers to the rest of the
    resolution chain instead of putting an invalid tag into the spec.
    """

    language: str | None = None

    @field_validator("language")
    @classmethod
    def _normalise(cls, value: str | None) -> str | None:
        """Return the canonical tag for *value*, or ``None`` if absent or invalid."""
        normalised: str | None = None
        if value is not None:
            try:
                normalised = normalize_language_tag(value)
            except ValueError:
                # Malformed tag: treat exactly like "detector was unsure".
                normalised = None
        return normalised

View file

@ -1,22 +1,20 @@
"""The brief-planning graph: detect language, then propose a spec."""
"""The brief-planning graph: propose a reviewable spec from defaults."""
from __future__ import annotations
from langgraph.graph import StateGraph
from .config import BriefConfig
from .nodes import detect_language, propose_spec
from .nodes import propose_spec
from .state import BriefState
def build_brief_graph():
workflow = StateGraph(BriefState, config_schema=BriefConfig)
workflow.add_node("detect_language", detect_language)
workflow.add_node("propose_spec", propose_spec)
workflow.add_edge("__start__", "detect_language")
workflow.add_edge("detect_language", "propose_spec")
workflow.add_edge("__start__", "propose_spec")
workflow.add_edge("propose_spec", "__end__")
graph = workflow.compile()

View file

@ -1,15 +1,14 @@
"""Brief-planning nodes: detect the language, then propose a full spec.
"""Brief-planning node: propose a full spec from deterministic defaults.
Only ``detect_language`` spends tokens, and only a small sample of source text;
``propose_spec`` is pure resolution. Together they open the brief gate pre-filled
so the common case needs no edits.
``propose_spec`` is pure resolution — it never spends tokens. It reuses the
user's last-used language/voices when available and otherwise falls back to
English, so the brief gate opens pre-filled and the common case needs no edits.
"""
from __future__ import annotations
from typing import Any
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.runnables import RunnableConfig
from app.config import config as app_config
@ -28,22 +27,15 @@ from app.podcasts.schemas import (
normalize_language_tag,
)
from app.podcasts.voices import (
VoiceCatalog,
TtsProvider,
VoiceCatalog,
get_voice_catalog,
provider_from_service,
)
from app.services.llm_service import get_agent_llm
from ..prompts import detect_language_prompt
from ..structured import StructuredOutputError, invoke_json
from .config import BriefConfig
from .detection import DetectedLanguage
from .state import BriefState
# Only the head of the source is needed to judge language; this caps tokens.
_DETECTION_SAMPLE_CHARS = 4000
# Default role per speaker slot; extra speakers beyond the list fall back to guest.
_ROLE_BY_SLOT = (
SpeakerRole.HOST,
@ -54,30 +46,6 @@ _ROLE_BY_SLOT = (
)
async def detect_language(
    state: BriefState, config: RunnableConfig
) -> dict[str, Any]:
    """Detect the source language; defer (``None``) on any uncertainty.

    Returns a state update with the single key ``detected_language``. The value
    is ``None`` when no LLM is available, when the source sample is empty, or
    when the structured reply cannot be obtained; otherwise it is the
    ``language`` field of the parsed :class:`DetectedLanguage` reply.
    """
    # Every "cannot tell" path produces the same update.
    deferred: dict[str, Any] = {"detected_language": None}

    brief = BriefConfig.from_runnable_config(config)
    llm = await get_agent_llm(state.db_session, brief.search_space_id)
    if llm is None:
        return deferred

    # Only the head of the source is sent, capped at _DETECTION_SAMPLE_CHARS.
    head = (state.source_content or "")[:_DETECTION_SAMPLE_CHARS]
    sample = head.strip()
    if not sample:
        return deferred

    system_message = SystemMessage(content=detect_language_prompt())
    human_message = HumanMessage(
        content=f"<source_content>{sample}</source_content>"
    )
    try:
        detected = await invoke_json(
            llm, [system_message, human_message], DetectedLanguage
        )
    except StructuredOutputError:
        return deferred
    return {"detected_language": detected.language}
def propose_spec(state: BriefState, config: RunnableConfig) -> dict[str, Any]:
"""Build a complete :class:`PodcastSpec` from the resolved defaults."""
brief = BriefConfig.from_runnable_config(config)
@ -85,7 +53,6 @@ def propose_spec(state: BriefState, config: RunnableConfig) -> dict[str, Any]:
catalog = get_voice_catalog()
language = _supported_language(
detected=state.detected_language,
last_used=brief.last_used_language,
provider=provider,
catalog=catalog,
@ -128,12 +95,11 @@ def _active_provider() -> TtsProvider:
def _supported_language(
*,
detected: str | None,
last_used: str | None,
provider: TtsProvider,
catalog: VoiceCatalog,
) -> str:
raw = resolve_language(LanguageContext(detected=detected, last_used=last_used))
raw = resolve_language(LanguageContext(last_used=last_used))
try:
language = normalize_language_tag(raw)
except ValueError:

View file

@ -4,16 +4,11 @@ from __future__ import annotations
from dataclasses import dataclass
from sqlalchemy.ext.asyncio import AsyncSession
from app.podcasts.schemas import PodcastSpec
@dataclass
class BriefState:
"""Runtime inputs and the proposed spec the graph produces."""
"""The proposed spec the graph produces; inputs arrive via the config."""
db_session: AsyncSession
source_content: str
detected_language: str | None = None
spec: PodcastSpec | None = None

View file

@ -2,13 +2,11 @@
from __future__ import annotations
from .detect_language import detect_language_prompt
from .draft_segment import draft_segment_prompt
from .plan_outline import plan_outline_prompt
from .speakers import render_speaker_roster
__all__ = [
"detect_language_prompt",
"draft_segment_prompt",
"plan_outline_prompt",
"render_speaker_roster",

View file

@ -1,22 +0,0 @@
"""Prompt for detecting the dominant natural language of source content."""
from __future__ import annotations
_SYSTEM = """\
You identify the dominant natural language of a piece of source content for a \
podcast that will be generated from it.
Rules:
- Report the language the listener-facing podcast should be spoken in, i.e. the \
language most of the meaningful prose is written in.
- Ignore code, markup, URLs, numbers, and proper nouns when judging.
- If the content is too short, ambiguous, mixed without a clear majority, or not \
natural-language prose, return null rather than guessing.
Respond with strict JSON and nothing else:
{"language": "<BCP-47 tag like en, en-US, fr, pt-BR>"} or {"language": null}
"""
def detect_language_prompt() -> str:
    """Return the system prompt text held in the module-level ``_SYSTEM``."""
    prompt: str = _SYSTEM
    return prompt

View file

@ -1,8 +1,8 @@
"""Resolution: deterministic default chains for a fresh brief.
Turns weak signals (detected language, last-used preferences) into concrete
language and voice defaults, so the brief gate opens pre-filled and most users
approve without editing.
Turns the user's last-used preferences into concrete language and voice
defaults, so the brief gate opens pre-filled and most users approve without
editing.
"""
from __future__ import annotations

View file

@ -1,10 +1,9 @@
"""Resolve the brief's language without spending tokens at the gate.
The chain mirrors the agreed policy: prefer a language detected from the source,
fall back to what the user last chose, and finally default to English (which the
user can still override in the brief). Detection itself is performed upstream
where an LLM is available and passed in as :attr:`LanguageContext.detected`, so
this layer stays pure and deterministic.
The chain mirrors the agreed policy: reuse the language the user last chose, and
otherwise default to English (which the user can still override in the brief). We
deliberately never guess the language from the source content — proposing a
language the user did not ask for is worse than a predictable default.
"""
from __future__ import annotations
@ -20,7 +19,6 @@ DEFAULT_LANGUAGE = "en"
class LanguageContext:
"""Signals available when proposing a language for a fresh podcast."""
detected: str | None = None
last_used: str | None = None
@ -32,13 +30,6 @@ class LanguageResolver(ABC):
"""Return a language tag, or ``None`` to defer to the next resolver."""
class DetectedLanguage(LanguageResolver):
    """Resolver that hands back the language detected from the source."""

    def resolve(self, context: LanguageContext) -> str | None:
        """Return ``context.detected`` unchanged; ``None`` defers to the next resolver."""
        detected = context.detected
        return detected
class LastUsedLanguage(LanguageResolver):
"""Reuse the language from the user's previous podcast."""
@ -55,7 +46,6 @@ class DefaultLanguage(LanguageResolver):
# Order encodes the policy; prepend stronger signals here as they appear.
DEFAULT_LANGUAGE_CHAIN: tuple[LanguageResolver, ...] = (
DetectedLanguage(),
LastUsedLanguage(),
DefaultLanguage(),
)

View file

@ -30,7 +30,7 @@ _LANGUAGE_TAG = re.compile(r"^[A-Za-z]{2,3}(-[A-Za-z0-9]{2,8})*$")
def normalize_language_tag(value: str) -> str:
"""Validate and canonicalise a BCP-47 tag (lowercased primary subtag).
Shared with the generation layer so detected and user-entered languages are
Shared with the generation layer so resolved and user-entered languages are
normalised identically before they reach a :class:`PodcastSpec`.
"""
cleaned = value.strip()

View file

@ -2,8 +2,9 @@
Resolution is what lets most briefs need no edits: it proposes a sensible
language and a distinct voice per speaker. These tests state the policy
("detected wins, else last-used, else English"; "two speakers should sound
("reuse what the user last chose, else English"; "two speakers should sound
like two people") through the public resolver functions and the real catalog.
We never guess the language from source content.
"""
from __future__ import annotations
@ -22,13 +23,8 @@ from app.podcasts.voices import TtsProvider, get_voice_catalog
pytestmark = pytest.mark.unit
def test_detected_language_is_preferred_over_everything():
    """A detected language outranks the user's last-used language."""
    resolved = resolve_language(LanguageContext(detected="es", last_used="fr"))
    assert resolved == "es"
def test_falls_back_to_last_used_when_nothing_detected():
context = LanguageContext(detected=None, last_used="fr")
def test_last_used_language_is_reused():
context = LanguageContext(last_used="fr")
assert resolve_language(context) == "fr"