From aaa9f01087721787580391a4d052afd10f1f4db6 Mon Sep 17 00:00:00 2001
From: CREDO23 <bakerathierry@gmail.com>
Date: Wed, 10 Jun 2026 18:44:25 +0200
Subject: [PATCH] test(podcasts): cover brief and transcript contracts

---
 .../tests/unit/podcasts/test_spec.py          | 141 ++++++++++++++++++
 1 file changed, 141 insertions(+)
 create mode 100644 surfsense_backend/tests/unit/podcasts/test_spec.py

diff --git a/surfsense_backend/tests/unit/podcasts/test_spec.py b/surfsense_backend/tests/unit/podcasts/test_spec.py
new file mode 100644
index 000000000..938515988
--- /dev/null
+++ b/surfsense_backend/tests/unit/podcasts/test_spec.py
@@ -0,0 +1,141 @@
+"""The brief and transcript contracts.
+
+A brief is what a user approves before any tokens or audio are spent, so its
+validation rules are real behavior: they are the guardrails that keep a
+nonsensical or ambiguous brief from ever reaching the expensive stages. These
+tests pin those rules through construction of the public Pydantic models.
+"""
+
+from __future__ import annotations
+
+import pytest
+from pydantic import ValidationError
+
+from app.podcasts.schemas import (
+    DurationTarget,
+    PodcastSpec,
+    SpeakerRole,
+    SpeakerSpec,
+    Transcript,
+    TranscriptTurn,
+    normalize_language_tag,
+)
+
+pytestmark = pytest.mark.unit  # mark every test in this module as "unit"
+
+
+def _speaker(slot: int, voice_id: str = "kokoro:am_adam") -> SpeakerSpec:
+    """Build a host-role speaker for ``slot``, named ``Speaker <slot>``."""
+    label = f"Speaker {slot}"
+    return SpeakerSpec(slot=slot, name=label, role=SpeakerRole.HOST, voice_id=voice_id)
+
+
+@pytest.mark.parametrize(
+    ("raw", "expected"),
+    [
+        ("EN", "en"),  # primary subtag is lowercased
+        ("en-US", "en-US"),  # already canonical: returned unchanged
+        ("PT-BR", "pt-BR"),  # only the primary subtag changes case
+        ("  fr  ", "fr"),  # surrounding whitespace is stripped
+    ],
+)
+def test_language_is_normalized_to_canonical_form(raw, expected):
+    """The primary subtag is lowercased and surrounding whitespace is trimmed."""
+    assert normalize_language_tag(raw) == expected
+
+
+@pytest.mark.parametrize("invalid", ["", "e", "english!", "123", "en_US"])
+def test_invalid_language_tags_are_rejected(invalid):
+    """Tags that are not BCP-47-shaped raise ``ValueError`` and never reach a brief."""
+    with pytest.raises(ValueError):
+        normalize_language_tag(invalid)
+
+
+def test_spec_normalizes_its_language_on_construction():
+    """A brief stores its language canonically however it was typed.
+
+    Only the primary subtag changes case: ``EN-us`` is stored as ``en-us``.
+    """
+    window = DurationTarget(min_minutes=5, max_minutes=10)
+    brief = PodcastSpec(language="EN-us", speakers=[_speaker(0)], duration=window)
+    assert brief.language == "en-us"
+
+
+def test_speakers_must_have_unique_slots():
+    """Slots are the join key to transcript turns, so duplicates are invalid."""
+    # The parts are built outside ``raises`` so a ValidationError from a speaker
+    # or the duration cannot make this test pass for the wrong reason.
+    clashing = [_speaker(0), _speaker(0, voice_id="kokoro:af_bella")]
+    window = DurationTarget(min_minutes=5, max_minutes=10)
+    with pytest.raises(ValidationError):
+        PodcastSpec(language="en", speakers=clashing, duration=window)
+
+
+def test_a_brief_needs_at_least_one_speaker():
+    """An empty speaker list is rejected when the brief is built."""
+    # Built outside ``raises`` so only the brief itself can trigger the error;
+    # an invalid duration would otherwise let the test pass vacuously.
+    window = DurationTarget(min_minutes=5, max_minutes=10)
+    with pytest.raises(ValidationError):
+        PodcastSpec(language="en", speakers=[], duration=window)
+
+
+def test_duration_rejects_an_inverted_range():
+    """A max below the min is a user error caught at the brief gate."""
+    with pytest.raises(ValidationError):
+        DurationTarget(min_minutes=20, max_minutes=10)  # max (10) < min (20)
+
+
+def test_duration_midpoint_is_where_drafting_aims():  # halfway between 10 and 20
+    assert DurationTarget(min_minutes=10, max_minutes=20).midpoint_minutes == 15
+
+
+def test_blank_focus_becomes_absent():
+    """A focus made only of whitespace is stored as ``None``."""
+    fields = {
+        "language": "en",
+        "speakers": [_speaker(0)],
+        "duration": DurationTarget(min_minutes=5, max_minutes=10),
+        "focus": "   ",
+    }
+    assert PodcastSpec(**fields).focus is None
+
+
+def test_speaker_for_returns_the_speaker_bound_to_a_slot():
+    """Looking up slot 1 yields the speaker that was declared with slot 1."""
+    first, second = _speaker(0), _speaker(1, voice_id="kokoro:af_bella")
+    window = DurationTarget(min_minutes=5, max_minutes=10)
+    brief = PodcastSpec(language="en", speakers=[first, second], duration=window)
+    found = brief.speaker_for(1)
+    assert found.voice_id == "kokoro:af_bella"
+
+
+def test_speaker_for_raises_when_no_speaker_matches():
+    """Asking for a slot no speaker occupies raises ``KeyError``."""
+    window = DurationTarget(min_minutes=5, max_minutes=10)
+    brief = PodcastSpec(language="en", speakers=[_speaker(0)], duration=window)
+
+    # Slot 99 is unused: the brief only declares slot 0.
+    with pytest.raises(KeyError):
+        brief.speaker_for(99)
+
+
+def test_transcript_word_count_sums_spoken_words():
+    """Drafting checks runtime against the word count, so the sum must be exact.
+
+    Two turns of three and two words must report exactly five.
+    """
+    lines = ["hello there world", "one two"]
+    turns = [TranscriptTurn(speaker=i, text=said) for i, said in enumerate(lines)]
+    script = Transcript(turns=turns)
+    assert script.word_count == 5
+
+
+def test_blank_transcript_turns_are_rejected():
+    with pytest.raises(ValidationError):
+        TranscriptTurn(speaker=0, text="   ")  # whitespace-only text
+
+
+def test_a_transcript_needs_at_least_one_turn():
+    with pytest.raises(ValidationError):
+        Transcript(turns=[])  # an empty turn list must fail validation