# Provenance: added by commit aaa9f01087721787580391a4d052afd10f1f4db6
# ("test(podcasts): cover brief and transcript contracts", CREDO23,
# 2026-06-10) as surfsense_backend/tests/unit/podcasts/test_spec.py.
"""Contract tests for podcast briefs and transcripts.

The brief is what a user signs off on before any tokens or audio are spent,
which makes its validation rules genuine behavior: they are the guardrails
that stop a nonsensical or ambiguous brief from reaching the expensive stages.
Each rule is pinned here by constructing the public Pydantic models directly.
"""

from __future__ import annotations

import pytest
from pydantic import ValidationError

from app.podcasts.schemas import (
    DurationTarget,
    PodcastSpec,
    SpeakerRole,
    SpeakerSpec,
    Transcript,
    TranscriptTurn,
    normalize_language_tag,
)

pytestmark = pytest.mark.unit

# Voice used for the second speaker whenever a test needs two distinct voices.
_SECOND_VOICE = "kokoro:af_bella"

# (raw input, expected canonical tag) pairs for the normalization test.
_CANONICAL_TAGS = [
    ("EN", "en"),
    ("en-US", "en-US"),
    ("PT-BR", "pt-BR"),
    (" fr ", "fr"),
]

# Inputs that normalize_language_tag is expected to refuse.
_MALFORMED_TAGS = ["", "e", "english!", "123", "en_US"]


def _speaker(slot: int, voice_id: str = "kokoro:am_adam") -> SpeakerSpec:
    """Build a host speaker for ``slot``, named after the slot number."""
    fields = {
        "slot": slot,
        "name": f"Speaker {slot}",
        "role": SpeakerRole.HOST,
        "voice_id": voice_id,
    }
    return SpeakerSpec(**fields)


def _brief(
    speakers: list[SpeakerSpec], *, language: str = "en", **extra: object
) -> PodcastSpec:
    """Build a brief with a 5-10 minute target; ``extra`` is passed through."""
    return PodcastSpec(
        language=language,
        speakers=speakers,
        duration=DurationTarget(min_minutes=5, max_minutes=10),
        **extra,
    )


@pytest.mark.parametrize("raw, expected", _CANONICAL_TAGS)
def test_language_is_normalized_to_canonical_form(raw, expected) -> None:
    """Surrounding space is trimmed and the primary subtag is lowercased."""
    normalized = normalize_language_tag(raw)
    assert normalized == expected


@pytest.mark.parametrize("invalid", _MALFORMED_TAGS)
def test_invalid_language_tags_are_rejected(invalid) -> None:
    """A tag that is not BCP-47-shaped raises instead of reaching a brief."""
    with pytest.raises(ValueError):
        normalize_language_tag(invalid)


def test_spec_normalizes_its_language_on_construction() -> None:
    """The stored language is canonical however the user typed it."""
    brief = _brief([_speaker(0)], language="EN-us")
    assert brief.language == "en-us"


def test_speakers_must_have_unique_slots() -> None:
    """Two speakers sharing a slot fail validation.

    Slots are the join key to transcript turns, so a duplicate is invalid.
    """
    with pytest.raises(ValidationError):
        _brief([_speaker(0), _speaker(0, voice_id=_SECOND_VOICE)])


def test_a_brief_needs_at_least_one_speaker() -> None:
    """An empty speaker list fails validation."""
    with pytest.raises(ValidationError):
        _brief([])


def test_duration_rejects_an_inverted_range() -> None:
    """A maximum below the minimum is a user error caught at the brief gate."""
    with pytest.raises(ValidationError):
        DurationTarget(min_minutes=20, max_minutes=10)


def test_duration_midpoint_is_where_drafting_aims() -> None:
    """A 10-20 minute target reports a midpoint of 15 minutes."""
    target = DurationTarget(min_minutes=10, max_minutes=20)
    assert target.midpoint_minutes == 15


def test_blank_focus_becomes_absent() -> None:
    """A whitespace-only steer is stored as no steer at all."""
    brief = _brief([_speaker(0)], focus=" ")
    assert brief.focus is None


def test_speaker_for_returns_the_speaker_bound_to_a_slot() -> None:
    """Looking up slot 1 yields the speaker that was declared with slot 1."""
    brief = _brief([_speaker(0), _speaker(1, voice_id=_SECOND_VOICE)])
    found = brief.speaker_for(1)
    assert found.voice_id == "kokoro:af_bella"


def test_speaker_for_raises_when_no_speaker_matches() -> None:
    """Asking for a slot that no speaker holds raises ``KeyError``."""
    brief = _brief([_speaker(0)])
    with pytest.raises(KeyError):
        brief.speaker_for(99)


def test_transcript_word_count_sums_spoken_words() -> None:
    """Word count is summed across turns.

    Drafting checks runtime against this number, so it must be exact.
    """
    turns = [
        TranscriptTurn(speaker=0, text="hello there world"),
        TranscriptTurn(speaker=1, text="one two"),
    ]
    transcript = Transcript(turns=turns)
    assert transcript.word_count == 5


def test_blank_transcript_turns_are_rejected() -> None:
    """A turn whose text is only whitespace fails validation."""
    with pytest.raises(ValidationError):
        TranscriptTurn(speaker=0, text=" ")


def test_a_transcript_needs_at_least_one_turn() -> None:
    """A transcript with no turns fails validation."""
    with pytest.raises(ValidationError):
        Transcript(turns=[])