test(podcasts): cover brief and transcript contracts

This commit is contained in:
CREDO23 2026-06-10 18:44:25 +02:00
parent 9d8e4e4f9d
commit aaa9f01087

View file

@ -0,0 +1,141 @@
"""The brief and transcript contracts.
A brief is what a user approves before any tokens or audio are spent, so its
validation rules are real behavior: they are the guardrails that keep a
nonsensical or ambiguous brief from ever reaching the expensive stages. These
tests pin those rules through construction of the public Pydantic models.
"""
from __future__ import annotations
import pytest
from pydantic import ValidationError
from app.podcasts.schemas import (
DurationTarget,
PodcastSpec,
SpeakerRole,
SpeakerSpec,
Transcript,
TranscriptTurn,
normalize_language_tag,
)
# Module-level pytest marker: tags every test collected from this file as ``unit``.
pytestmark = pytest.mark.unit
def _speaker(slot: int, voice_id: str = "kokoro:am_adam") -> SpeakerSpec:
    """Build a host-role speaker bound to ``slot`` and named after it."""
    display_name = f"Speaker {slot}"
    fields = {
        "slot": slot,
        "name": display_name,
        "role": SpeakerRole.HOST,
        "voice_id": voice_id,
    }
    return SpeakerSpec(**fields)
@pytest.mark.parametrize(
    ("raw", "expected"),
    [
        ("EN", "en"),
        ("en-US", "en-US"),
        ("PT-BR", "pt-BR"),
        (" fr ", "fr"),
    ],
)
def test_language_is_normalized_to_canonical_form(raw, expected):
    """Normalizing trims outer whitespace and lowercases the primary subtag."""
    normalized = normalize_language_tag(raw)
    assert normalized == expected
@pytest.mark.parametrize(
    "invalid",
    [
        "",
        "e",
        "english!",
        "123",
        "en_US",
    ],
)
def test_invalid_language_tags_are_rejected(invalid):
    """Malformed tags raise ``ValueError`` instead of being normalized."""
    expect_rejection = pytest.raises(ValueError)
    with expect_rejection:
        normalize_language_tag(invalid)
def test_spec_normalizes_its_language_on_construction():
    """Constructing a brief canonicalizes whatever language tag was entered."""
    host = _speaker(0)
    window = DurationTarget(min_minutes=5, max_minutes=10)
    brief = PodcastSpec(language="EN-us", speakers=[host], duration=window)
    stored_language = brief.language
    assert stored_language == "en-us"
def test_speakers_must_have_unique_slots():
    """Two speakers sharing a slot make the brief invalid.

    Slots are the join key to transcript turns, so duplicates are rejected.
    """
    # Build the inputs outside the ``raises`` block: if a speaker or the
    # duration itself failed validation, that must surface as an error here
    # rather than being mistaken for the duplicate-slot rejection.
    duplicated = [_speaker(0), _speaker(0, voice_id="kokoro:af_bella")]
    window = DurationTarget(min_minutes=5, max_minutes=10)
    with pytest.raises(ValidationError):
        PodcastSpec(language="en", speakers=duplicated, duration=window)
def test_a_brief_needs_at_least_one_speaker():
    """A brief with an empty speaker list fails validation."""
    # The duration is built outside the ``raises`` block so that only the
    # brief's own validation can satisfy the expected ``ValidationError``.
    window = DurationTarget(min_minutes=5, max_minutes=10)
    with pytest.raises(ValidationError):
        PodcastSpec(language="en", speakers=[], duration=window)
def test_duration_rejects_an_inverted_range():
    """A duration whose max is below its min fails validation."""
    inverted_bounds = {"min_minutes": 20, "max_minutes": 10}
    with pytest.raises(ValidationError):
        DurationTarget(**inverted_bounds)
def test_duration_midpoint_is_where_drafting_aims():
    """A 10-20 minute target reports a midpoint of 15 minutes."""
    target = DurationTarget(min_minutes=10, max_minutes=20)
    midpoint = target.midpoint_minutes
    assert midpoint == 15
def test_blank_focus_becomes_absent():
    """A focus made only of whitespace is stored as ``None``."""
    host = _speaker(0)
    window = DurationTarget(min_minutes=5, max_minutes=10)
    brief = PodcastSpec(
        language="en",
        speakers=[host],
        duration=window,
        focus=" ",
    )
    stored_focus = brief.focus
    assert stored_focus is None
def test_speaker_for_returns_the_speaker_bound_to_a_slot():
    """Looking up slot 1 yields the speaker that was created for slot 1."""
    first = _speaker(0)
    second = _speaker(1, voice_id="kokoro:af_bella")
    window = DurationTarget(min_minutes=5, max_minutes=10)
    brief = PodcastSpec(language="en", speakers=[first, second], duration=window)
    found = brief.speaker_for(1)
    assert found.voice_id == "kokoro:af_bella"
def test_speaker_for_raises_when_no_speaker_matches():
    """Asking for a slot no speaker occupies raises ``KeyError``."""
    only_host = [_speaker(0)]
    window = DurationTarget(min_minutes=5, max_minutes=10)
    brief = PodcastSpec(language="en", speakers=only_host, duration=window)
    unknown_slot = 99
    with pytest.raises(KeyError):
        brief.speaker_for(unknown_slot)
def test_transcript_word_count_sums_spoken_words():
    """A three-word turn plus a two-word turn gives a word count of five."""
    spoken = [
        (0, "hello there world"),
        (1, "one two"),
    ]
    turns = [TranscriptTurn(speaker=slot, text=line) for slot, line in spoken]
    transcript = Transcript(turns=turns)
    assert transcript.word_count == 5
def test_blank_transcript_turns_are_rejected():
    """A turn whose text is only whitespace fails validation."""
    blank_turn = {"speaker": 0, "text": " "}
    with pytest.raises(ValidationError):
        TranscriptTurn(**blank_turn)
def test_a_transcript_needs_at_least_one_turn():
    """A transcript built from an empty list of turns fails validation."""
    no_turns = []
    with pytest.raises(ValidationError):
        Transcript(turns=no_turns)