feat: agent versioning and model configurations override (#227)

* feat: add tests and migrations
* feat: workflow versioning among published and draft
* feat: add a new settings page to simplify workflow detail page
* fix: fix tsclient generation
2026-06-19 08:28:10 +02:00 · 2026-04-08 19:20:31 +05:30 · 2026-04-08 19:20:31 +05:30 · 38d1d928b7
commit 38d1d928b7
parent f5fa9ce717
62 changed files with 10158 additions and 3131 deletions
--- a/api/tests/conftest.py
+++ b/api/tests/conftest.py
@@ -80,11 +80,6 @@ class MockWorkflowModel:
    workflow_id: int = 1
    organization_id: int = 1
    workflow_configurations: Dict[str, Any] = field(default_factory=dict)
-    workflow_definition_with_fallback: Dict[str, Any] = field(default_factory=dict)
-
-    def __post_init__(self):
-        if not self.workflow_definition_with_fallback:
-            self.workflow_definition_with_fallback = DEFAULT_WORKFLOW_DEFINITION.copy()


@dataclass
@@ -120,6 +115,7 @@ class MockToolModel:
    name: str
    description: str
    definition: Dict[str, Any]
+    category: str = "http_api"


@pytest.fixture
--- a/api/tests/test_resolve_effective_config.py
+++ b/api/tests/test_resolve_effective_config.py
@@ -0,0 +1,353 @@
+"""
+TDD tests for resolve_effective_config().
+
+This function deep-merges workflow-level model_overrides onto the global
+UserConfiguration. Fields not overridden inherit from global.
+
+Module under test: api.services.configuration.resolve
+"""
+
+import pytest
+
+from api.schemas.user_configuration import UserConfiguration
+from api.services.configuration.registry import (
+    DeepgramSTTConfiguration,
+    ElevenlabsTTSConfiguration,
+    GoogleRealtimeLLMConfiguration,
+    OpenAILLMService,
+)
+from api.services.configuration.resolve import resolve_effective_config
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture
+def global_config() -> UserConfiguration:
+    """A realistic global user configuration."""
+    return UserConfiguration(
+        llm=OpenAILLMService(
+            provider="openai", api_key="sk-global-llm", model="gpt-4.1"
+        ),
+        tts=ElevenlabsTTSConfiguration(
+            provider="elevenlabs",
+            api_key="el-global-tts",
+            voice="Rachel",
+            model="eleven_flash_v2_5",
+        ),
+        stt=DeepgramSTTConfiguration(
+            provider="deepgram",
+            api_key="dg-global-stt",
+            model="nova-3-general",
+            language="multi",
+        ),
+        is_realtime=False,
+        realtime=None,
+    )
+
+
+@pytest.fixture
+def global_config_realtime() -> UserConfiguration:
+    """Global config with realtime enabled."""
+    return UserConfiguration(
+        llm=OpenAILLMService(
+            provider="openai", api_key="sk-global-llm", model="gpt-4.1"
+        ),
+        tts=ElevenlabsTTSConfiguration(
+            provider="elevenlabs",
+            api_key="el-global-tts",
+            voice="Rachel",
+            model="eleven_flash_v2_5",
+        ),
+        stt=DeepgramSTTConfiguration(
+            provider="deepgram",
+            api_key="dg-global-stt",
+            model="nova-3-general",
+            language="multi",
+        ),
+        is_realtime=True,
+        realtime=GoogleRealtimeLLMConfiguration(
+            provider="google_realtime",
+            api_key="goog-global-rt",
+            model="gemini-3.1-flash-live-preview",
+            voice="Puck",
+            language="en",
+        ),
+    )
+
+
+# ---------------------------------------------------------------------------
+# No overrides → global returned unchanged
+# ---------------------------------------------------------------------------
+
+
+class TestNoOverrides:
+    def test_none_overrides_returns_global(self, global_config):
+        result = resolve_effective_config(global_config, None)
+        assert result.llm.model == "gpt-4.1"
+        assert result.tts.voice == "Rachel"
+        assert result.stt.model == "nova-3-general"
+        assert result.is_realtime is False
+
+    def test_empty_dict_overrides_returns_global(self, global_config):
+        result = resolve_effective_config(global_config, {})
+        assert result.llm.model == "gpt-4.1"
+        assert result.tts.voice == "Rachel"
+
+    def test_does_not_mutate_original(self, global_config):
+        """The original config object must not be modified."""
+        resolve_effective_config(global_config, {"llm": {"model": "gpt-4.1-mini"}})
+        assert global_config.llm.model == "gpt-4.1"
+
+
+# ---------------------------------------------------------------------------
+# Single-field overrides within a section (same provider)
+# ---------------------------------------------------------------------------
+
+
+class TestSingleFieldOverride:
+    def test_override_llm_model_only(self, global_config):
+        result = resolve_effective_config(
+            global_config, {"llm": {"model": "gpt-4.1-mini"}}
+        )
+        assert result.llm.model == "gpt-4.1-mini"
+        assert result.llm.provider == "openai"  # inherited
+        assert result.llm.api_key == "sk-global-llm"  # inherited
+
+    def test_override_tts_voice_only(self, global_config):
+        result = resolve_effective_config(global_config, {"tts": {"voice": "shimmer"}})
+        assert result.tts.voice == "shimmer"
+        assert result.tts.provider == "elevenlabs"  # inherited
+        assert result.tts.api_key == "el-global-tts"  # inherited
+
+    def test_override_stt_language_only(self, global_config):
+        result = resolve_effective_config(global_config, {"stt": {"language": "en"}})
+        assert result.stt.language == "en"
+        assert result.stt.model == "nova-3-general"  # inherited
+        assert result.stt.provider == "deepgram"  # inherited
+
+
+# ---------------------------------------------------------------------------
+# Provider change (requires full section replacement)
+# ---------------------------------------------------------------------------
+
+
+class TestProviderChange:
+    def test_override_llm_to_different_provider(self, global_config):
+        result = resolve_effective_config(
+            global_config,
+            {
+                "llm": {
+                    "provider": "groq",
+                    "api_key": "groq-key",
+                    "model": "llama-3.3-70b-versatile",
+                }
+            },
+        )
+        assert result.llm.provider == "groq"
+        assert result.llm.model == "llama-3.3-70b-versatile"
+        assert result.llm.api_key == "groq-key"
+
+    def test_provider_change_does_not_affect_other_sections(self, global_config):
+        result = resolve_effective_config(
+            global_config,
+            {
+                "llm": {
+                    "provider": "groq",
+                    "api_key": "groq-key",
+                    "model": "llama-3.3-70b-versatile",
+                }
+            },
+        )
+        # TTS and STT unchanged
+        assert result.tts.provider == "elevenlabs"
+        assert result.stt.provider == "deepgram"
+
+
+# ---------------------------------------------------------------------------
+# API key inheritance
+# ---------------------------------------------------------------------------
+
+
+class TestAPIKeyInheritance:
+    def test_no_api_key_in_override_inherits_global(self, global_config):
+        """When override omits api_key, global key is used."""
+        result = resolve_effective_config(
+            global_config, {"llm": {"model": "gpt-4.1-mini"}}
+        )
+        assert result.llm.api_key == "sk-global-llm"
+
+    def test_explicit_api_key_in_override_wins(self, global_config):
+        """When override includes api_key, it takes precedence."""
+        result = resolve_effective_config(
+            global_config,
+            {"llm": {"model": "gpt-4.1-mini", "api_key": "sk-override-key"}},
+        )
+        assert result.llm.api_key == "sk-override-key"
+
+
+# ---------------------------------------------------------------------------
+# is_realtime override
+# ---------------------------------------------------------------------------
+
+
+class TestRealtimeOverride:
+    def test_enable_realtime_on_non_realtime_global(self, global_config):
+        result = resolve_effective_config(
+            global_config,
+            {
+                "is_realtime": True,
+                "realtime": {
+                    "provider": "google_realtime",
+                    "api_key": "goog-override",
+                    "model": "gemini-3.1-flash-live-preview",
+                    "voice": "Charon",
+                    "language": "en",
+                },
+            },
+        )
+        assert result.is_realtime is True
+        assert result.realtime.provider == "google_realtime"
+        assert result.realtime.voice == "Charon"
+
+    def test_disable_realtime_on_realtime_global(self, global_config_realtime):
+        result = resolve_effective_config(
+            global_config_realtime, {"is_realtime": False}
+        )
+        assert result.is_realtime is False
+        # Realtime config may still be present but is_realtime flag controls usage
+
+    def test_override_realtime_voice_only(self, global_config_realtime):
+        result = resolve_effective_config(
+            global_config_realtime, {"realtime": {"voice": "Kore"}}
+        )
+        assert result.realtime.voice == "Kore"
+        assert result.realtime.provider == "google_realtime"  # inherited
+        assert result.realtime.api_key == "goog-global-rt"  # inherited
+
+    def test_override_is_realtime_only_without_realtime_section(self, global_config):
+        """Override is_realtime=True but provide no realtime config.
+        Should set the flag; realtime section stays None from global."""
+        result = resolve_effective_config(global_config, {"is_realtime": True})
+        assert result.is_realtime is True
+        assert result.realtime is None  # no config provided
+
+
+# ---------------------------------------------------------------------------
+# Section override when global has None for that section
+# ---------------------------------------------------------------------------
+
+
+class TestOverrideOnNullGlobal:
+    def test_override_stt_when_global_is_none(self):
+        """When global has no STT config, override creates one from scratch."""
+        config = UserConfiguration(
+            llm=OpenAILLMService(provider="openai", api_key="sk-key", model="gpt-4.1"),
+            stt=None,
+            tts=None,
+            is_realtime=False,
+        )
+        result = resolve_effective_config(
+            config,
+            {
+                "stt": {
+                    "provider": "deepgram",
+                    "api_key": "dg-new",
+                    "model": "nova-3-general",
+                    "language": "en",
+                }
+            },
+        )
+        assert result.stt is not None
+        assert result.stt.provider == "deepgram"
+        assert result.stt.model == "nova-3-general"
+
+    def test_override_realtime_when_global_is_none(self):
+        """Realtime section can be created from override even if global has none."""
+        config = UserConfiguration(
+            llm=OpenAILLMService(provider="openai", api_key="sk-key", model="gpt-4.1"),
+            is_realtime=False,
+            realtime=None,
+        )
+        result = resolve_effective_config(
+            config,
+            {
+                "is_realtime": True,
+                "realtime": {
+                    "provider": "google_realtime",
+                    "api_key": "goog-new",
+                    "model": "gemini-3.1-flash-live-preview",
+                    "voice": "Puck",
+                    "language": "en",
+                },
+            },
+        )
+        assert result.is_realtime is True
+        assert result.realtime.provider == "google_realtime"
+
+
+# ---------------------------------------------------------------------------
+# Multi-section overrides
+# ---------------------------------------------------------------------------
+
+
+class TestMultiSectionOverride:
+    def test_override_llm_and_tts_not_stt(self, global_config):
+        result = resolve_effective_config(
+            global_config,
+            {
+                "llm": {"model": "gpt-4.1-mini"},
+                "tts": {"voice": "shimmer"},
+            },
+        )
+        assert result.llm.model == "gpt-4.1-mini"
+        assert result.tts.voice == "shimmer"
+        # STT untouched
+        assert result.stt.model == "nova-3-general"
+        assert result.stt.language == "multi"
+
+    def test_override_all_sections(self, global_config):
+        result = resolve_effective_config(
+            global_config,
+            {
+                "llm": {"model": "gpt-4.1-mini"},
+                "tts": {"voice": "shimmer"},
+                "stt": {"language": "en"},
+                "is_realtime": True,
+                "realtime": {
+                    "provider": "google_realtime",
+                    "api_key": "goog-key",
+                    "model": "gemini-3.1-flash-live-preview",
+                    "voice": "Fenrir",
+                    "language": "en",
+                },
+            },
+        )
+        assert result.llm.model == "gpt-4.1-mini"
+        assert result.tts.voice == "shimmer"
+        assert result.stt.language == "en"
+        assert result.is_realtime is True
+        assert result.realtime.voice == "Fenrir"
+
+
+# ---------------------------------------------------------------------------
+# Ignored / unknown keys
+# ---------------------------------------------------------------------------
+
+
+class TestUnknownKeys:
+    def test_unknown_section_in_overrides_is_ignored(self, global_config):
+        """Override with a key that doesn't map to any section should not crash."""
+        result = resolve_effective_config(
+            global_config, {"unknown_section": {"foo": "bar"}}
+        )
+        assert result.llm.model == "gpt-4.1"
+
+    def test_embeddings_not_overridable(self, global_config):
+        """Embeddings stay global — overrides for embeddings should be ignored."""
+        result = resolve_effective_config(
+            global_config,
+            {"embeddings": {"provider": "openai", "model": "text-embedding-3-small"}},
+        )
+        assert result.embeddings is None  # was None in global, stays None
--- a/api/tests/test_user_turn_stop_scenarios.py
+++ b/api/tests/test_user_turn_stop_scenarios.py
@@ -1,960 +0,0 @@
-"""Tests validating user turn stop strategy behavior during bot speaking scenarios.
-
-These tests validate the scenarios described in scenarios.md. They demonstrate
-how the ExternalUserTurnStopStrategy and UserTurnController interact when frames
-are suppressed (muted) during bot speaking.
-
-Key concepts:
-- When the bot is speaking, AlwaysUserMuteStrategy causes the LLMUserAggregator
-  to suppress user frames (UserStartedSpeaking, UserStoppedSpeaking, Transcription, VAD).
-- The ExternalUserTurnStopStrategy accumulates _text from TranscriptionFrames and
-  triggers a stop when _user_speaking is False and _text is truthy.
-- The UserTurnController only allows a stop if _user_turn is True (a start must
-  have occurred first). When a stop is rejected, the controller unconditionally
-  resets all stop strategies, clearing any dangling state (e.g. _text).
-- This unconditional reset prevents stale _text from causing premature stops
-  or contaminating subsequent turns.
-"""
-
-import asyncio
-
-import pytest
-
-from pipecat.frames.frames import (
-    BotStartedSpeakingFrame,
-    BotStoppedSpeakingFrame,
-    EndTaskFrame,
-    Frame,
-    TranscriptionFrame,
-    UserStartedSpeakingFrame,
-    UserStoppedSpeakingFrame,
-    VADUserStartedSpeakingFrame,
-    VADUserStoppedSpeakingFrame,
-)
-from pipecat.pipeline.pipeline import Pipeline
-from pipecat.pipeline.runner import PipelineRunner
-from pipecat.pipeline.task import PipelineParams, PipelineTask
-from pipecat.processors.aggregators.llm_context import LLMContext
-from pipecat.processors.aggregators.llm_response_universal import (
-    LLMAssistantAggregatorParams,
-    LLMContextAggregatorPair,
-    LLMUserAggregatorParams,
-)
-from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
-from pipecat.tests import MockLLMService
-from pipecat.turns.user_mute import AlwaysUserMuteStrategy
-from pipecat.turns.user_start import VADUserTurnStartStrategy
-from pipecat.turns.user_stop import ExternalUserTurnStopStrategy
-from pipecat.turns.user_turn_strategies import UserTurnStrategies
-from pipecat.utils.time import time_now_iso8601
-
-# Short timeout for faster tests
-STOP_STRATEGY_TIMEOUT = 0.15
-# Delay to allow async processing
-ASYNC_DELAY = 0.05
-# Delay to wait for stop strategy timeout to fire
-TIMEOUT_WAIT = STOP_STRATEGY_TIMEOUT + 0.1
-
-
-class FrameInjector(FrameProcessor):
-    """Simple processor that can inject frames into the pipeline."""
-
-    async def process_frame(self, frame: Frame, direction: FrameDirection):
-        await super().process_frame(frame, direction)
-        await self.push_frame(frame, direction)
-
-    async def inject(
-        self, frame: Frame, direction: FrameDirection = FrameDirection.DOWNSTREAM
-    ):
-        """Inject a frame into the pipeline."""
-        await self.push_frame(frame, direction)
-
-
-def _build_components(llm_steps=None):
-    """Build pipeline components for testing.
-
-    Uses:
-    - VADUserTurnStartStrategy: turn starts only when VADUserStartedSpeakingFrame arrives
-    - ExternalUserTurnStopStrategy: turn stops based on UserStoppedSpeakingFrame + _text
-    - AlwaysUserMuteStrategy: suppresses user frames while bot is speaking
-
-    Returns a tuple of (injector, user_aggregator, stop_strategy, turn_controller, mock_llm, pipeline).
-    """
-    context = LLMContext()
-
-    stop_strategy = ExternalUserTurnStopStrategy(timeout=STOP_STRATEGY_TIMEOUT)
-
-    user_turn_strategies = UserTurnStrategies(
-        start=[VADUserTurnStartStrategy()],
-        stop=[stop_strategy],
-    )
-
-    user_params = LLMUserAggregatorParams(
-        user_turn_strategies=user_turn_strategies,
-        user_mute_strategies=[AlwaysUserMuteStrategy()],
-    )
-    assistant_params = LLMAssistantAggregatorParams(expect_stripped_words=True)
-
-    context_aggregator = LLMContextAggregatorPair(
-        context, assistant_params=assistant_params, user_params=user_params
-    )
-    user_agg = context_aggregator.user()
-    assistant_agg = context_aggregator.assistant()
-
-    if llm_steps is None:
-        llm_steps = [
-            MockLLMService.create_text_chunks(text="Response 1"),
-            MockLLMService.create_text_chunks(text="Response 2"),
-            MockLLMService.create_text_chunks(text="Response 3"),
-        ]
-    mock_llm = MockLLMService(mock_steps=llm_steps, chunk_delay=0.001)
-
-    injector = FrameInjector()
-    pipeline = Pipeline([injector, user_agg, mock_llm, assistant_agg])
-
-    turn_controller = user_agg._user_turn_controller
-
-    return (
-        injector,
-        user_agg,
-        stop_strategy,
-        turn_controller,
-        mock_llm,
-        context,
-        pipeline,
-    )
-
-
-async def _run_scenario(pipeline, inject_fn):
-    """Run a pipeline with a frame injection coroutine."""
-    task = PipelineTask(pipeline, params=PipelineParams(), enable_rtvi=False)
-    runner = PipelineRunner()
-
-    async def run():
-        await runner.run(task)
-
-    async def inject():
-        # Wait for pipeline to start (StartFrame to propagate)
-        await asyncio.sleep(ASYNC_DELAY)
-        await inject_fn()
-
-    await asyncio.gather(run(), inject())
-
-
-async def _inject_user_turn(injector, text, delay=ASYNC_DELAY):
-    """Inject a complete user turn: VAD start + external start + transcription + external stop.
-
-    This simulates what happens in a real pipeline when the user speaks:
-    1. VAD detects speech -> VADUserStartedSpeakingFrame (triggers turn start)
-    2. External processor sends UserStartedSpeakingFrame (stop strategy tracks _user_speaking)
-    3. STT produces TranscriptionFrame (stop strategy accumulates _text)
-    4. External processor sends UserStoppedSpeakingFrame (stop strategy triggers stop)
-    """
-    await injector.inject(VADUserStartedSpeakingFrame())
-    await asyncio.sleep(0)
-    await injector.inject(UserStartedSpeakingFrame())
-    await asyncio.sleep(0)
-    await injector.inject(UserStoppedSpeakingFrame())
-    await asyncio.sleep(delay)
-    await injector.inject(TranscriptionFrame(text, "user-1", time_now_iso8601()))
-
-
-class TestUserTurnStopScenarios:
-    """Test scenarios from scenarios.md.
-
-    Each test simulates a specific frame ordering to validate the interaction
-    between ExternalUserTurnStopStrategy and UserTurnController, particularly
-    around frame suppression during bot speaking.
-    """
-
-    # =========================================================================
-    # Scenario 1 (✅): All frames suppressed during bot speaking
-    #
-    # BotStartedSpeaking (muted)
-    # UserStartedSpeaking (suppressed)
-    # TranscriptionFrame (suppressed)
-    # UserStoppedSpeaking (suppressed)
-    # BotStoppedSpeaking (unmuted)
-    #
-    # Stop strategy _text is empty because TranscriptionFrame was suppressed.
-    # =========================================================================
-
-    @pytest.mark.asyncio
-    async def test_scenario_1_all_suppressed_then_bot_stops(self):
-        """All user frames suppressed during bot speaking, then bot stops.
-
-        Expected: _text is empty, no turn triggered, clean state.
-        Second turn works correctly.
-        """
-        injector, user_agg, stop_strategy, turn_ctrl, mock_llm, context, pipeline = (
-            _build_components()
-        )
-
-        async def inject():
-            # === Turn 1: Bot speaking, all user frames suppressed ===
-            await injector.inject(BotStartedSpeakingFrame())
-            await asyncio.sleep(ASYNC_DELAY)
-
-            # These are all suppressed by mute
-            await injector.inject(VADUserStartedSpeakingFrame())
-            await asyncio.sleep(0)
-            await injector.inject(UserStartedSpeakingFrame())
-            await asyncio.sleep(0)
-            await injector.inject(
-                TranscriptionFrame("hello", "user-1", time_now_iso8601())
-            )
-            await asyncio.sleep(0)
-            await injector.inject(UserStoppedSpeakingFrame())
-            await asyncio.sleep(0)
-            await injector.inject(VADUserStoppedSpeakingFrame())
-            await asyncio.sleep(ASYNC_DELAY)
-
-            await injector.inject(BotStoppedSpeakingFrame())
-            await asyncio.sleep(TIMEOUT_WAIT)
-
-            # Assert: _text should be empty (all frames suppressed)
-            assert stop_strategy._text == "", (
-                f"Expected empty _text after all frames suppressed, got '{stop_strategy._text}'"
-            )
-            assert not turn_ctrl._user_turn, "Expected _user_turn to be False"
-
-            # === Turn 2: Normal turn should work correctly ===
-            await _inject_user_turn(injector, "second turn text")
-            await asyncio.sleep(TIMEOUT_WAIT)
-
-            # Assert: turn completed, _text cleared by reset
-            assert stop_strategy._text == "", (
-                f"Expected empty _text after clean turn, got '{stop_strategy._text}'"
-            )
-            assert not turn_ctrl._user_turn, (
-                "Expected _user_turn to be False after turn"
-            )
-            assert mock_llm.get_current_step() == 1, (
-                f"Expected 1 LLM call (turn 2 only), got {mock_llm.get_current_step()}"
-            )
-
-            await injector.inject(EndTaskFrame(), direction=FrameDirection.UPSTREAM)
-
-        await _run_scenario(pipeline, inject)
-
-    # =========================================================================
-    # Scenario 2 (✅): User frames suppressed, user stops after bot stops
-    #
-    # BotStartedSpeaking (muted)
-    # UserStartedSpeaking (suppressed)
-    # TranscriptionFrame (suppressed)
-    # BotStoppedSpeaking (unmuted)
-    # UserStoppedSpeaking (stop strategy has no _text -> no trigger)
-    # =========================================================================
-
-    @pytest.mark.asyncio
-    async def test_scenario_2_user_stops_after_bot_stops_no_text(self):
-        """User stops speaking after bot stops, but transcription was suppressed.
-
-        Expected: _text is empty because transcription was suppressed.
-        UserStoppedSpeaking doesn't trigger stop (no _text).
-        """
-        injector, user_agg, stop_strategy, turn_ctrl, mock_llm, context, pipeline = (
-            _build_components()
-        )
-
-        async def inject():
-            # === Turn 1: Bot speaking, user frames partially suppressed ===
-            await injector.inject(BotStartedSpeakingFrame())
-            await asyncio.sleep(ASYNC_DELAY)
-
-            # Suppressed during bot speaking
-            await injector.inject(VADUserStartedSpeakingFrame())
-            await asyncio.sleep(0)
-            await injector.inject(UserStartedSpeakingFrame())
-            await asyncio.sleep(0)
-            await injector.inject(
-                TranscriptionFrame("hello", "user-1", time_now_iso8601())
-            )
-            await asyncio.sleep(ASYNC_DELAY)
-
-            # Bot stops -> unmuted
-            await injector.inject(BotStoppedSpeakingFrame())
-            await asyncio.sleep(ASYNC_DELAY)
-
-            # UserStoppedSpeaking arrives after unmute, but _text is empty
-            await injector.inject(UserStoppedSpeakingFrame())
-            await asyncio.sleep(TIMEOUT_WAIT)
-
-            # Assert: _text empty (TranscriptionFrame was suppressed)
-            assert stop_strategy._text == "", (
-                f"Expected empty _text, got '{stop_strategy._text}'"
-            )
-            assert not turn_ctrl._user_turn, "Expected _user_turn to be False"
-
-            # === Turn 2: Normal turn should work ===
-            await _inject_user_turn(injector, "second turn")
-            await asyncio.sleep(TIMEOUT_WAIT)
-
-            assert stop_strategy._text == "", "Expected clean _text after turn 2"
-            assert mock_llm.get_current_step() == 1, (
-                f"Expected 1 LLM call, got {mock_llm.get_current_step()}"
-            )
-
-            await injector.inject(EndTaskFrame(), direction=FrameDirection.UPSTREAM)
-
-        await _run_scenario(pipeline, inject)
-
-    # =========================================================================
-    # Scenario 3 (✅ after fix): Transcription arrives after unmute
-    #
-    # BotStartedSpeaking (muted)
-    # UserStartedSpeaking (suppressed)
-    # BotStoppedSpeaking (unmuted)
-    # TranscriptionFrame -> stop strategy _text = "hello"
-    # UserStoppedSpeaking -> stop strategy triggers (text truthy, not speaking)
-    #   Turn controller ignores (user_turn is False), BUT unconditionally
-    #   resets stop strategies -> _text cleared. No dangling state.
-    # =========================================================================
-
-    @pytest.mark.asyncio
-    async def test_scenario_3_transcription_after_unmute_text_cleared(self):
-        """Transcription arrives after bot stops but turn was never started.
-
-        The VADUserStartedSpeakingFrame was suppressed, so no turn started.
-        But TranscriptionFrame arrives after unmute and accumulates _text.
-        The stop strategy triggers, but the turn controller rejects it
-        (no active turn). The unconditional reset clears _text, preventing
-        any dangling state from contaminating subsequent turns.
-        """
-        injector, user_agg, stop_strategy, turn_ctrl, mock_llm, context, pipeline = (
-            _build_components()
-        )
-
-        async def inject():
-            # === Turn 1: Rejected stop with unconditional reset ===
-            await injector.inject(BotStartedSpeakingFrame())
-            await asyncio.sleep(ASYNC_DELAY)
-
-            # Suppressed: VAD and UserStartedSpeaking
-            await injector.inject(VADUserStartedSpeakingFrame())
-            await asyncio.sleep(0)
-            await injector.inject(UserStartedSpeakingFrame())
-            await asyncio.sleep(ASYNC_DELAY)
-
-            # Bot stops -> unmuted
-            await injector.inject(BotStoppedSpeakingFrame())
-            await asyncio.sleep(ASYNC_DELAY)
-
-            # Install spy on trigger_user_turn_stopped to track every call
-            # and the _user_turn state at the time of each call.
-            trigger_stop_calls = []
-            original_trigger_stop = stop_strategy.trigger_user_turn_stopped
-
-            async def spy_trigger_stop():
-                trigger_stop_calls.append(turn_ctrl._user_turn)
-                await original_trigger_stop()
-
-            stop_strategy.trigger_user_turn_stopped = spy_trigger_stop
-
-            # TranscriptionFrame arrives AFTER unmute -> reaches stop strategy
-            await injector.inject(
-                TranscriptionFrame("hello", "user-1", time_now_iso8601())
-            )
-            await asyncio.sleep(ASYNC_DELAY)
-
-            # UserStoppedSpeaking arrives AFTER unmute
-            # Stop strategy: _user_speaking is False (UserStartedSpeaking was suppressed),
-            # _text is "hello" -> triggers stop via _handle_user_stopped_speaking
-            # Turn controller: _user_turn is False -> rejects, but resets -> _text cleared
-            await injector.inject(UserStoppedSpeakingFrame())
-            await asyncio.sleep(ASYNC_DELAY)
-
-            # Call #1: _handle_user_stopped_speaking -> _maybe_trigger_user_turn_stopped
-            assert len(trigger_stop_calls) == 1, (
-                f"Expected exactly 1 trigger_user_turn_stopped call from "
-                f"_handle_user_stopped_speaking, got {len(trigger_stop_calls)}"
-            )
-            assert trigger_stop_calls[0] is False, (
-                "Expected _user_turn=False when _handle_user_stopped_speaking triggered stop"
-            )
-
-            # Wait for _task_handler timeout period
-            await asyncio.sleep(TIMEOUT_WAIT)
-
-            # The unconditional reset cleared _text after the rejected stop,
-            # so the timeout's _maybe_trigger_user_turn_stopped sees _text="" and
-            # does NOT call trigger_user_turn_stopped again.
-            assert len(trigger_stop_calls) == 1, (
-                f"Expected no additional trigger_user_turn_stopped calls after "
-                f"reset cleared _text, but got {len(trigger_stop_calls)} total call(s)"
-            )
-
-            # Restore original method
-            stop_strategy.trigger_user_turn_stopped = original_trigger_stop
-
-            # Transcript is not suppressed, so we should have hello in user aggregator
-            assert user_agg._aggregation[0].text == "hello"
-
-            # Assert: _text is cleared by the unconditional reset (no dangling state)
-            assert stop_strategy._text == "", (
-                f"Expected empty _text after unconditional reset, got '{stop_strategy._text}'"
-            )
-            assert not turn_ctrl._user_turn, (
-                "Expected _user_turn to be False (turn was never started)"
-            )
-            # No LLM call should have happened
-            assert mock_llm.get_current_step() == 0, (
-                f"Expected 0 LLM calls, got {mock_llm.get_current_step()}"
-            )
-
-            # === Turn 2: No premature stop, normal flow ===
-            # _text is clean, so UserStoppedSpeaking won't trigger a premature stop.
-            # The turn completes normally when the timeout fires after TranscriptionFrame.
-            # The aggregator still has dangling "hello" from turn 1, which gets
-            # combined with turn 2's "world" — this is acceptable behavior.
-            await _inject_user_turn(injector, "world")
-            await asyncio.sleep(TIMEOUT_WAIT)
-
-            assert stop_strategy._text == "", (
-                f"Expected clean _text after normal turn, got '{stop_strategy._text}'"
-            )
-            assert mock_llm.get_current_step() == 1, (
-                f"Expected 1 LLM call (normal turn), got {mock_llm.get_current_step()}"
-            )
-
-            # The LLM received both "hello" (dangling in aggregator from turn 1)
-            # and "world" (from turn 2). This is acceptable — the aggregator's
-            # _aggregation is a separate concern from the stop strategy's _text.
-            messages = context.messages
-            user_messages = [m for m in messages if m.get("role") == "user"]
-            assert len(user_messages) == 1, (
-                f"Expected 1 user message, got {len(user_messages)}"
-            )
-            user_text = user_messages[0]["content"]
-            assert "hello" in user_text, (
-                f"Expected 'hello' (from aggregator) in user message, got: '{user_text}'"
-            )
-            assert "world" in user_text, (
-                f"Expected 'world' (from turn 2) in user message, got: '{user_text}'"
-            )
-
-            await injector.inject(EndTaskFrame(), direction=FrameDirection.UPSTREAM)
-
-        await _run_scenario(pipeline, inject)
-
-    # =========================================================================
-    # Scenario 4 (✅): User speaks after bot stops -> normal flow
-    #
-    # BotStartedSpeaking (muted)
-    # BotStoppedSpeaking (unmuted)
-    # UserStartedSpeaking (triggers interruption/turn start)
-    # TranscriptionFrame
-    # UserStoppedSpeaking
-    #
-    # Turn starts because VAD frame is not suppressed. Everything works.
-    # =========================================================================
-
-    @pytest.mark.asyncio
-    async def test_scenario_4_user_speaks_after_bot_stops(self):
-        """User speaks after bot stops speaking. Normal flow, everything works.
-
-        All frames arrive after unmute, so VAD triggers turn start normally.
-        """
-        injector, user_agg, stop_strategy, turn_ctrl, mock_llm, context, pipeline = (
-            _build_components()
-        )
-
-        async def inject():
-            # === Turn 1: Bot speaks, then user speaks after bot stops ===
-            await injector.inject(BotStartedSpeakingFrame())
-            await asyncio.sleep(ASYNC_DELAY)
-            await injector.inject(BotStoppedSpeakingFrame())
-            await asyncio.sleep(ASYNC_DELAY)
-
-            # Normal user turn after bot stopped
-            await _inject_user_turn(injector, "hello after bot")
-            await asyncio.sleep(TIMEOUT_WAIT)
-
-            # Assert: clean state
-            assert stop_strategy._text == "", (
-                f"Expected empty _text after clean turn, got '{stop_strategy._text}'"
-            )
-            assert not turn_ctrl._user_turn, "Expected _user_turn False after turn"
-            assert mock_llm.get_current_step() == 1, (
-                f"Expected 1 LLM call, got {mock_llm.get_current_step()}"
-            )
-
-            # === Turn 2: Another normal turn ===
-            await _inject_user_turn(injector, "second turn")
-            await asyncio.sleep(TIMEOUT_WAIT)
-
-            assert stop_strategy._text == "", "Expected clean _text after turn 2"
-            assert mock_llm.get_current_step() == 2, (
-                f"Expected 2 LLM calls, got {mock_llm.get_current_step()}"
-            )
-
-            # Verify clean context - each turn should be separate
-            user_messages = [m for m in context.messages if m.get("role") == "user"]
-            assert len(user_messages) == 2, (
-                f"Expected 2 user messages (one per turn), got {len(user_messages)}"
-            )
-
-            await injector.inject(EndTaskFrame(), direction=FrameDirection.UPSTREAM)
-
-        await _run_scenario(pipeline, inject)
-
-    # =========================================================================
-    # Scenario 5 (✅): Late transcription - all suppressed
-    #
-    # BotStartedSpeaking (muted)
-    # UserStartedSpeaking (suppressed)
-    # UserStoppedSpeaking (suppressed)
-    # TranscriptionFrame (suppressed) <- late, but still during bot speaking
-    # BotStoppedSpeaking (unmuted)
-    #
-    # Everything suppressed, _text empty. Clean state.
-    # =========================================================================
-
-    @pytest.mark.asyncio
-    async def test_scenario_5_late_transcription_all_suppressed(self):
-        """Late transcription arrives during bot speaking. All suppressed.
-
-        Even though transcription is late, it still arrives before BotStoppedSpeaking
-        so it's still muted. Clean state.
-        """
-        injector, user_agg, stop_strategy, turn_ctrl, mock_llm, context, pipeline = (
-            _build_components()
-        )
-
-        async def inject():
-            # === Turn 1: Late transcription, but all still suppressed ===
-            await injector.inject(BotStartedSpeakingFrame())
-            await asyncio.sleep(ASYNC_DELAY)
-
-            await injector.inject(VADUserStartedSpeakingFrame())
-            await asyncio.sleep(0)
-            await injector.inject(UserStartedSpeakingFrame())
-            await asyncio.sleep(0)
-            await injector.inject(UserStoppedSpeakingFrame())
-            await asyncio.sleep(0)
-            await injector.inject(VADUserStoppedSpeakingFrame())
-            await asyncio.sleep(0)
-            # Late transcription - but still during bot speaking
-            await injector.inject(
-                TranscriptionFrame("late hello", "user-1", time_now_iso8601())
-            )
-            await asyncio.sleep(ASYNC_DELAY)
-
-            await injector.inject(BotStoppedSpeakingFrame())
-            await asyncio.sleep(TIMEOUT_WAIT)
-
-            # Assert: all suppressed, clean state
-            assert stop_strategy._text == "", (
-                f"Expected empty _text, got '{stop_strategy._text}'"
-            )
-            assert not turn_ctrl._user_turn
-
-            # === Turn 2: Normal turn works ===
-            await _inject_user_turn(injector, "clean turn")
-            await asyncio.sleep(TIMEOUT_WAIT)
-
-            assert stop_strategy._text == ""
-            assert mock_llm.get_current_step() == 1
-
-            await injector.inject(EndTaskFrame(), direction=FrameDirection.UPSTREAM)
-
-        await _run_scenario(pipeline, inject)
-
-    # =========================================================================
-    # Scenario 6 (✅ after fix): Late transcription arrives after bot stops
-    #
-    # BotStartedSpeaking (muted)
-    # UserStartedSpeaking (suppressed)
-    # UserStoppedSpeaking (suppressed)
-    # BotStoppedSpeaking (unmuted)
-    # TranscriptionFrame -> reaches stop strategy, _text = "late hello"
-    #
-    # Stop strategy timeout fires: _user_speaking is False (from initial state,
-    # UserStartedSpeaking was suppressed), _text truthy -> triggers stop.
-    # Turn controller: _user_turn False -> rejects, but unconditionally resets
-    # -> _text cleared. No dangling state.
-    # =========================================================================
-
-    @pytest.mark.asyncio
-    async def test_scenario_6_late_transcription_after_unmute_text_cleared(self):
-        """Late transcription arrives after bot stops. No turn was started.
-
-        UserStartedSpeaking was suppressed so _user_turn never started.
-        The late TranscriptionFrame accumulates _text after unmute.
-        The stop strategy timeout triggers, but controller rejects it.
-        The unconditional reset clears _text, preventing dangling state.
-        """
-        injector, user_agg, stop_strategy, turn_ctrl, mock_llm, context, pipeline = (
-            _build_components()
-        )
-
-        async def inject():
-            # === Turn 1: Late transcription scenario ===
-            await injector.inject(BotStartedSpeakingFrame())
-            await asyncio.sleep(ASYNC_DELAY)
-
-            # Suppressed
-            await injector.inject(VADUserStartedSpeakingFrame())
-            await asyncio.sleep(0)
-            await injector.inject(UserStartedSpeakingFrame())
-            await asyncio.sleep(0)
-            await injector.inject(UserStoppedSpeakingFrame())
-            await asyncio.sleep(0)
-            await injector.inject(VADUserStoppedSpeakingFrame())
-            await asyncio.sleep(ASYNC_DELAY)
-
-            # Bot stops -> unmuted
-            await injector.inject(BotStoppedSpeakingFrame())
-            await asyncio.sleep(ASYNC_DELAY)
-
-            # Install spy on trigger_user_turn_stopped to track calls
-            trigger_stop_calls = []
-            original_trigger_stop = stop_strategy.trigger_user_turn_stopped
-
-            async def spy_trigger_stop():
-                trigger_stop_calls.append(turn_ctrl._user_turn)
-                await original_trigger_stop()
-
-            stop_strategy.trigger_user_turn_stopped = spy_trigger_stop
-
-            # Late transcription arrives after unmute
-            await injector.inject(
-                TranscriptionFrame("late hello", "user-1", time_now_iso8601())
-            )
-
-            # No UserStoppedSpeakingFrame in this scenario — the stop is
-            # triggered ONLY by the _task_handler timeout path.
-            await asyncio.sleep(TIMEOUT_WAIT)
-
-            # The _task_handler timeout fired _maybe_trigger_user_turn_stopped:
-            # _user_speaking=False (UserStartedSpeaking was suppressed),
-            # _text="late hello" -> trigger_user_turn_stopped called
-            # Turn controller: _user_turn=False -> rejects, but resets -> _text cleared
-            assert len(trigger_stop_calls) == 1, (
-                f"Expected exactly 1 trigger_user_turn_stopped call from "
-                f"_task_handler timeout, got {len(trigger_stop_calls)}"
-            )
-            assert trigger_stop_calls[0] is False, (
-                "Expected _user_turn=False when timeout triggered stop"
-            )
-
-            # Restore original method
-            stop_strategy.trigger_user_turn_stopped = original_trigger_stop
-
-            # Transcript is not suppressed, so we should have late hello in user aggregator
-            assert user_agg._aggregation[0].text == "late hello"
-
-            # Assert: _text is cleared by the unconditional reset (no dangling state)
-            assert stop_strategy._text == "", (
-                f"Expected empty _text after unconditional reset, got '{stop_strategy._text}'"
-            )
-            assert not turn_ctrl._user_turn, "Turn should not have started"
-            assert mock_llm.get_current_step() == 0, "No LLM call expected"
-
-            # === Turn 2: No premature stop, normal flow ===
-            # _text is clean, so no premature stop occurs.
-            # The turn completes normally when the timeout fires after TranscriptionFrame.
-            # The aggregator still has dangling "late hello" from turn 1, which gets
-            # combined with turn 2's "real speech" — this is acceptable behavior.
-            await _inject_user_turn(injector, "real speech")
-            await asyncio.sleep(TIMEOUT_WAIT)
-
-            assert stop_strategy._text == "", (
-                f"Expected clean _text after normal turn, got '{stop_strategy._text}'"
-            )
-            assert mock_llm.get_current_step() == 1, (
-                f"Expected 1 LLM call (normal turn), got {mock_llm.get_current_step()}"
-            )
-
-            # The LLM received both "late hello" (dangling in aggregator from turn 1)
-            # and "real speech" (from turn 2).
-            user_messages = [m for m in context.messages if m.get("role") == "user"]
-            assert len(user_messages) == 1, (
-                f"Expected 1 user message, got {len(user_messages)}"
-            )
-            user_text = user_messages[0]["content"]
-            assert "late hello" in user_text, (
-                f"Expected 'late hello' (from aggregator) in user message, got: '{user_text}'"
-            )
-            assert "real speech" in user_text, (
-                f"Expected 'real speech' (from turn 2) in user message, got: '{user_text}'"
-            )
-
-            await injector.inject(EndTaskFrame(), direction=FrameDirection.UPSTREAM)
-
-        await _run_scenario(pipeline, inject)
-
-    # =========================================================================
-    # Scenario 7 (✅ after fix): Late transcription - user stops before transcription
-    #
-    # BotStartedSpeaking (muted)
-    # UserStartedSpeaking (suppressed)
-    # BotStoppedSpeaking (unmuted)
-    # UserStoppedSpeaking (no _text yet -> no trigger from _handle_user_stopped)
-    # TranscriptionFrame -> _text = "late", timeout triggers stop
-    #
-    # Turn controller: _user_turn False -> rejects, but unconditionally resets
-    # -> _text cleared. No dangling state.
-    # =========================================================================
-
-    @pytest.mark.asyncio
-    async def test_scenario_7_late_transcription_after_user_stops_text_cleared(self):
-        """User stops speaking, then late transcription arrives. No turn started.
-
-        UserStoppedSpeaking arrives first (no _text yet, so no trigger).
-        Then TranscriptionFrame arrives (sets _text). The timeout fires and
-        triggers stop, but controller rejects it. The unconditional reset
-        clears _text, preventing dangling state.
-        """
-        injector, user_agg, stop_strategy, turn_ctrl, mock_llm, context, pipeline = (
-            _build_components()
-        )
-
-        async def inject():
-            # === Turn 1: Late transcription after user stops ===
-            await injector.inject(BotStartedSpeakingFrame())
-            await asyncio.sleep(ASYNC_DELAY)
-
-            # Suppressed
-            await injector.inject(VADUserStartedSpeakingFrame())
-            await asyncio.sleep(0)
-            await injector.inject(UserStartedSpeakingFrame())
-            await asyncio.sleep(ASYNC_DELAY)
-
-            # Bot stops -> unmuted
-            await injector.inject(BotStoppedSpeakingFrame())
-            await asyncio.sleep(ASYNC_DELAY)
-
-            # UserStoppedSpeaking arrives after unmute, but _text is still empty
-            # -> _maybe_trigger_user_turn_stopped: _text is "" -> no trigger
-            await injector.inject(UserStoppedSpeakingFrame())
-            await asyncio.sleep(ASYNC_DELAY)
-
-            # Late transcription arrives AFTER user stopped
-            await injector.inject(
-                TranscriptionFrame("late text", "user-1", time_now_iso8601())
-            )
-            # Wait for timeout to fire
-            await asyncio.sleep(TIMEOUT_WAIT)
-
-            # Transcript is not suppressed, so we should have late text in user aggregator
-            assert user_agg._aggregation[0].text == "late text"
-
-            # Assert: _text is cleared by the unconditional reset
-            # The timeout fired _maybe_trigger_user_turn_stopped:
-            # _user_speaking=False (was never set, UserStartedSpeaking suppressed),
-            # _text="late text" -> triggers stop
-            # Turn controller: _user_turn=False -> rejects, but resets -> _text cleared
-            assert stop_strategy._text == "", (
-                f"Expected empty _text after unconditional reset, got '{stop_strategy._text}'"
-            )
-            assert not turn_ctrl._user_turn
-            assert mock_llm.get_current_step() == 0
-
-            # === Turn 2: No premature stop, normal flow ===
-            # _text is clean, so no premature stop occurs.
-            # The turn completes normally when the timeout fires after TranscriptionFrame.
-            # The aggregator still has dangling "late text" from turn 1, which gets
-            # combined with turn 2's "next speech" — this is acceptable behavior.
-            await _inject_user_turn(injector, "next speech")
-            await asyncio.sleep(TIMEOUT_WAIT)
-
-            assert stop_strategy._text == "", (
-                f"Expected clean _text after normal turn, got '{stop_strategy._text}'"
-            )
-            assert mock_llm.get_current_step() == 1
-
-            # The LLM received both "late text" (dangling in aggregator from turn 1)
-            # and "next speech" (from turn 2).
-            user_messages = [m for m in context.messages if m.get("role") == "user"]
-            assert len(user_messages) == 1
-            user_text = user_messages[0]["content"]
-            assert "late text" in user_text, (
-                f"Expected 'late text' (from aggregator) in context, got: '{user_text}'"
-            )
-            assert "next speech" in user_text, (
-                f"Expected 'next speech' (from turn 2) in context, got: '{user_text}'"
-            )
-
-            await injector.inject(EndTaskFrame(), direction=FrameDirection.UPSTREAM)
-
-        await _run_scenario(pipeline, inject)
-
-    # =========================================================================
-    # Scenario 8 (✅): Late transcription - user speaks after bot stops
-    #
-    # BotStartedSpeaking (muted)
-    # BotStoppedSpeaking (unmuted)
-    # UserStartedSpeaking (not suppressed -> turn starts, start strategies reset)
-    # UserStoppedSpeaking (no _text -> no trigger)
-    # TranscriptionFrame (timeout triggers stop)
-    #
-    # Turn controller: _user_turn IS True -> allows stop -> resets strategies
-    # Clean state!
-    # =========================================================================
-
-    @pytest.mark.asyncio
-    async def test_scenario_8_late_transcription_user_speaks_after_bot_stops(self):
-        """User speaks after bot stops, then late transcription arrives.
-
-        Because user spoke after unmute, VAD triggers turn start -> _user_turn=True.
-        When the late transcription triggers the stop, controller allows it and
-        resets strategies. Clean state.
-        """
-        injector, user_agg, stop_strategy, turn_ctrl, mock_llm, context, pipeline = (
-            _build_components()
-        )
-
-        async def inject():
-            # === Turn 1: Late transcription but user spoke after unmute ===
-            await injector.inject(BotStartedSpeakingFrame())
-            await asyncio.sleep(ASYNC_DELAY)
-            await injector.inject(BotStoppedSpeakingFrame())
-            await asyncio.sleep(ASYNC_DELAY)
-
-            # User speaks AFTER bot stops -> not suppressed
-            await injector.inject(VADUserStartedSpeakingFrame())
-            await asyncio.sleep(0)
-            await injector.inject(UserStartedSpeakingFrame())
-            await asyncio.sleep(ASYNC_DELAY)
-
-            # User stops speaking (no _text yet, so stop strategy doesn't trigger)
-            await injector.inject(UserStoppedSpeakingFrame())
-            await asyncio.sleep(0)
-            await injector.inject(VADUserStoppedSpeakingFrame())
-            await asyncio.sleep(ASYNC_DELAY)
-
-            # Late transcription arrives
-            await injector.inject(
-                TranscriptionFrame("late but ok", "user-1", time_now_iso8601())
-            )
-            # Wait for timeout to trigger stop
-            await asyncio.sleep(TIMEOUT_WAIT)
-
-            # Assert: turn controller allowed the stop, strategies were reset
-            assert stop_strategy._text == "", (
-                f"Expected clean _text after allowed stop, got '{stop_strategy._text}'"
-            )
-            assert not turn_ctrl._user_turn, "Turn should have stopped"
-            assert mock_llm.get_current_step() == 1, (
-                f"Expected 1 LLM call, got {mock_llm.get_current_step()}"
-            )
-
-            # === Turn 2: Clean subsequent turn ===
-            await _inject_user_turn(injector, "clean turn")
-            await asyncio.sleep(TIMEOUT_WAIT)
-
-            assert stop_strategy._text == ""
-            assert mock_llm.get_current_step() == 2
-
-            # Verify each turn is separate in context
-            user_messages = [m for m in context.messages if m.get("role") == "user"]
-            assert len(user_messages) == 2, (
-                f"Expected 2 separate user messages, got {len(user_messages)}"
-            )
-
-            await injector.inject(EndTaskFrame(), direction=FrameDirection.UPSTREAM)
-
-        await _run_scenario(pipeline, inject)
-
-    # =========================================================================
-    # Combined test: validates _text is cleared independently after each
-    # rejected stop, preventing accumulation across muted periods.
-    # =========================================================================
-
-    @pytest.mark.asyncio
-    async def test_text_cleared_independently_across_failed_stops(self):
-        """Validates _text does not accumulate across multiple failed stop attempts.
-
-        Two consecutive muted periods with late transcriptions each trigger
-        a rejected stop. The unconditional reset clears _text after each
-        rejection, so no accumulation occurs. The subsequent normal turn
-        completes correctly.
-        """
-        injector, user_agg, stop_strategy, turn_ctrl, mock_llm, context, pipeline = (
-            _build_components()
-        )
-
-        async def inject():
-            # === Muted period 1: _text cleared after rejected stop ===
-            await injector.inject(BotStartedSpeakingFrame())
-            await asyncio.sleep(ASYNC_DELAY)
-            await injector.inject(VADUserStartedSpeakingFrame())  # suppressed
-            await asyncio.sleep(0)
-            await injector.inject(UserStartedSpeakingFrame())  # suppressed
-            await asyncio.sleep(ASYNC_DELAY)
-            await injector.inject(BotStoppedSpeakingFrame())
-            await asyncio.sleep(ASYNC_DELAY)
-
-            # Late transcription after unmute
-            await injector.inject(
-                TranscriptionFrame("first", "user-1", time_now_iso8601())
-            )
-            await asyncio.sleep(0)
-            await injector.inject(UserStoppedSpeakingFrame())
-            await asyncio.sleep(TIMEOUT_WAIT)
-
-            # Transcript is not suppressed, so we should have first in user aggregator
-            assert user_agg._aggregation[0].text == "first"
-
-            # _text is cleared by the unconditional reset after rejected stop
-            assert stop_strategy._text == "", (
-                f"Expected empty _text after unconditional reset, got '{stop_strategy._text}'"
-            )
-
-            # === Muted period 2: _text cleared independently, no accumulation ===
-            await injector.inject(BotStartedSpeakingFrame())
-            await asyncio.sleep(ASYNC_DELAY)
-            await injector.inject(VADUserStartedSpeakingFrame())  # suppressed
-            await asyncio.sleep(0)
-            await injector.inject(UserStartedSpeakingFrame())  # suppressed
-            await asyncio.sleep(ASYNC_DELAY)
-            await injector.inject(BotStoppedSpeakingFrame())
-            await asyncio.sleep(ASYNC_DELAY)
-
-            await injector.inject(
-                TranscriptionFrame("second", "user-1", time_now_iso8601())
-            )
-            await asyncio.sleep(0)
-            await injector.inject(UserStoppedSpeakingFrame())
-            await asyncio.sleep(TIMEOUT_WAIT)
-
-            # _text is cleared again — no accumulation of "first" + "second"
-            assert stop_strategy._text == "", (
-                f"Expected empty _text after second unconditional reset, got '{stop_strategy._text}'"
-            )
-            # Aggregator accumulated both (separate concern, acceptable)
-            assert len(user_agg._aggregation) == 2
-            assert user_agg._aggregation[0].text == "first"
-            assert user_agg._aggregation[1].text == "second"
-
-            # === Turn 3: No premature stop, normal flow ===
-            # _text is clean, so no premature stop occurs.
-            # The turn completes normally when the timeout fires after TranscriptionFrame.
-            # The aggregator has dangling "first" + "second" from muted periods,
-            # which get combined with turn 3's "actual speech".
-            await _inject_user_turn(injector, "actual speech")
-            await asyncio.sleep(TIMEOUT_WAIT)
-
-            assert stop_strategy._text == "", (
-                f"Expected clean _text after normal turn, got '{stop_strategy._text}'"
-            )
-            assert mock_llm.get_current_step() == 1
-
-            # The LLM received all three: "first" + "second" (from aggregator)
-            # and "actual speech" (from turn 3).
-            user_messages = [m for m in context.messages if m.get("role") == "user"]
-            assert len(user_messages) == 1, (
-                f"Expected 1 user message, got {len(user_messages)}"
-            )
-            user_text = user_messages[0]["content"]
-            assert "first" in user_text, f"Expected 'first' in '{user_text}'"
-            assert "second" in user_text, f"Expected 'second' in '{user_text}'"
-            assert "actual speech" in user_text, (
-                f"Expected 'actual speech' in '{user_text}'"
-            )
-
-            await injector.inject(EndTaskFrame(), direction=FrameDirection.UPSTREAM)
-
-        await _run_scenario(pipeline, inject)
--- a/api/tests/test_workflow_versioning.py
+++ b/api/tests/test_workflow_versioning.py
@ -0,0 +1,608 @@
+"""
+TDD tests for workflow versioning lifecycle.
+
+Tests the version lifecycle on WorkflowDefinitionModel:
+  - status: draft / published / archived
+  - version_number: sequential per workflow
+  - released_definition_id on WorkflowModel
+
+Modules under test:
+  - api.db.workflow_client (new versioning methods)
+  - api.db.models (new columns on WorkflowDefinitionModel, WorkflowModel)
+
+These are DB integration tests using the transactional test session.
+"""
+
+import pytest
+
+from api.db.models import (
+    OrganizationModel,
+    UserModel,
+)
+
+# Sample workflow definitions (graph JSON)
+#
+# Each graph is a dict with a "nodes" list (id / type / data) and an "edges"
+# list (id / source / target / data) whose source/target refer to node ids.
+#
+# V1: two nodes, startCall -> endCall.
+GRAPH_V1 = {
+    "nodes": [
+        {"id": "1", "type": "startCall", "data": {"name": "Start", "prompt": "Hello"}},
+        {"id": "2", "type": "endCall", "data": {"name": "End", "prompt": "Bye"}},
+    ],
+    "edges": [{"id": "e1", "source": "1", "target": "2", "data": {"label": "End"}}],
+}
+
+# V2: three nodes, startCall -> agentNode -> endCall, with a changed start prompt.
+GRAPH_V2 = {
+    "nodes": [
+        {
+            "id": "1",
+            "type": "startCall",
+            "data": {"name": "Start", "prompt": "Hello v2"},
+        },
+        {
+            "id": "2",
+            "type": "agentNode",
+            "data": {"name": "Agent", "prompt": "Collect info"},
+        },
+        {"id": "3", "type": "endCall", "data": {"name": "End", "prompt": "Bye"}},
+    ],
+    "edges": [
+        {"id": "e1", "source": "1", "target": "2", "data": {"label": "Collect"}},
+        {"id": "e2", "source": "2", "target": "3", "data": {"label": "End"}},
+    ],
+}
+
+# V3: same two-node shape as V1, but both prompts differ from V1's.
+GRAPH_V3 = {
+    "nodes": [
+        {
+            "id": "1",
+            "type": "startCall",
+            "data": {"name": "Start", "prompt": "Hello v3"},
+        },
+        {"id": "2", "type": "endCall", "data": {"name": "End", "prompt": "Goodbye"}},
+    ],
+    "edges": [{"id": "e1", "source": "1", "target": "2", "data": {"label": "End"}}],
+}
+
+# Workflow configuration payloads. V2 changes max_call_duration and adds an
+# LLM model override under "model_overrides".
+CONFIG_V1 = {"max_call_duration": 300}
+CONFIG_V2 = {
+    "max_call_duration": 600,
+    "model_overrides": {"llm": {"model": "gpt-4.1-mini"}},
+}
+# Template context variable payloads; the two differ only in company_name.
+TEMPLATE_VARS_V1 = {"company_name": "Acme"}
+TEMPLATE_VARS_V2 = {"company_name": "Acme Inc"}
+
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture
+async def org_and_user(async_session):
+    """Persist one organization and one user that selects it.
+
+    Both rows are added to ``async_session`` and flushed separately; the
+    organization goes first because the user is built from its ``id``.
+
+    Returns:
+        An ``(organization, user)`` tuple.
+    """
+    organization = OrganizationModel(provider_id="test-org-versioning")
+    async_session.add(organization)
+    # Flush before constructing the user, which reads organization.id
+    # (presumably populated by the flush — confirm against the model).
+    await async_session.flush()
+
+    member = UserModel(
+        provider_id="test-user-versioning",
+        selected_organization_id=organization.id,
+    )
+    async_session.add(member)
+    await async_session.flush()
+
+    return organization, member
+
+
+@pytest.fixture
+async def workflow_with_v1(db_session, org_and_user):
+    """Create a workflow from GRAPH_V1 owned by the fixture organization/user.
+
+    The versioning tests expect this initial definition to be V1 and published.
+
+    Returns:
+        A ``(workflow, user)`` tuple.
+    """
+    organization, owner = org_and_user
+    created = await db_session.create_workflow(
+        name="Test Workflow",
+        workflow_definition=GRAPH_V1,
+        user_id=owner.id,
+        organization_id=organization.id,
+    )
+    return created, owner
+
+
+# ---------------------------------------------------------------------------
+# Workflow creation → V1 published
+# ---------------------------------------------------------------------------
+
+
+class TestWorkflowCreation:
+    """Checks on the version state left behind by ``create_workflow``."""
+
+    async def test_create_workflow_produces_published_v1(
+        self, db_session, org_and_user
+    ):
+        """A freshly created workflow has exactly one definition:
+        status 'published', version_number 1, carrying the submitted graph."""
+        organization, owner = org_and_user
+        created = await db_session.create_workflow(
+            name="New Workflow",
+            workflow_definition=GRAPH_V1,
+            user_id=owner.id,
+            organization_id=organization.id,
+        )
+
+        all_versions = await db_session.get_workflow_versions(created.id)
+        assert len(all_versions) == 1
+
+        initial = all_versions[0]
+        assert initial.status == "published"
+        assert initial.version_number == 1
+        assert initial.workflow_json == GRAPH_V1
+
+    async def test_create_workflow_sets_released_pointer(
+        self, db_session, org_and_user
+    ):
+        """released_definition_id on the new workflow references V1."""
+        organization, owner = org_and_user
+        created = await db_session.create_workflow(
+            name="Pointer Test",
+            workflow_definition=GRAPH_V1,
+            user_id=owner.id,
+            organization_id=organization.id,
+        )
+
+        all_versions = await db_session.get_workflow_versions(created.id)
+        assert created.released_definition_id == all_versions[0].id
+
+
+# ---------------------------------------------------------------------------
+# Saving a draft
+# ---------------------------------------------------------------------------
+
+
+class TestSaveDraft:
+    """Checks on ``save_workflow_draft`` against a workflow that already has V1."""
+
+    async def test_save_draft_creates_draft_version(self, db_session, workflow_with_v1):
+        """Saving over a published workflow yields a draft at version 2
+        holding the submitted graph, configurations and template variables."""
+        workflow, _ = workflow_with_v1
+
+        saved = await db_session.save_workflow_draft(
+            workflow_id=workflow.id,
+            workflow_definition=GRAPH_V2,
+            workflow_configurations=CONFIG_V2,
+            template_context_variables=TEMPLATE_VARS_V2,
+        )
+
+        assert saved.status == "draft"
+        assert saved.version_number == 2
+        assert saved.workflow_json == GRAPH_V2
+        assert saved.workflow_configurations == CONFIG_V2
+        assert saved.template_context_variables == TEMPLATE_VARS_V2
+
+    async def test_save_draft_does_not_change_released_pointer(
+        self, db_session, workflow_with_v1
+    ):
+        """The released pointer stays where it was after a draft is saved."""
+        workflow, _ = workflow_with_v1
+        # Capture the pointer before saving so it can be compared afterwards.
+        pointer_before = workflow.released_definition_id
+
+        await db_session.save_workflow_draft(
+            workflow_id=workflow.id,
+            workflow_definition=GRAPH_V2,
+        )
+
+        reloaded = await db_session.get_workflow(workflow.id)
+        assert reloaded.released_definition_id == pointer_before
+
+    async def test_save_draft_twice_updates_in_place(
+        self, db_session, workflow_with_v1
+    ):
+        """A second save reuses the existing draft row instead of adding one."""
+        workflow, _ = workflow_with_v1
+
+        first_save = await db_session.save_workflow_draft(
+            workflow_id=workflow.id,
+            workflow_definition=GRAPH_V2,
+        )
+
+        second_save = await db_session.save_workflow_draft(
+            workflow_id=workflow.id,
+            workflow_definition=GRAPH_V3,
+        )
+
+        assert first_save.id == second_save.id  # same row
+        assert second_save.workflow_json == GRAPH_V3
+        assert second_save.version_number == 2  # unchanged
+
+        all_versions = await db_session.get_workflow_versions(workflow.id)
+        assert len(all_versions) == 2  # V1 published + V2 draft, no extras
+
+    async def test_save_draft_with_only_config_change(
+        self, db_session, workflow_with_v1
+    ):
+        """A draft may differ from V1 in configurations alone."""
+        workflow, _ = workflow_with_v1
+
+        saved = await db_session.save_workflow_draft(
+            workflow_id=workflow.id,
+            workflow_definition=GRAPH_V1,  # same graph
+            workflow_configurations=CONFIG_V2,  # different config
+        )
+
+        assert saved.status == "draft"
+        assert saved.workflow_json == GRAPH_V1
+        assert saved.workflow_configurations == CONFIG_V2
+
+
+# ---------------------------------------------------------------------------
+# Publishing a draft
+# ---------------------------------------------------------------------------
+
+
+class TestPublishDraft:
+    """Promotion of a saved draft through ``publish_workflow_draft``."""
+
+    async def test_publish_promotes_draft_to_published(
+        self, db_session, workflow_with_v1
+    ):
+        """Publishing flips the draft to published and archives version 1."""
+        wf, _ = workflow_with_v1
+
+        await db_session.save_workflow_draft(
+            workflow_id=wf.id,
+            workflow_definition=GRAPH_V2,
+            workflow_configurations=CONFIG_V2,
+        )
+        released = await db_session.publish_workflow_draft(wf.id)
+
+        assert released.status == "published"
+        assert released.workflow_json == GRAPH_V2
+
+        # Index each stored version's status by its version number.
+        listed = await db_session.get_workflow_versions(wf.id)
+        status_by_number = {ver.version_number: ver.status for ver in listed}
+        assert status_by_number[1] == "archived"
+        assert status_by_number[2] == "published"
+
+    async def test_publish_updates_released_pointer(self, db_session, workflow_with_v1):
+        """Once published, the workflow's released pointer equals the draft's id."""
+        wf, _ = workflow_with_v1
+
+        pending = await db_session.save_workflow_draft(
+            workflow_id=wf.id, workflow_definition=GRAPH_V2
+        )
+        await db_session.publish_workflow_draft(wf.id)
+
+        reloaded = await db_session.get_workflow(wf.id)
+        assert reloaded.released_definition_id == pending.id
+
+    async def test_publish_sets_published_at(self, db_session, workflow_with_v1):
+        """The version returned by publish carries a non-null ``published_at``."""
+        wf, _ = workflow_with_v1
+
+        await db_session.save_workflow_draft(
+            workflow_id=wf.id, workflow_definition=GRAPH_V2
+        )
+        released = await db_session.publish_workflow_draft(wf.id)
+
+        assert released.published_at is not None
+
+    async def test_publish_with_no_draft_raises(self, db_session, workflow_with_v1):
+        """Publishing without a saved draft raises ``ValueError``."""
+        wf, _ = workflow_with_v1
+
+        with pytest.raises(ValueError, match="[Nn]o draft"):
+            await db_session.publish_workflow_draft(wf.id)
+
+    async def test_exactly_one_published_after_multiple_cycles(
+        self, db_session, workflow_with_v1
+    ):
+        """Two draft/publish cycles leave a single published version: number 3."""
+        wf, _ = workflow_with_v1
+
+        # One save-then-publish cycle per graph, in order.
+        for graph in (GRAPH_V2, GRAPH_V3):
+            await db_session.save_workflow_draft(
+                workflow_id=wf.id, workflow_definition=graph
+            )
+            await db_session.publish_workflow_draft(wf.id)
+
+        listed = await db_session.get_workflow_versions(wf.id)
+        live = [ver for ver in listed if ver.status == "published"]
+        assert len(live) == 1
+        assert live[0].version_number == 3
+
+
+# ---------------------------------------------------------------------------
+# Discarding a draft
+# ---------------------------------------------------------------------------
+
+
+class TestDiscardDraft:
+    """Removal of an unpublished draft through ``discard_workflow_draft``."""
+
+    async def test_discard_removes_draft(self, db_session, workflow_with_v1):
+        """After a discard only one version, the published one, is listed."""
+        wf, _ = workflow_with_v1
+
+        await db_session.save_workflow_draft(
+            workflow_id=wf.id, workflow_definition=GRAPH_V2
+        )
+        await db_session.discard_workflow_draft(wf.id)
+
+        remaining = await db_session.get_workflow_versions(wf.id)
+        assert len(remaining) == 1
+        assert remaining[0].status == "published"
+
+    async def test_discard_does_not_affect_published(
+        self, db_session, workflow_with_v1
+    ):
+        """A save followed by a discard leaves the released pointer as it was."""
+        wf, _ = workflow_with_v1
+        # Snapshot the pointer before any draft activity.
+        released_before = wf.released_definition_id
+
+        await db_session.save_workflow_draft(
+            workflow_id=wf.id, workflow_definition=GRAPH_V2
+        )
+        await db_session.discard_workflow_draft(wf.id)
+
+        reloaded = await db_session.get_workflow(wf.id)
+        assert reloaded.released_definition_id == released_before
+
+    async def test_discard_when_no_draft_raises(self, db_session, workflow_with_v1):
+        """Discarding without a saved draft raises ``ValueError``."""
+        wf, _ = workflow_with_v1
+
+        with pytest.raises(ValueError, match="[Nn]o draft"):
+            await db_session.discard_workflow_draft(wf.id)
+
+    async def test_new_draft_after_discard_gets_next_version_number(
+        self, db_session, workflow_with_v1
+    ):
+        """A draft saved after a discard is numbered 2 again.
+
+        The discarded V2 was deleted without ever being published, so its
+        number is free to be reused.
+        """
+        wf, _ = workflow_with_v1
+
+        await db_session.save_workflow_draft(
+            workflow_id=wf.id, workflow_definition=GRAPH_V2
+        )
+        await db_session.discard_workflow_draft(wf.id)
+
+        replacement = await db_session.save_workflow_draft(
+            workflow_id=wf.id, workflow_definition=GRAPH_V3
+        )
+        # Reusing the number is acceptable because V2 never went live.
+        assert replacement.version_number == 2
+
+
+# ---------------------------------------------------------------------------
+# Reverting to an archived version
+# ---------------------------------------------------------------------------
+
+
+class TestRevert:
+    """Reverting to an earlier version through ``revert_to_version``."""
+
+    async def _publish_v2(self, db_session, workflow):
+        """Save the full V2 snapshot as a draft, publish it, return the result."""
+        await db_session.save_workflow_draft(
+            workflow_id=workflow.id,
+            workflow_definition=GRAPH_V2,
+            workflow_configurations=CONFIG_V2,
+            template_context_variables=TEMPLATE_VARS_V2,
+        )
+        published_v2 = await db_session.publish_workflow_draft(workflow.id)
+        return published_v2
+
+    async def test_revert_creates_draft_from_archived(
+        self, db_session, workflow_with_v1
+    ):
+        """Reverting to V1 produces a draft that carries the V1 graph."""
+        wf, _ = workflow_with_v1
+
+        # Capture V1's definition id while it is still the first listed version.
+        initial_versions = await db_session.get_workflow_versions(wf.id)
+        v1_id = initial_versions[0].id
+
+        # Publishing V2 is what pushes V1 out of the published slot.
+        await self._publish_v2(db_session, wf)
+
+        reverted = await db_session.revert_to_version(wf.id, v1_id)
+
+        assert reverted.status == "draft"
+        assert reverted.workflow_json == GRAPH_V1
+
+    async def test_revert_preserves_all_snapshot_fields(
+        self, db_session, workflow_with_v1
+    ):
+        """The reverted draft copies graph, configs and template variables."""
+        wf, _ = workflow_with_v1
+
+        # V2 is published with the complete snapshot ...
+        second = await self._publish_v2(db_session, wf)
+
+        # ... and then superseded by a published V3.
+        await db_session.save_workflow_draft(
+            workflow_id=wf.id, workflow_definition=GRAPH_V3
+        )
+        await db_session.publish_workflow_draft(wf.id)
+
+        reverted = await db_session.revert_to_version(wf.id, second.id)
+
+        assert reverted.workflow_json == GRAPH_V2
+        assert reverted.workflow_configurations == CONFIG_V2
+        assert reverted.template_context_variables == TEMPLATE_VARS_V2
+
+    async def test_revert_when_draft_exists_raises(self, db_session, workflow_with_v1):
+        """Reverting while a draft is pending raises ``ValueError``."""
+        wf, _ = workflow_with_v1
+        initial_versions = await db_session.get_workflow_versions(wf.id)
+        v1_id = initial_versions[0].id
+
+        await self._publish_v2(db_session, wf)
+
+        # Leave an unpublished draft in place before attempting the revert.
+        await db_session.save_workflow_draft(
+            workflow_id=wf.id, workflow_definition=GRAPH_V3
+        )
+
+        with pytest.raises(ValueError, match="[Dd]raft.*exists"):
+            await db_session.revert_to_version(wf.id, v1_id)
+
+    async def test_revert_does_not_change_released_pointer(
+        self, db_session, workflow_with_v1
+    ):
+        """A revert only creates a draft; the released pointer stays on V2."""
+        wf, _ = workflow_with_v1
+        initial_versions = await db_session.get_workflow_versions(wf.id)
+        v1_id = initial_versions[0].id
+
+        second = await self._publish_v2(db_session, wf)
+        await db_session.revert_to_version(wf.id, v1_id)
+
+        reloaded = await db_session.get_workflow(wf.id)
+        assert reloaded.released_definition_id == second.id  # pointer still on V2
+
+
+# ---------------------------------------------------------------------------
+# Version listing & ordering
+# ---------------------------------------------------------------------------
+
+
+class TestVersionListing:
+    """Ordering and status reporting of ``get_workflow_versions``."""
+
+    async def _publish_v2_then_draft_v3(self, db_session, workflow):
+        """Publish ``GRAPH_V2`` and then save ``GRAPH_V3`` as an unpublished draft.
+
+        Shared setup for the listing tests; the leading underscore keeps pytest
+        from collecting it as a test.
+        """
+        await db_session.save_workflow_draft(
+            workflow_id=workflow.id,
+            workflow_definition=GRAPH_V2,
+        )
+        await db_session.publish_workflow_draft(workflow.id)
+
+        await db_session.save_workflow_draft(
+            workflow_id=workflow.id,
+            workflow_definition=GRAPH_V3,
+        )
+
+    async def test_versions_ordered_by_version_number_desc(
+        self, db_session, workflow_with_v1
+    ):
+        """Versions should be returned newest first."""
+        workflow, _ = workflow_with_v1
+        await self._publish_v2_then_draft_v3(db_session, workflow)
+
+        versions = await db_session.get_workflow_versions(workflow.id)
+        version_numbers = [v.version_number for v in versions]
+        # Pin the exact sequence: comparing only against
+        # sorted(..., reverse=True) would also accept an empty or truncated
+        # listing, so the ordering check could pass without checking anything.
+        assert version_numbers == [3, 2, 1]
+
+    async def test_versions_include_status(self, db_session, workflow_with_v1):
+        """Each version should have an explicit status."""
+        workflow, _ = workflow_with_v1
+        await self._publish_v2_then_draft_v3(db_session, workflow)
+
+        versions = await db_session.get_workflow_versions(workflow.id)
+        statuses = {v.version_number: v.status for v in versions}
+        assert statuses == {1: "archived", 2: "published", 3: "draft"}
+
+
+# ---------------------------------------------------------------------------
+# Version data stored on definition, not workflow
+# ---------------------------------------------------------------------------
+
+
+class TestVersionDataOnDefinition:
+    """Configurations and template variables are readable per version."""
+
+    async def test_configs_stored_on_definition(self, db_session, workflow_with_v1):
+        """The saved draft itself exposes the configs and template variables."""
+        wf, _ = workflow_with_v1
+
+        saved = await db_session.save_workflow_draft(
+            workflow_id=wf.id,
+            workflow_definition=GRAPH_V2,
+            workflow_configurations=CONFIG_V2,
+            template_context_variables=TEMPLATE_VARS_V2,
+        )
+
+        assert saved.workflow_configurations == CONFIG_V2
+        assert saved.template_context_variables == TEMPLATE_VARS_V2
+
+    async def test_different_versions_have_different_configs(
+        self, db_session, workflow_with_v1
+    ):
+        """Versions 1 and 2 report unequal configurations once V2 is published."""
+        wf, _ = workflow_with_v1
+
+        await db_session.save_workflow_draft(
+            workflow_id=wf.id,
+            workflow_definition=GRAPH_V2,
+            workflow_configurations=CONFIG_V2,
+        )
+        await db_session.publish_workflow_draft(wf.id)
+
+        # Look up each version's configuration by its version number.
+        listed = await db_session.get_workflow_versions(wf.id)
+        config_of = {ver.version_number: ver.workflow_configurations for ver in listed}
+        v1_config = config_of[1]
+        v2_config = config_of[2]
+
+        assert v1_config != v2_config
+
+
+# ---------------------------------------------------------------------------
+# Run creation uses published (or draft for testing)
+# ---------------------------------------------------------------------------
+
+
+class TestRunDefinitionBinding:
+    async def test_campaign_run_uses_published_version(
+        self, db_session, workflow_with_v1
+    ):
+        """A campaign-initiated run should use the published version, not draft.
+
+        A draft is saved but never published; a run created without
+        ``use_draft`` must still be bound to the published definition.
+        """
+        workflow, user = workflow_with_v1
+
+        # Create a draft (unpublished)
+        await db_session.save_workflow_draft(
+            workflow_id=workflow.id,
+            workflow_definition=GRAPH_V2,
+        )
+
+        # Create a run (simulating campaign dispatch)
+        run = await db_session.create_workflow_run(
+            name="Campaign Run",
+            workflow_id=workflow.id,
+            mode="webrtc",
+            user_id=user.id,
+        )
+
+        # Run should be bound to the published V1, not the draft V2
+        versions = await db_session.get_workflow_versions(workflow.id)
+        published = [v for v in versions if v.status == "published"]
+        # Assert explicitly instead of calling next() on a generator: inside a
+        # coroutine an exhausted next() surfaces as an opaque
+        # "RuntimeError: coroutine raised StopIteration" rather than a clear
+        # test failure.
+        assert published, "no published version found for the workflow"
+        assert run.definition_id == published[0].id
+
+    async def test_test_run_uses_draft_if_exists(self, db_session, workflow_with_v1):
+        """With ``use_draft=True`` the created run is bound to the saved draft."""
+        wf, owner = workflow_with_v1
+
+        pending = await db_session.save_workflow_draft(
+            workflow_id=wf.id, workflow_definition=GRAPH_V2
+        )
+
+        # Request a run that opts into the draft definition.
+        test_run = await db_session.create_workflow_run(
+            name="Test Run",
+            workflow_id=wf.id,
+            mode="webrtc",  # test mode
+            user_id=owner.id,
+            use_draft=True,
+        )
+
+        assert test_run.definition_id == pending.id