feat: agent versioning and model configurations override (#227)

* feat: add tests and migrations

* feat: workflow versioning among published and draft

* feat: add a new settings page to simplify workflow detail page

* fix: fix tsclient generation
This commit is contained in:
Abhishek 2026-04-08 19:20:31 +05:30 committed by GitHub
parent f5fa9ce717
commit 38d1d928b7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
62 changed files with 10158 additions and 3131 deletions

View file

@ -80,11 +80,6 @@ class MockWorkflowModel:
workflow_id: int = 1
organization_id: int = 1
workflow_configurations: Dict[str, Any] = field(default_factory=dict)
workflow_definition_with_fallback: Dict[str, Any] = field(default_factory=dict)
def __post_init__(self):
if not self.workflow_definition_with_fallback:
self.workflow_definition_with_fallback = DEFAULT_WORKFLOW_DEFINITION.copy()
@dataclass
@ -120,6 +115,7 @@ class MockToolModel:
name: str
description: str
definition: Dict[str, Any]
category: str = "http_api"
@pytest.fixture

View file

@ -0,0 +1,353 @@
"""
TDD tests for resolve_effective_config().
This function deep-merges workflow-level model_overrides onto the global
UserConfiguration. Fields not overridden inherit from global.
Module under test: api.services.configuration.resolve
"""
import pytest
from api.schemas.user_configuration import UserConfiguration
from api.services.configuration.registry import (
DeepgramSTTConfiguration,
ElevenlabsTTSConfiguration,
GoogleRealtimeLLMConfiguration,
OpenAILLMService,
)
from api.services.configuration.resolve import resolve_effective_config
# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------
@pytest.fixture
def global_config() -> UserConfiguration:
    """Provide a non-realtime global configuration with LLM, TTS and STT set."""
    llm_section = OpenAILLMService(
        provider="openai", api_key="sk-global-llm", model="gpt-4.1"
    )
    tts_section = ElevenlabsTTSConfiguration(
        provider="elevenlabs",
        api_key="el-global-tts",
        voice="Rachel",
        model="eleven_flash_v2_5",
    )
    stt_section = DeepgramSTTConfiguration(
        provider="deepgram",
        api_key="dg-global-stt",
        model="nova-3-general",
        language="multi",
    )
    # Realtime is explicitly disabled and has no section in this baseline.
    return UserConfiguration(
        llm=llm_section,
        tts=tts_section,
        stt=stt_section,
        is_realtime=False,
        realtime=None,
    )
@pytest.fixture
def global_config_realtime() -> UserConfiguration:
    """Provide a global configuration whose realtime section is set and enabled."""
    llm_section = OpenAILLMService(
        provider="openai", api_key="sk-global-llm", model="gpt-4.1"
    )
    tts_section = ElevenlabsTTSConfiguration(
        provider="elevenlabs",
        api_key="el-global-tts",
        voice="Rachel",
        model="eleven_flash_v2_5",
    )
    stt_section = DeepgramSTTConfiguration(
        provider="deepgram",
        api_key="dg-global-stt",
        model="nova-3-general",
        language="multi",
    )
    realtime_section = GoogleRealtimeLLMConfiguration(
        provider="google_realtime",
        api_key="goog-global-rt",
        model="gemini-3.1-flash-live-preview",
        voice="Puck",
        language="en",
    )
    return UserConfiguration(
        llm=llm_section,
        tts=tts_section,
        stt=stt_section,
        is_realtime=True,
        realtime=realtime_section,
    )
# ---------------------------------------------------------------------------
# No overrides → global returned unchanged
# ---------------------------------------------------------------------------
class TestNoOverrides:
    """With no effective overrides, the resolved config mirrors the global one."""

    def test_none_overrides_returns_global(self, global_config):
        """Passing ``None`` as overrides keeps every global value."""
        effective = resolve_effective_config(global_config, None)

        assert effective.llm.model == "gpt-4.1"
        assert effective.tts.voice == "Rachel"
        assert effective.stt.model == "nova-3-general"
        assert effective.is_realtime is False

    def test_empty_dict_overrides_returns_global(self, global_config):
        """Passing an empty dict as overrides keeps the global values."""
        no_overrides = {}
        effective = resolve_effective_config(global_config, no_overrides)

        assert effective.llm.model == "gpt-4.1"
        assert effective.tts.voice == "Rachel"

    def test_does_not_mutate_original(self, global_config):
        """Resolving with an override must leave the input config untouched."""
        llm_override = {"llm": {"model": "gpt-4.1-mini"}}
        resolve_effective_config(global_config, llm_override)

        assert global_config.llm.model == "gpt-4.1"
# ---------------------------------------------------------------------------
# Single-field overrides within a section (same provider)
# ---------------------------------------------------------------------------
class TestSingleFieldOverride:
    """Overriding one field of a section keeps the section's other fields."""

    def test_override_llm_model_only(self, global_config):
        """Only the LLM model changes; provider and API key are inherited."""
        overrides = {"llm": {"model": "gpt-4.1-mini"}}
        effective = resolve_effective_config(global_config, overrides)

        llm = effective.llm
        assert llm.model == "gpt-4.1-mini"
        assert llm.provider == "openai"  # inherited
        assert llm.api_key == "sk-global-llm"  # inherited

    def test_override_tts_voice_only(self, global_config):
        """Only the TTS voice changes; provider and API key are inherited."""
        overrides = {"tts": {"voice": "shimmer"}}
        effective = resolve_effective_config(global_config, overrides)

        tts = effective.tts
        assert tts.voice == "shimmer"
        assert tts.provider == "elevenlabs"  # inherited
        assert tts.api_key == "el-global-tts"  # inherited

    def test_override_stt_language_only(self, global_config):
        """Only the STT language changes; model and provider are inherited."""
        overrides = {"stt": {"language": "en"}}
        effective = resolve_effective_config(global_config, overrides)

        stt = effective.stt
        assert stt.language == "en"
        assert stt.model == "nova-3-general"  # inherited
        assert stt.provider == "deepgram"  # inherited
# ---------------------------------------------------------------------------
# Provider change (requires full section replacement)
# ---------------------------------------------------------------------------
class TestProviderChange:
    """Overrides that switch a section to a different provider."""

    def test_override_llm_to_different_provider(self, global_config):
        """A full LLM section for another provider replaces the global one."""
        groq_llm = {
            "provider": "groq",
            "api_key": "groq-key",
            "model": "llama-3.3-70b-versatile",
        }
        effective = resolve_effective_config(global_config, {"llm": groq_llm})

        assert effective.llm.provider == "groq"
        assert effective.llm.model == "llama-3.3-70b-versatile"
        assert effective.llm.api_key == "groq-key"

    def test_provider_change_does_not_affect_other_sections(self, global_config):
        """Switching the LLM provider leaves the TTS and STT providers alone."""
        groq_llm = {
            "provider": "groq",
            "api_key": "groq-key",
            "model": "llama-3.3-70b-versatile",
        }
        effective = resolve_effective_config(global_config, {"llm": groq_llm})

        # TTS and STT unchanged
        assert effective.tts.provider == "elevenlabs"
        assert effective.stt.provider == "deepgram"
# ---------------------------------------------------------------------------
# API key inheritance
# ---------------------------------------------------------------------------
class TestAPIKeyInheritance:
    """API key resolution: inherited unless the override supplies one."""

    def test_no_api_key_in_override_inherits_global(self, global_config):
        """An override without ``api_key`` falls back to the global key."""
        overrides = {"llm": {"model": "gpt-4.1-mini"}}
        effective = resolve_effective_config(global_config, overrides)

        assert effective.llm.api_key == "sk-global-llm"

    def test_explicit_api_key_in_override_wins(self, global_config):
        """An override that carries ``api_key`` takes precedence over global."""
        overrides = {
            "llm": {"model": "gpt-4.1-mini", "api_key": "sk-override-key"},
        }
        effective = resolve_effective_config(global_config, overrides)

        assert effective.llm.api_key == "sk-override-key"
# ---------------------------------------------------------------------------
# is_realtime override
# ---------------------------------------------------------------------------
class TestRealtimeOverride:
    """Overrides touching the ``is_realtime`` flag and the realtime section."""

    def test_enable_realtime_on_non_realtime_global(self, global_config):
        """Flag and realtime section can both be supplied by the override."""
        realtime_section = {
            "provider": "google_realtime",
            "api_key": "goog-override",
            "model": "gemini-3.1-flash-live-preview",
            "voice": "Charon",
            "language": "en",
        }
        overrides = {"is_realtime": True, "realtime": realtime_section}
        effective = resolve_effective_config(global_config, overrides)

        assert effective.is_realtime is True
        assert effective.realtime.provider == "google_realtime"
        assert effective.realtime.voice == "Charon"

    def test_disable_realtime_on_realtime_global(self, global_config_realtime):
        """The override can switch the flag off on a realtime global config."""
        overrides = {"is_realtime": False}
        effective = resolve_effective_config(global_config_realtime, overrides)

        assert effective.is_realtime is False
        # Realtime config may still be present but is_realtime flag controls usage

    def test_override_realtime_voice_only(self, global_config_realtime):
        """Only the realtime voice changes; provider and API key are inherited."""
        overrides = {"realtime": {"voice": "Kore"}}
        effective = resolve_effective_config(global_config_realtime, overrides)

        realtime = effective.realtime
        assert realtime.voice == "Kore"
        assert realtime.provider == "google_realtime"  # inherited
        assert realtime.api_key == "goog-global-rt"  # inherited

    def test_override_is_realtime_only_without_realtime_section(self, global_config):
        """Override is_realtime=True but provide no realtime config.

        Should set the flag; realtime section stays None from global.
        """
        overrides = {"is_realtime": True}
        effective = resolve_effective_config(global_config, overrides)

        assert effective.is_realtime is True
        assert effective.realtime is None  # no config provided
# ---------------------------------------------------------------------------
# Section override when global has None for that section
# ---------------------------------------------------------------------------
class TestOverrideOnNullGlobal:
    """Overrides for sections that the global configuration leaves as None."""

    def test_override_stt_when_global_is_none(self):
        """When global has no STT config, override creates one from scratch."""
        base = UserConfiguration(
            llm=OpenAILLMService(provider="openai", api_key="sk-key", model="gpt-4.1"),
            stt=None,
            tts=None,
            is_realtime=False,
        )
        stt_section = {
            "provider": "deepgram",
            "api_key": "dg-new",
            "model": "nova-3-general",
            "language": "en",
        }
        effective = resolve_effective_config(base, {"stt": stt_section})

        assert effective.stt is not None
        assert effective.stt.provider == "deepgram"
        assert effective.stt.model == "nova-3-general"

    def test_override_realtime_when_global_is_none(self):
        """Realtime section can be created from override even if global has none."""
        base = UserConfiguration(
            llm=OpenAILLMService(provider="openai", api_key="sk-key", model="gpt-4.1"),
            is_realtime=False,
            realtime=None,
        )
        realtime_section = {
            "provider": "google_realtime",
            "api_key": "goog-new",
            "model": "gemini-3.1-flash-live-preview",
            "voice": "Puck",
            "language": "en",
        }
        overrides = {"is_realtime": True, "realtime": realtime_section}
        effective = resolve_effective_config(base, overrides)

        assert effective.is_realtime is True
        assert effective.realtime.provider == "google_realtime"
# ---------------------------------------------------------------------------
# Multi-section overrides
# ---------------------------------------------------------------------------
class TestMultiSectionOverride:
    """Overrides that touch more than one section in a single call."""

    def test_override_llm_and_tts_not_stt(self, global_config):
        """LLM and TTS are overridden while STT keeps its global values."""
        overrides = {
            "llm": {"model": "gpt-4.1-mini"},
            "tts": {"voice": "shimmer"},
        }
        effective = resolve_effective_config(global_config, overrides)

        assert effective.llm.model == "gpt-4.1-mini"
        assert effective.tts.voice == "shimmer"
        # STT untouched
        assert effective.stt.model == "nova-3-general"
        assert effective.stt.language == "multi"

    def test_override_all_sections(self, global_config):
        """Every overridable section and the realtime flag change together."""
        realtime_section = {
            "provider": "google_realtime",
            "api_key": "goog-key",
            "model": "gemini-3.1-flash-live-preview",
            "voice": "Fenrir",
            "language": "en",
        }
        overrides = {
            "llm": {"model": "gpt-4.1-mini"},
            "tts": {"voice": "shimmer"},
            "stt": {"language": "en"},
            "is_realtime": True,
            "realtime": realtime_section,
        }
        effective = resolve_effective_config(global_config, overrides)

        assert effective.llm.model == "gpt-4.1-mini"
        assert effective.tts.voice == "shimmer"
        assert effective.stt.language == "en"
        assert effective.is_realtime is True
        assert effective.realtime.voice == "Fenrir"
# ---------------------------------------------------------------------------
# Ignored / unknown keys
# ---------------------------------------------------------------------------
class TestUnknownKeys:
    """Override keys that are not overridable sections."""

    def test_unknown_section_in_overrides_is_ignored(self, global_config):
        """Override with a key that doesn't map to any section should not crash."""
        overrides = {"unknown_section": {"foo": "bar"}}
        effective = resolve_effective_config(global_config, overrides)

        assert effective.llm.model == "gpt-4.1"

    def test_embeddings_not_overridable(self, global_config):
        """Embeddings stay global — overrides for embeddings should be ignored."""
        embeddings_section = {
            "provider": "openai",
            "model": "text-embedding-3-small",
        }
        effective = resolve_effective_config(
            global_config, {"embeddings": embeddings_section}
        )

        assert effective.embeddings is None  # was None in global, stays None

View file

@ -1,960 +0,0 @@
"""Tests validating user turn stop strategy behavior during bot speaking scenarios.
These tests validate the scenarios described in scenarios.md. They demonstrate
how the ExternalUserTurnStopStrategy and UserTurnController interact when frames
are suppressed (muted) during bot speaking.
Key concepts:
- When the bot is speaking, AlwaysUserMuteStrategy causes the LLMUserAggregator
to suppress user frames (UserStartedSpeaking, UserStoppedSpeaking, Transcription, VAD).
- The ExternalUserTurnStopStrategy accumulates _text from TranscriptionFrames and
triggers a stop when _user_speaking is False and _text is truthy.
- The UserTurnController only allows a stop if _user_turn is True (a start must
have occurred first). When a stop is rejected, the controller unconditionally
resets all stop strategies, clearing any dangling state (e.g. _text).
- This unconditional reset prevents stale _text from causing premature stops
or contaminating subsequent turns.
"""
import asyncio
import pytest
from pipecat.frames.frames import (
BotStartedSpeakingFrame,
BotStoppedSpeakingFrame,
EndTaskFrame,
Frame,
TranscriptionFrame,
UserStartedSpeakingFrame,
UserStoppedSpeakingFrame,
VADUserStartedSpeakingFrame,
VADUserStoppedSpeakingFrame,
)
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.llm_context import LLMContext
from pipecat.processors.aggregators.llm_response_universal import (
LLMAssistantAggregatorParams,
LLMContextAggregatorPair,
LLMUserAggregatorParams,
)
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
from pipecat.tests import MockLLMService
from pipecat.turns.user_mute import AlwaysUserMuteStrategy
from pipecat.turns.user_start import VADUserTurnStartStrategy
from pipecat.turns.user_stop import ExternalUserTurnStopStrategy
from pipecat.turns.user_turn_strategies import UserTurnStrategies
from pipecat.utils.time import time_now_iso8601
# Short timeout for faster tests
STOP_STRATEGY_TIMEOUT = 0.15
# Delay to allow async processing
ASYNC_DELAY = 0.05
# Delay to wait for stop strategy timeout to fire
TIMEOUT_WAIT = STOP_STRATEGY_TIMEOUT + 0.1
class FrameInjector(FrameProcessor):
    """Pass-through processor that also lets a test push frames of its own."""

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Run the base-class handling, then forward the frame unchanged."""
        await super().process_frame(frame, direction)
        await self.push_frame(frame, direction)

    async def inject(
        self,
        frame: Frame,
        direction: FrameDirection = FrameDirection.DOWNSTREAM,
    ):
        """Push ``frame`` from this processor in ``direction`` (downstream by default)."""
        await self.push_frame(frame, direction)
def _build_components(llm_steps=None):
    """Build pipeline components for testing.

    Uses:
    - VADUserTurnStartStrategy: turn starts only when VADUserStartedSpeakingFrame arrives
    - ExternalUserTurnStopStrategy: turn stops based on UserStoppedSpeakingFrame + _text
    - AlwaysUserMuteStrategy: suppresses user frames while bot is speaking

    Args:
        llm_steps: Mock steps for ``MockLLMService``. When ``None``, three
            text responses ("Response 1" .. "Response 3") are used.

    Returns:
        A 7-tuple of ``(injector, user_aggregator, stop_strategy,
        turn_controller, mock_llm, context, pipeline)``.
    """
    context = LLMContext()
    # Short timeout so the stop strategy's timeout path fires quickly in tests.
    stop_strategy = ExternalUserTurnStopStrategy(timeout=STOP_STRATEGY_TIMEOUT)
    user_turn_strategies = UserTurnStrategies(
        start=[VADUserTurnStartStrategy()],
        stop=[stop_strategy],
    )
    user_params = LLMUserAggregatorParams(
        user_turn_strategies=user_turn_strategies,
        user_mute_strategies=[AlwaysUserMuteStrategy()],
    )
    assistant_params = LLMAssistantAggregatorParams(expect_stripped_words=True)
    context_aggregator = LLMContextAggregatorPair(
        context, assistant_params=assistant_params, user_params=user_params
    )
    user_agg = context_aggregator.user()
    assistant_agg = context_aggregator.assistant()

    if llm_steps is None:
        llm_steps = [
            MockLLMService.create_text_chunks(text="Response 1"),
            MockLLMService.create_text_chunks(text="Response 2"),
            MockLLMService.create_text_chunks(text="Response 3"),
        ]
    mock_llm = MockLLMService(mock_steps=llm_steps, chunk_delay=0.001)

    injector = FrameInjector()
    pipeline = Pipeline([injector, user_agg, mock_llm, assistant_agg])

    # Tests inspect the controller's private state, so expose it directly.
    turn_controller = user_agg._user_turn_controller

    return (
        injector,
        user_agg,
        stop_strategy,
        turn_controller,
        mock_llm,
        context,
        pipeline,
    )
async def _run_scenario(pipeline, inject_fn):
    """Run ``pipeline`` while concurrently awaiting the ``inject_fn`` coroutine function."""
    pipeline_task = PipelineTask(pipeline, params=PipelineParams(), enable_rtvi=False)
    pipeline_runner = PipelineRunner()

    async def _inject_after_startup():
        # Wait for pipeline to start (StartFrame to propagate)
        await asyncio.sleep(ASYNC_DELAY)
        await inject_fn()

    # The runner and the injector run side by side until both finish.
    await asyncio.gather(pipeline_runner.run(pipeline_task), _inject_after_startup())
async def _inject_user_turn(injector, text, delay=ASYNC_DELAY):
    """Inject a complete user turn: VAD start + external start + external stop + transcription.

    The frames are injected in this order:
    1. VADUserStartedSpeakingFrame (VAD detects speech; triggers turn start)
    2. UserStartedSpeakingFrame (external start; stop strategy tracks _user_speaking)
    3. UserStoppedSpeakingFrame (external stop; no transcription has arrived yet)
    4. TranscriptionFrame, after ``delay`` seconds (stop strategy accumulates _text)

    The transcription is deliberately sent after the external stop, so callers
    must wait for the stop strategy's timeout before asserting on the turn.

    Args:
        injector: The ``FrameInjector`` used to push frames into the pipeline.
        text: Transcription text for the turn.
        delay: Seconds to wait between the external stop and the transcription.
    """
    await injector.inject(VADUserStartedSpeakingFrame())
    await asyncio.sleep(0)  # yield so the frame is processed before the next one
    await injector.inject(UserStartedSpeakingFrame())
    await asyncio.sleep(0)
    await injector.inject(UserStoppedSpeakingFrame())
    await asyncio.sleep(delay)
    await injector.inject(TranscriptionFrame(text, "user-1", time_now_iso8601()))
class TestUserTurnStopScenarios:
"""Test scenarios from scenarios.md.
Each test simulates a specific frame ordering to validate the interaction
between ExternalUserTurnStopStrategy and UserTurnController, particularly
around frame suppression during bot speaking.
"""
# =========================================================================
# Scenario 1 (✅): All frames suppressed during bot speaking
#
# BotStartedSpeaking (muted)
# UserStartedSpeaking (suppressed)
# TranscriptionFrame (suppressed)
# UserStoppedSpeaking (suppressed)
# BotStoppedSpeaking (unmuted)
#
# Stop strategy _text is empty because TranscriptionFrame was suppressed.
# =========================================================================
@pytest.mark.asyncio
async def test_scenario_1_all_suppressed_then_bot_stops(self):
    """All user frames suppressed during bot speaking, then bot stops.

    Expected: _text is empty, no turn triggered, clean state.
    Second turn works correctly.

    The assertions run inside the injection coroutine, i.e. while the
    pipeline started by ``_run_scenario`` is still running.
    """
    # user_agg and context are unpacked but not used by this scenario.
    injector, user_agg, stop_strategy, turn_ctrl, mock_llm, context, pipeline = (
        _build_components()
    )

    async def inject():
        # === Turn 1: Bot speaking, all user frames suppressed ===
        await injector.inject(BotStartedSpeakingFrame())
        await asyncio.sleep(ASYNC_DELAY)
        # These are all suppressed by mute
        await injector.inject(VADUserStartedSpeakingFrame())
        await asyncio.sleep(0)
        await injector.inject(UserStartedSpeakingFrame())
        await asyncio.sleep(0)
        await injector.inject(
            TranscriptionFrame("hello", "user-1", time_now_iso8601())
        )
        await asyncio.sleep(0)
        await injector.inject(UserStoppedSpeakingFrame())
        await asyncio.sleep(0)
        await injector.inject(VADUserStoppedSpeakingFrame())
        await asyncio.sleep(ASYNC_DELAY)
        await injector.inject(BotStoppedSpeakingFrame())
        # Wait longer than the stop-strategy timeout before checking state.
        await asyncio.sleep(TIMEOUT_WAIT)
        # Assert: _text should be empty (all frames suppressed)
        assert stop_strategy._text == "", (
            f"Expected empty _text after all frames suppressed, got '{stop_strategy._text}'"
        )
        assert not turn_ctrl._user_turn, "Expected _user_turn to be False"
        # === Turn 2: Normal turn should work correctly ===
        await _inject_user_turn(injector, "second turn text")
        await asyncio.sleep(TIMEOUT_WAIT)
        # Assert: turn completed, _text cleared by reset
        assert stop_strategy._text == "", (
            f"Expected empty _text after clean turn, got '{stop_strategy._text}'"
        )
        assert not turn_ctrl._user_turn, (
            "Expected _user_turn to be False after turn"
        )
        assert mock_llm.get_current_step() == 1, (
            f"Expected 1 LLM call (turn 2 only), got {mock_llm.get_current_step()}"
        )
        # Last injected frame; presumably ends the pipeline task so the run returns.
        await injector.inject(EndTaskFrame(), direction=FrameDirection.UPSTREAM)

    await _run_scenario(pipeline, inject)
# =========================================================================
# Scenario 2 (✅): User frames suppressed, user stops after bot stops
#
# BotStartedSpeaking (muted)
# UserStartedSpeaking (suppressed)
# TranscriptionFrame (suppressed)
# BotStoppedSpeaking (unmuted)
# UserStoppedSpeaking (stop strategy has no _text -> no trigger)
# =========================================================================
@pytest.mark.asyncio
async def test_scenario_2_user_stops_after_bot_stops_no_text(self):
    """User stops speaking after bot stops, but transcription was suppressed.

    Expected: _text is empty because transcription was suppressed.
    UserStoppedSpeaking doesn't trigger stop (no _text).
    A normal second turn then produces exactly one LLM call.
    """
    # user_agg and context are unpacked but not used by this scenario.
    injector, user_agg, stop_strategy, turn_ctrl, mock_llm, context, pipeline = (
        _build_components()
    )

    async def inject():
        # === Turn 1: Bot speaking, user frames partially suppressed ===
        await injector.inject(BotStartedSpeakingFrame())
        await asyncio.sleep(ASYNC_DELAY)
        # Suppressed during bot speaking
        await injector.inject(VADUserStartedSpeakingFrame())
        await asyncio.sleep(0)
        await injector.inject(UserStartedSpeakingFrame())
        await asyncio.sleep(0)
        await injector.inject(
            TranscriptionFrame("hello", "user-1", time_now_iso8601())
        )
        await asyncio.sleep(ASYNC_DELAY)
        # Bot stops -> unmuted
        await injector.inject(BotStoppedSpeakingFrame())
        await asyncio.sleep(ASYNC_DELAY)
        # UserStoppedSpeaking arrives after unmute, but _text is empty
        await injector.inject(UserStoppedSpeakingFrame())
        # Wait longer than the stop-strategy timeout before checking state.
        await asyncio.sleep(TIMEOUT_WAIT)
        # Assert: _text empty (TranscriptionFrame was suppressed)
        assert stop_strategy._text == "", (
            f"Expected empty _text, got '{stop_strategy._text}'"
        )
        assert not turn_ctrl._user_turn, "Expected _user_turn to be False"
        # === Turn 2: Normal turn should work ===
        await _inject_user_turn(injector, "second turn")
        await asyncio.sleep(TIMEOUT_WAIT)
        assert stop_strategy._text == "", "Expected clean _text after turn 2"
        assert mock_llm.get_current_step() == 1, (
            f"Expected 1 LLM call, got {mock_llm.get_current_step()}"
        )
        # Last injected frame; presumably ends the pipeline task so the run returns.
        await injector.inject(EndTaskFrame(), direction=FrameDirection.UPSTREAM)

    await _run_scenario(pipeline, inject)
# =========================================================================
# Scenario 3 (✅ after fix): Transcription arrives after unmute
#
# BotStartedSpeaking (muted)
# UserStartedSpeaking (suppressed)
# BotStoppedSpeaking (unmuted)
# TranscriptionFrame -> stop strategy _text = "hello"
# UserStoppedSpeaking -> stop strategy triggers (text truthy, not speaking)
# Turn controller ignores (user_turn is False), BUT unconditionally
# resets stop strategies -> _text cleared. No dangling state.
# =========================================================================
@pytest.mark.asyncio
async def test_scenario_3_transcription_after_unmute_text_cleared(self):
    """Transcription arrives after bot stops but turn was never started.

    The VADUserStartedSpeakingFrame was suppressed, so no turn started.
    But TranscriptionFrame arrives after unmute and accumulates _text.
    The stop strategy triggers, but the turn controller rejects it
    (no active turn). The unconditional reset clears _text, preventing
    any dangling state from contaminating subsequent turns.

    A spy temporarily replaces ``stop_strategy.trigger_user_turn_stopped``
    to record how often it is called and the value of
    ``turn_ctrl._user_turn`` at each call.
    """
    injector, user_agg, stop_strategy, turn_ctrl, mock_llm, context, pipeline = (
        _build_components()
    )

    async def inject():
        # === Turn 1: Rejected stop with unconditional reset ===
        await injector.inject(BotStartedSpeakingFrame())
        await asyncio.sleep(ASYNC_DELAY)
        # Suppressed: VAD and UserStartedSpeaking
        await injector.inject(VADUserStartedSpeakingFrame())
        await asyncio.sleep(0)
        await injector.inject(UserStartedSpeakingFrame())
        await asyncio.sleep(ASYNC_DELAY)
        # Bot stops -> unmuted
        await injector.inject(BotStoppedSpeakingFrame())
        await asyncio.sleep(ASYNC_DELAY)
        # Install spy on trigger_user_turn_stopped to track every call
        # and the _user_turn state at the time of each call.
        trigger_stop_calls = []
        original_trigger_stop = stop_strategy.trigger_user_turn_stopped

        async def spy_trigger_stop():
            # Record the controller's turn state, then delegate to the real method.
            trigger_stop_calls.append(turn_ctrl._user_turn)
            await original_trigger_stop()

        stop_strategy.trigger_user_turn_stopped = spy_trigger_stop
        # TranscriptionFrame arrives AFTER unmute -> reaches stop strategy
        await injector.inject(
            TranscriptionFrame("hello", "user-1", time_now_iso8601())
        )
        await asyncio.sleep(ASYNC_DELAY)
        # UserStoppedSpeaking arrives AFTER unmute
        # Stop strategy: _user_speaking is False (UserStartedSpeaking was suppressed),
        # _text is "hello" -> triggers stop via _handle_user_stopped_speaking
        # Turn controller: _user_turn is False -> rejects, but resets -> _text cleared
        await injector.inject(UserStoppedSpeakingFrame())
        await asyncio.sleep(ASYNC_DELAY)
        # Call #1: _handle_user_stopped_speaking -> _maybe_trigger_user_turn_stopped
        assert len(trigger_stop_calls) == 1, (
            f"Expected exactly 1 trigger_user_turn_stopped call from "
            f"_handle_user_stopped_speaking, got {len(trigger_stop_calls)}"
        )
        assert trigger_stop_calls[0] is False, (
            "Expected _user_turn=False when _handle_user_stopped_speaking triggered stop"
        )
        # Wait for _task_handler timeout period
        await asyncio.sleep(TIMEOUT_WAIT)
        # The unconditional reset cleared _text after the rejected stop,
        # so the timeout's _maybe_trigger_user_turn_stopped sees _text="" and
        # does NOT call trigger_user_turn_stopped again.
        assert len(trigger_stop_calls) == 1, (
            f"Expected no additional trigger_user_turn_stopped calls after "
            f"reset cleared _text, but got {len(trigger_stop_calls)} total call(s)"
        )
        # Restore original method
        # NOTE(review): not restored if an assertion above fails; the strategy
        # object is local to this test, so this looks harmless — confirm.
        stop_strategy.trigger_user_turn_stopped = original_trigger_stop
        # Transcript is not suppressed, so we should have hello in user aggregator
        assert user_agg._aggregation[0].text == "hello"
        # Assert: _text is cleared by the unconditional reset (no dangling state)
        assert stop_strategy._text == "", (
            f"Expected empty _text after unconditional reset, got '{stop_strategy._text}'"
        )
        assert not turn_ctrl._user_turn, (
            "Expected _user_turn to be False (turn was never started)"
        )
        # No LLM call should have happened
        assert mock_llm.get_current_step() == 0, (
            f"Expected 0 LLM calls, got {mock_llm.get_current_step()}"
        )
        # === Turn 2: No premature stop, normal flow ===
        # _text is clean, so UserStoppedSpeaking won't trigger a premature stop.
        # The turn completes normally when the timeout fires after TranscriptionFrame.
        # The aggregator still has dangling "hello" from turn 1, which gets
        # combined with turn 2's "world" — this is acceptable behavior.
        await _inject_user_turn(injector, "world")
        await asyncio.sleep(TIMEOUT_WAIT)
        assert stop_strategy._text == "", (
            f"Expected clean _text after normal turn, got '{stop_strategy._text}'"
        )
        assert mock_llm.get_current_step() == 1, (
            f"Expected 1 LLM call (normal turn), got {mock_llm.get_current_step()}"
        )
        # The LLM received both "hello" (dangling in aggregator from turn 1)
        # and "world" (from turn 2). This is acceptable — the aggregator's
        # _aggregation is a separate concern from the stop strategy's _text.
        messages = context.messages
        user_messages = [m for m in messages if m.get("role") == "user"]
        assert len(user_messages) == 1, (
            f"Expected 1 user message, got {len(user_messages)}"
        )
        user_text = user_messages[0]["content"]
        assert "hello" in user_text, (
            f"Expected 'hello' (from aggregator) in user message, got: '{user_text}'"
        )
        assert "world" in user_text, (
            f"Expected 'world' (from turn 2) in user message, got: '{user_text}'"
        )
        # Last injected frame; presumably ends the pipeline task so the run returns.
        await injector.inject(EndTaskFrame(), direction=FrameDirection.UPSTREAM)

    await _run_scenario(pipeline, inject)
# =========================================================================
# Scenario 4 (✅): User speaks after bot stops -> normal flow
#
# BotStartedSpeaking (muted)
# BotStoppedSpeaking (unmuted)
# UserStartedSpeaking (triggers interruption/turn start)
# TranscriptionFrame
# UserStoppedSpeaking
#
# Turn starts because VAD frame is not suppressed. Everything works.
# =========================================================================
@pytest.mark.asyncio
async def test_scenario_4_user_speaks_after_bot_stops(self):
    """User speaks after bot stops speaking. Normal flow, everything works.

    All frames arrive after unmute, so VAD triggers turn start normally.
    Two consecutive turns are injected; each is expected to produce its
    own LLM call and its own user message in the context.
    """
    # user_agg is unpacked but not used by this scenario.
    injector, user_agg, stop_strategy, turn_ctrl, mock_llm, context, pipeline = (
        _build_components()
    )

    async def inject():
        # === Turn 1: Bot speaks, then user speaks after bot stops ===
        await injector.inject(BotStartedSpeakingFrame())
        await asyncio.sleep(ASYNC_DELAY)
        await injector.inject(BotStoppedSpeakingFrame())
        await asyncio.sleep(ASYNC_DELAY)
        # Normal user turn after bot stopped
        await _inject_user_turn(injector, "hello after bot")
        # Wait longer than the stop-strategy timeout before checking state.
        await asyncio.sleep(TIMEOUT_WAIT)
        # Assert: clean state
        assert stop_strategy._text == "", (
            f"Expected empty _text after clean turn, got '{stop_strategy._text}'"
        )
        assert not turn_ctrl._user_turn, "Expected _user_turn False after turn"
        assert mock_llm.get_current_step() == 1, (
            f"Expected 1 LLM call, got {mock_llm.get_current_step()}"
        )
        # === Turn 2: Another normal turn ===
        await _inject_user_turn(injector, "second turn")
        await asyncio.sleep(TIMEOUT_WAIT)
        assert stop_strategy._text == "", "Expected clean _text after turn 2"
        assert mock_llm.get_current_step() == 2, (
            f"Expected 2 LLM calls, got {mock_llm.get_current_step()}"
        )
        # Verify clean context - each turn should be separate
        user_messages = [m for m in context.messages if m.get("role") == "user"]
        assert len(user_messages) == 2, (
            f"Expected 2 user messages (one per turn), got {len(user_messages)}"
        )
        # Last injected frame; presumably ends the pipeline task so the run returns.
        await injector.inject(EndTaskFrame(), direction=FrameDirection.UPSTREAM)

    await _run_scenario(pipeline, inject)
# =========================================================================
# Scenario 5 (✅): Late transcription - all suppressed
#
# BotStartedSpeaking (muted)
# UserStartedSpeaking (suppressed)
# UserStoppedSpeaking (suppressed)
# TranscriptionFrame (suppressed) <- late, but still during bot speaking
# BotStoppedSpeaking (unmuted)
#
# Everything suppressed, _text empty. Clean state.
# =========================================================================
@pytest.mark.asyncio
async def test_scenario_5_late_transcription_all_suppressed(self):
    """Late transcription arrives during bot speaking. All suppressed.

    Even though transcription is late, it still arrives before BotStoppedSpeaking
    so it's still muted. Clean state.
    A normal second turn then produces exactly one LLM call.
    """
    # user_agg and context are unpacked but not used by this scenario.
    injector, user_agg, stop_strategy, turn_ctrl, mock_llm, context, pipeline = (
        _build_components()
    )

    async def inject():
        # === Turn 1: Late transcription, but all still suppressed ===
        await injector.inject(BotStartedSpeakingFrame())
        await asyncio.sleep(ASYNC_DELAY)
        await injector.inject(VADUserStartedSpeakingFrame())
        await asyncio.sleep(0)
        await injector.inject(UserStartedSpeakingFrame())
        await asyncio.sleep(0)
        await injector.inject(UserStoppedSpeakingFrame())
        await asyncio.sleep(0)
        await injector.inject(VADUserStoppedSpeakingFrame())
        await asyncio.sleep(0)
        # Late transcription - but still during bot speaking
        await injector.inject(
            TranscriptionFrame("late hello", "user-1", time_now_iso8601())
        )
        await asyncio.sleep(ASYNC_DELAY)
        await injector.inject(BotStoppedSpeakingFrame())
        # Wait longer than the stop-strategy timeout before checking state.
        await asyncio.sleep(TIMEOUT_WAIT)
        # Assert: all suppressed, clean state
        assert stop_strategy._text == "", (
            f"Expected empty _text, got '{stop_strategy._text}'"
        )
        assert not turn_ctrl._user_turn
        # === Turn 2: Normal turn works ===
        await _inject_user_turn(injector, "clean turn")
        await asyncio.sleep(TIMEOUT_WAIT)
        assert stop_strategy._text == ""
        assert mock_llm.get_current_step() == 1
        # Last injected frame; presumably ends the pipeline task so the run returns.
        await injector.inject(EndTaskFrame(), direction=FrameDirection.UPSTREAM)

    await _run_scenario(pipeline, inject)
# =========================================================================
# Scenario 6 (✅ after fix): Late transcription arrives after bot stops
#
# BotStartedSpeaking (muted)
# UserStartedSpeaking (suppressed)
# UserStoppedSpeaking (suppressed)
# BotStoppedSpeaking (unmuted)
# TranscriptionFrame -> reaches stop strategy, _text = "late hello"
#
# Stop strategy timeout fires: _user_speaking is False (from initial state,
# UserStartedSpeaking was suppressed), _text truthy -> triggers stop.
# Turn controller: _user_turn False -> rejects, but unconditionally resets
# -> _text cleared. No dangling state.
# =========================================================================
@pytest.mark.asyncio
async def test_scenario_6_late_transcription_after_unmute_text_cleared(self):
    """Late transcription arrives after bot stops. No turn was started.

    UserStartedSpeaking was suppressed so _user_turn never started.
    The late TranscriptionFrame accumulates _text after unmute.
    The stop strategy timeout triggers, but controller rejects it.
    The unconditional reset clears _text, preventing dangling state.

    Phase 1 asserts that exactly one stop was triggered while
    ``turn_ctrl._user_turn`` was False and that ``stop_strategy._text`` is
    empty afterwards. Phase 2 runs a normal turn and asserts one LLM step
    whose single user message contains both transcripts.
    """
    # The component tuple comes from a builder defined outside this test.
    injector, user_agg, stop_strategy, turn_ctrl, mock_llm, context, pipeline = (
        _build_components()
    )

    async def inject():
        # === Turn 1: Late transcription scenario ===
        await injector.inject(BotStartedSpeakingFrame())
        await asyncio.sleep(ASYNC_DELAY)
        # Suppressed
        await injector.inject(VADUserStartedSpeakingFrame())
        # sleep(0) just yields to the event loop between back-to-back frames.
        await asyncio.sleep(0)
        await injector.inject(UserStartedSpeakingFrame())
        await asyncio.sleep(0)
        await injector.inject(UserStoppedSpeakingFrame())
        await asyncio.sleep(0)
        await injector.inject(VADUserStoppedSpeakingFrame())
        await asyncio.sleep(ASYNC_DELAY)
        # Bot stops -> unmuted
        await injector.inject(BotStoppedSpeakingFrame())
        await asyncio.sleep(ASYNC_DELAY)

        # Install spy on trigger_user_turn_stopped to track calls
        trigger_stop_calls = []
        original_trigger_stop = stop_strategy.trigger_user_turn_stopped

        async def spy_trigger_stop():
            # Record the controller's turn flag at call time, then delegate
            # to the real implementation so behavior is unchanged.
            trigger_stop_calls.append(turn_ctrl._user_turn)
            await original_trigger_stop()

        stop_strategy.trigger_user_turn_stopped = spy_trigger_stop

        # Late transcription arrives after unmute
        await injector.inject(
            TranscriptionFrame("late hello", "user-1", time_now_iso8601())
        )
        # No UserStoppedSpeakingFrame in this scenario — the stop is
        # triggered ONLY by the _task_handler timeout path.
        await asyncio.sleep(TIMEOUT_WAIT)

        # The _task_handler timeout fired _maybe_trigger_user_turn_stopped:
        #   _user_speaking=False (UserStartedSpeaking was suppressed),
        #   _text="late hello" -> trigger_user_turn_stopped called
        # Turn controller: _user_turn=False -> rejects, but resets -> _text cleared
        assert len(trigger_stop_calls) == 1, (
            f"Expected exactly 1 trigger_user_turn_stopped call from "
            f"_task_handler timeout, got {len(trigger_stop_calls)}"
        )
        assert trigger_stop_calls[0] is False, (
            "Expected _user_turn=False when timeout triggered stop"
        )

        # Restore original method
        # NOTE(review): skipped if an assertion above fails; harmless as long
        # as the components are not shared between tests — confirm.
        stop_strategy.trigger_user_turn_stopped = original_trigger_stop

        # Transcript is not suppressed, so we should have late hello in user aggregator
        assert user_agg._aggregation[0].text == "late hello"

        # Assert: _text is cleared by the unconditional reset (no dangling state)
        assert stop_strategy._text == "", (
            f"Expected empty _text after unconditional reset, got '{stop_strategy._text}'"
        )
        assert not turn_ctrl._user_turn, "Turn should not have started"
        assert mock_llm.get_current_step() == 0, "No LLM call expected"

        # === Turn 2: No premature stop, normal flow ===
        # _text is clean, so no premature stop occurs.
        # The turn completes normally when the timeout fires after TranscriptionFrame.
        # The aggregator still has dangling "late hello" from turn 1, which gets
        # combined with turn 2's "real speech" — this is acceptable behavior.
        await _inject_user_turn(injector, "real speech")
        await asyncio.sleep(TIMEOUT_WAIT)

        assert stop_strategy._text == "", (
            f"Expected clean _text after normal turn, got '{stop_strategy._text}'"
        )
        assert mock_llm.get_current_step() == 1, (
            f"Expected 1 LLM call (normal turn), got {mock_llm.get_current_step()}"
        )

        # The LLM received both "late hello" (dangling in aggregator from turn 1)
        # and "real speech" (from turn 2).
        user_messages = [m for m in context.messages if m.get("role") == "user"]
        assert len(user_messages) == 1, (
            f"Expected 1 user message, got {len(user_messages)}"
        )
        user_text = user_messages[0]["content"]
        assert "late hello" in user_text, (
            f"Expected 'late hello' (from aggregator) in user message, got: '{user_text}'"
        )
        assert "real speech" in user_text, (
            f"Expected 'real speech' (from turn 2) in user message, got: '{user_text}'"
        )

        # Final frame of the scenario, sent upstream.
        await injector.inject(EndTaskFrame(), direction=FrameDirection.UPSTREAM)

    # The scenario coroutine is handed to the shared runner together with the pipeline.
    await _run_scenario(pipeline, inject)
# =========================================================================
# Scenario 7 (✅ after fix): Late transcription - user stops before transcription
#
# BotStartedSpeaking (muted)
# UserStartedSpeaking (suppressed)
# BotStoppedSpeaking (unmuted)
# UserStoppedSpeaking (no _text yet -> no trigger from _handle_user_stopped)
# TranscriptionFrame -> _text = "late", timeout triggers stop
#
# Turn controller: _user_turn False -> rejects, but unconditionally resets
# -> _text cleared. No dangling state.
# =========================================================================
@pytest.mark.asyncio
async def test_scenario_7_late_transcription_after_user_stops_text_cleared(self):
    """User stops speaking, then late transcription arrives. No turn started.

    UserStoppedSpeaking arrives first (no _text yet, so no trigger).
    Then TranscriptionFrame arrives (sets _text). The timeout fires and
    triggers stop, but controller rejects it. The unconditional reset
    clears _text, preventing dangling state.

    Phase 1 asserts an empty ``stop_strategy._text``, no active turn and no
    LLM step. Phase 2 runs a normal turn and asserts one LLM step whose
    single user message contains both transcripts.
    """
    # The component tuple comes from a builder defined outside this test.
    injector, user_agg, stop_strategy, turn_ctrl, mock_llm, context, pipeline = (
        _build_components()
    )

    async def inject():
        # === Turn 1: Late transcription after user stops ===
        await injector.inject(BotStartedSpeakingFrame())
        await asyncio.sleep(ASYNC_DELAY)
        # Suppressed
        await injector.inject(VADUserStartedSpeakingFrame())
        # sleep(0) just yields to the event loop between back-to-back frames.
        await asyncio.sleep(0)
        await injector.inject(UserStartedSpeakingFrame())
        await asyncio.sleep(ASYNC_DELAY)
        # Bot stops -> unmuted
        await injector.inject(BotStoppedSpeakingFrame())
        await asyncio.sleep(ASYNC_DELAY)

        # UserStoppedSpeaking arrives after unmute, but _text is still empty
        # -> _maybe_trigger_user_turn_stopped: _text is "" -> no trigger
        await injector.inject(UserStoppedSpeakingFrame())
        await asyncio.sleep(ASYNC_DELAY)

        # Late transcription arrives AFTER user stopped
        await injector.inject(
            TranscriptionFrame("late text", "user-1", time_now_iso8601())
        )
        # Wait for timeout to fire
        await asyncio.sleep(TIMEOUT_WAIT)

        # Transcript is not suppressed, so we should have late text in user aggregator
        assert user_agg._aggregation[0].text == "late text"

        # Assert: _text is cleared by the unconditional reset
        # The timeout fired _maybe_trigger_user_turn_stopped:
        #   _user_speaking=False (was never set, UserStartedSpeaking suppressed),
        #   _text="late text" -> triggers stop
        # Turn controller: _user_turn=False -> rejects, but resets -> _text cleared
        assert stop_strategy._text == "", (
            f"Expected empty _text after unconditional reset, got '{stop_strategy._text}'"
        )
        assert not turn_ctrl._user_turn
        assert mock_llm.get_current_step() == 0

        # === Turn 2: No premature stop, normal flow ===
        # _text is clean, so no premature stop occurs.
        # The turn completes normally when the timeout fires after TranscriptionFrame.
        # The aggregator still has dangling "late text" from turn 1, which gets
        # combined with turn 2's "next speech" — this is acceptable behavior.
        await _inject_user_turn(injector, "next speech")
        await asyncio.sleep(TIMEOUT_WAIT)

        assert stop_strategy._text == "", (
            f"Expected clean _text after normal turn, got '{stop_strategy._text}'"
        )
        assert mock_llm.get_current_step() == 1

        # The LLM received both "late text" (dangling in aggregator from turn 1)
        # and "next speech" (from turn 2).
        user_messages = [m for m in context.messages if m.get("role") == "user"]
        assert len(user_messages) == 1
        user_text = user_messages[0]["content"]
        assert "late text" in user_text, (
            f"Expected 'late text' (from aggregator) in context, got: '{user_text}'"
        )
        assert "next speech" in user_text, (
            f"Expected 'next speech' (from turn 2) in context, got: '{user_text}'"
        )

        # Final frame of the scenario, sent upstream.
        await injector.inject(EndTaskFrame(), direction=FrameDirection.UPSTREAM)

    # The scenario coroutine is handed to the shared runner together with the pipeline.
    await _run_scenario(pipeline, inject)
# =========================================================================
# Scenario 8 (✅): Late transcription - user speaks after bot stops
#
# BotStartedSpeaking (muted)
# BotStoppedSpeaking (unmuted)
# UserStartedSpeaking (not suppressed -> turn starts, start strategies reset)
# UserStoppedSpeaking (no _text -> no trigger)
# TranscriptionFrame (timeout triggers stop)
#
# Turn controller: _user_turn IS True -> allows stop -> resets strategies
# Clean state!
# =========================================================================
@pytest.mark.asyncio
async def test_scenario_8_late_transcription_user_speaks_after_bot_stops(self):
    """User speaks after bot stops, then late transcription arrives.

    Because user spoke after unmute, VAD triggers turn start -> _user_turn=True.
    When the late transcription triggers the stop, controller allows it and
    resets strategies. Clean state.

    Phase 1 asserts an empty ``stop_strategy._text``, a finished turn and one
    LLM step. Phase 2 runs another turn and asserts a second LLM step and two
    separate user messages in the context.
    """
    # The component tuple comes from a builder defined outside this test.
    # NOTE(review): user_agg is unpacked but never read in this scenario.
    injector, user_agg, stop_strategy, turn_ctrl, mock_llm, context, pipeline = (
        _build_components()
    )

    async def inject():
        # === Turn 1: Late transcription but user spoke after unmute ===
        await injector.inject(BotStartedSpeakingFrame())
        await asyncio.sleep(ASYNC_DELAY)
        await injector.inject(BotStoppedSpeakingFrame())
        await asyncio.sleep(ASYNC_DELAY)

        # User speaks AFTER bot stops -> not suppressed
        await injector.inject(VADUserStartedSpeakingFrame())
        # sleep(0) just yields to the event loop between back-to-back frames.
        await asyncio.sleep(0)
        await injector.inject(UserStartedSpeakingFrame())
        await asyncio.sleep(ASYNC_DELAY)

        # User stops speaking (no _text yet, so stop strategy doesn't trigger)
        await injector.inject(UserStoppedSpeakingFrame())
        await asyncio.sleep(0)
        await injector.inject(VADUserStoppedSpeakingFrame())
        await asyncio.sleep(ASYNC_DELAY)

        # Late transcription arrives
        await injector.inject(
            TranscriptionFrame("late but ok", "user-1", time_now_iso8601())
        )
        # Wait for timeout to trigger stop
        await asyncio.sleep(TIMEOUT_WAIT)

        # Assert: turn controller allowed the stop, strategies were reset
        assert stop_strategy._text == "", (
            f"Expected clean _text after allowed stop, got '{stop_strategy._text}'"
        )
        assert not turn_ctrl._user_turn, "Turn should have stopped"
        assert mock_llm.get_current_step() == 1, (
            f"Expected 1 LLM call, got {mock_llm.get_current_step()}"
        )

        # === Turn 2: Clean subsequent turn ===
        await _inject_user_turn(injector, "clean turn")
        await asyncio.sleep(TIMEOUT_WAIT)

        assert stop_strategy._text == ""
        assert mock_llm.get_current_step() == 2

        # Verify each turn is separate in context
        user_messages = [m for m in context.messages if m.get("role") == "user"]
        assert len(user_messages) == 2, (
            f"Expected 2 separate user messages, got {len(user_messages)}"
        )

        # Final frame of the scenario, sent upstream.
        await injector.inject(EndTaskFrame(), direction=FrameDirection.UPSTREAM)

    # The scenario coroutine is handed to the shared runner together with the pipeline.
    await _run_scenario(pipeline, inject)
# =========================================================================
# Combined test: validates _text is cleared independently after each
# rejected stop, preventing accumulation across muted periods.
# =========================================================================
@pytest.mark.asyncio
async def test_text_cleared_independently_across_failed_stops(self):
    """Validates _text does not accumulate across multiple failed stop attempts.

    Two consecutive muted periods with late transcriptions each trigger
    a rejected stop. The unconditional reset clears _text after each
    rejection, so no accumulation occurs. The subsequent normal turn
    completes correctly.

    After each muted period the test asserts ``stop_strategy._text`` is
    empty; after the second it also asserts the aggregator holds both
    transcripts in order. The final turn must yield one LLM step and one
    user message containing all three texts.
    """
    # The component tuple comes from a builder defined outside this test.
    injector, user_agg, stop_strategy, turn_ctrl, mock_llm, context, pipeline = (
        _build_components()
    )

    async def inject():
        # === Muted period 1: _text cleared after rejected stop ===
        await injector.inject(BotStartedSpeakingFrame())
        await asyncio.sleep(ASYNC_DELAY)
        await injector.inject(VADUserStartedSpeakingFrame())  # suppressed
        # sleep(0) just yields to the event loop between back-to-back frames.
        await asyncio.sleep(0)
        await injector.inject(UserStartedSpeakingFrame())  # suppressed
        await asyncio.sleep(ASYNC_DELAY)
        await injector.inject(BotStoppedSpeakingFrame())
        await asyncio.sleep(ASYNC_DELAY)

        # Late transcription after unmute
        await injector.inject(
            TranscriptionFrame("first", "user-1", time_now_iso8601())
        )
        await asyncio.sleep(0)
        await injector.inject(UserStoppedSpeakingFrame())
        await asyncio.sleep(TIMEOUT_WAIT)

        # Transcript is not suppressed, so we should have first in user aggregator
        assert user_agg._aggregation[0].text == "first"

        # _text is cleared by the unconditional reset after rejected stop
        assert stop_strategy._text == "", (
            f"Expected empty _text after unconditional reset, got '{stop_strategy._text}'"
        )

        # === Muted period 2: _text cleared independently, no accumulation ===
        await injector.inject(BotStartedSpeakingFrame())
        await asyncio.sleep(ASYNC_DELAY)
        await injector.inject(VADUserStartedSpeakingFrame())  # suppressed
        await asyncio.sleep(0)
        await injector.inject(UserStartedSpeakingFrame())  # suppressed
        await asyncio.sleep(ASYNC_DELAY)
        await injector.inject(BotStoppedSpeakingFrame())
        await asyncio.sleep(ASYNC_DELAY)

        await injector.inject(
            TranscriptionFrame("second", "user-1", time_now_iso8601())
        )
        await asyncio.sleep(0)
        await injector.inject(UserStoppedSpeakingFrame())
        await asyncio.sleep(TIMEOUT_WAIT)

        # _text is cleared again — no accumulation of "first" + "second"
        assert stop_strategy._text == "", (
            f"Expected empty _text after second unconditional reset, got '{stop_strategy._text}'"
        )

        # Aggregator accumulated both (separate concern, acceptable)
        assert len(user_agg._aggregation) == 2
        assert user_agg._aggregation[0].text == "first"
        assert user_agg._aggregation[1].text == "second"

        # === Turn 3: No premature stop, normal flow ===
        # _text is clean, so no premature stop occurs.
        # The turn completes normally when the timeout fires after TranscriptionFrame.
        # The aggregator has dangling "first" + "second" from muted periods,
        # which get combined with turn 3's "actual speech".
        await _inject_user_turn(injector, "actual speech")
        await asyncio.sleep(TIMEOUT_WAIT)

        assert stop_strategy._text == "", (
            f"Expected clean _text after normal turn, got '{stop_strategy._text}'"
        )
        assert mock_llm.get_current_step() == 1

        # The LLM received all three: "first" + "second" (from aggregator)
        # and "actual speech" (from turn 3).
        user_messages = [m for m in context.messages if m.get("role") == "user"]
        assert len(user_messages) == 1, (
            f"Expected 1 user message, got {len(user_messages)}"
        )
        user_text = user_messages[0]["content"]
        assert "first" in user_text, f"Expected 'first' in '{user_text}'"
        assert "second" in user_text, f"Expected 'second' in '{user_text}'"
        assert "actual speech" in user_text, (
            f"Expected 'actual speech' in '{user_text}'"
        )

        # Final frame of the scenario, sent upstream.
        await injector.inject(EndTaskFrame(), direction=FrameDirection.UPSTREAM)

    # The scenario coroutine is handed to the shared runner together with the pipeline.
    await _run_scenario(pipeline, inject)

View file

@ -0,0 +1,608 @@
"""
TDD tests for workflow versioning lifecycle.
Tests the version lifecycle on WorkflowDefinitionModel:
- status: draft / published / archived
- version_number: sequential per workflow
- released_definition_id on WorkflowModel
Modules under test:
- api.db.workflow_client (new versioning methods)
- api.db.models (new columns on WorkflowDefinitionModel, WorkflowModel)
These are DB integration tests using the transactional test session.
"""
import pytest
from api.db.models import (
OrganizationModel,
UserModel,
)
# Sample workflow definitions (graph JSON), assembled from two tiny builders
# so the differences between versions are easy to spot.
def _node(node_id, node_type, name, prompt):
    """Return one graph node dict: id, type, and a data payload."""
    return {"id": node_id, "type": node_type, "data": {"name": name, "prompt": prompt}}


def _edge(edge_id, source, target, label):
    """Return one graph edge dict linking ``source`` to ``target``."""
    return {"id": edge_id, "source": source, "target": target, "data": {"label": label}}


# V1: start -> end.
GRAPH_V1 = {
    "nodes": [
        _node("1", "startCall", "Start", "Hello"),
        _node("2", "endCall", "End", "Bye"),
    ],
    "edges": [_edge("e1", "1", "2", "End")],
}

# V2: start -> agent -> end.
GRAPH_V2 = {
    "nodes": [
        _node("1", "startCall", "Start", "Hello v2"),
        _node("2", "agentNode", "Agent", "Collect info"),
        _node("3", "endCall", "End", "Bye"),
    ],
    "edges": [
        _edge("e1", "1", "2", "Collect"),
        _edge("e2", "2", "3", "End"),
    ],
}

# V3: start -> end again, with different prompts from V1.
GRAPH_V3 = {
    "nodes": [
        _node("1", "startCall", "Start", "Hello v3"),
        _node("2", "endCall", "End", "Goodbye"),
    ],
    "edges": [_edge("e1", "1", "2", "End")],
}

# Per-version workflow configuration snapshots.
CONFIG_V1 = dict(max_call_duration=300)
CONFIG_V2 = dict(
    max_call_duration=600,
    model_overrides={"llm": {"model": "gpt-4.1-mini"}},
)

# Per-version template context variables.
TEMPLATE_VARS_V1 = dict(company_name="Acme")
TEMPLATE_VARS_V2 = dict(company_name="Acme Inc")
# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------
@pytest.fixture
async def org_and_user(async_session):
    """Add an organization and a user that has it selected, for workflow tests.

    Each row is added to ``async_session`` and flushed before the next step.

    Returns:
        An ``(organization, user)`` tuple.
    """
    organization = OrganizationModel(provider_id="test-org-versioning")
    async_session.add(organization)
    await async_session.flush()

    # The user is built only after the organization flush, since it reads
    # ``organization.id``.
    member = UserModel(
        selected_organization_id=organization.id,
        provider_id="test-user-versioning",
    )
    async_session.add(member)
    await async_session.flush()

    return organization, member
@pytest.fixture
async def workflow_with_v1(db_session, org_and_user):
    """Create a workflow from GRAPH_V1 — expected to yield V1 as published.

    Returns:
        A ``(workflow, user)`` tuple.
    """
    organization, owner = org_and_user
    created = await db_session.create_workflow(
        name="Test Workflow",
        workflow_definition=GRAPH_V1,
        user_id=owner.id,
        organization_id=organization.id,
    )
    return created, owner
# ---------------------------------------------------------------------------
# Workflow creation → V1 published
# ---------------------------------------------------------------------------
class TestWorkflowCreation:
    """Creating a workflow yields a published V1 that the workflow points at."""

    async def test_create_workflow_produces_published_v1(
        self, db_session, org_and_user
    ):
        """A fresh workflow has exactly one definition with
        status='published' and version_number=1."""
        organization, owner = org_and_user

        created = await db_session.create_workflow(
            name="New Workflow",
            workflow_definition=GRAPH_V1,
            user_id=owner.id,
            organization_id=organization.id,
        )
        all_versions = await db_session.get_workflow_versions(created.id)

        assert len(all_versions) == 1
        initial = all_versions[0]
        assert initial.status == "published"
        assert initial.version_number == 1
        assert initial.workflow_json == GRAPH_V1

    async def test_create_workflow_sets_released_pointer(
        self, db_session, org_and_user
    ):
        """``released_definition_id`` on the new workflow equals V1's id."""
        organization, owner = org_and_user

        created = await db_session.create_workflow(
            name="Pointer Test",
            workflow_definition=GRAPH_V1,
            user_id=owner.id,
            organization_id=organization.id,
        )
        all_versions = await db_session.get_workflow_versions(created.id)

        assert created.released_definition_id == all_versions[0].id
# ---------------------------------------------------------------------------
# Saving a draft
# ---------------------------------------------------------------------------
class TestSaveDraft:
    """Saving edits on a published workflow produces one mutable draft."""

    async def test_save_draft_creates_draft_version(self, db_session, workflow_with_v1):
        """The first save after V1 yields a V2 draft carrying the full snapshot."""
        workflow, _ = workflow_with_v1

        saved = await db_session.save_workflow_draft(
            workflow_id=workflow.id,
            workflow_definition=GRAPH_V2,
            workflow_configurations=CONFIG_V2,
            template_context_variables=TEMPLATE_VARS_V2,
        )

        assert saved.status == "draft"
        assert saved.version_number == 2
        assert saved.workflow_json == GRAPH_V2
        assert saved.workflow_configurations == CONFIG_V2
        assert saved.template_context_variables == TEMPLATE_VARS_V2

    async def test_save_draft_does_not_change_released_pointer(
        self, db_session, workflow_with_v1
    ):
        """Saving a draft leaves ``released_definition_id`` where it was."""
        workflow, _ = workflow_with_v1
        released_before = workflow.released_definition_id

        await db_session.save_workflow_draft(
            workflow_id=workflow.id,
            workflow_definition=GRAPH_V2,
        )

        reloaded = await db_session.get_workflow(workflow.id)
        assert reloaded.released_definition_id == released_before

    async def test_save_draft_twice_updates_in_place(
        self, db_session, workflow_with_v1
    ):
        """A second save reuses the existing draft row instead of adding one."""
        workflow, _ = workflow_with_v1

        first_save = await db_session.save_workflow_draft(
            workflow_id=workflow.id,
            workflow_definition=GRAPH_V2,
        )
        second_save = await db_session.save_workflow_draft(
            workflow_id=workflow.id,
            workflow_definition=GRAPH_V3,
        )

        # Same row, new content, version number untouched.
        assert first_save.id == second_save.id
        assert second_save.workflow_json == GRAPH_V3
        assert second_save.version_number == 2

        # Only V1 (published) and V2 (draft) exist — no extra rows.
        all_versions = await db_session.get_workflow_versions(workflow.id)
        assert len(all_versions) == 2

    async def test_save_draft_with_only_config_change(
        self, db_session, workflow_with_v1
    ):
        """A draft may keep the graph unchanged and differ only in configuration."""
        workflow, _ = workflow_with_v1

        saved = await db_session.save_workflow_draft(
            workflow_id=workflow.id,
            workflow_definition=GRAPH_V1,  # graph identical to V1
            workflow_configurations=CONFIG_V2,  # only the config differs
        )

        assert saved.status == "draft"
        assert saved.workflow_json == GRAPH_V1
        assert saved.workflow_configurations == CONFIG_V2
# ---------------------------------------------------------------------------
# Publishing a draft
# ---------------------------------------------------------------------------
class TestPublishDraft:
    """Publishing promotes the draft and archives the previous published version."""

    async def test_publish_promotes_draft_to_published(
        self, db_session, workflow_with_v1
    ):
        """Publishing turns draft → published and old published → archived."""
        workflow, _ = workflow_with_v1
        await db_session.save_workflow_draft(
            workflow_id=workflow.id,
            workflow_definition=GRAPH_V2,
            workflow_configurations=CONFIG_V2,
        )

        promoted = await db_session.publish_workflow_draft(workflow.id)

        assert promoted.status == "published"
        assert promoted.workflow_json == GRAPH_V2

        all_versions = await db_session.get_workflow_versions(workflow.id)
        status_by_number = {}
        for version in all_versions:
            status_by_number[version.version_number] = version.status
        assert status_by_number[1] == "archived"
        assert status_by_number[2] == "published"

    async def test_publish_updates_released_pointer(self, db_session, workflow_with_v1):
        """After publishing, ``released_definition_id`` equals the draft's id."""
        workflow, _ = workflow_with_v1
        pending = await db_session.save_workflow_draft(
            workflow_id=workflow.id,
            workflow_definition=GRAPH_V2,
        )

        await db_session.publish_workflow_draft(workflow.id)

        reloaded = await db_session.get_workflow(workflow.id)
        assert reloaded.released_definition_id == pending.id

    async def test_publish_sets_published_at(self, db_session, workflow_with_v1):
        """The published version carries a non-null ``published_at``."""
        workflow, _ = workflow_with_v1
        await db_session.save_workflow_draft(
            workflow_id=workflow.id,
            workflow_definition=GRAPH_V2,
        )

        promoted = await db_session.publish_workflow_draft(workflow.id)

        assert promoted.published_at is not None

    async def test_publish_with_no_draft_raises(self, db_session, workflow_with_v1):
        """Publishing without a draft raises ``ValueError`` mentioning 'no draft'."""
        workflow, _ = workflow_with_v1

        with pytest.raises(ValueError, match="[Nn]o draft"):
            await db_session.publish_workflow_draft(workflow.id)

    async def test_exactly_one_published_after_multiple_cycles(
        self, db_session, workflow_with_v1
    ):
        """Two draft/publish cycles leave exactly one published version: V3."""
        workflow, _ = workflow_with_v1

        # Cycle 1 publishes GRAPH_V2, cycle 2 publishes GRAPH_V3.
        for graph in (GRAPH_V2, GRAPH_V3):
            await db_session.save_workflow_draft(
                workflow_id=workflow.id,
                workflow_definition=graph,
            )
            await db_session.publish_workflow_draft(workflow.id)

        all_versions = await db_session.get_workflow_versions(workflow.id)
        live = [version for version in all_versions if version.status == "published"]
        assert len(live) == 1
        assert live[0].version_number == 3
# ---------------------------------------------------------------------------
# Discarding a draft
# ---------------------------------------------------------------------------
class TestDiscardDraft:
    """Discarding removes the draft and leaves the published version alone."""

    async def test_discard_removes_draft(self, db_session, workflow_with_v1):
        """After a discard only the published version is listed."""
        workflow, _ = workflow_with_v1
        await db_session.save_workflow_draft(
            workflow_id=workflow.id,
            workflow_definition=GRAPH_V2,
        )

        await db_session.discard_workflow_draft(workflow.id)

        remaining = await db_session.get_workflow_versions(workflow.id)
        assert len(remaining) == 1
        assert remaining[0].status == "published"

    async def test_discard_does_not_affect_published(
        self, db_session, workflow_with_v1
    ):
        """The released pointer is the same before and after a discard."""
        workflow, _ = workflow_with_v1
        released_before = workflow.released_definition_id
        await db_session.save_workflow_draft(
            workflow_id=workflow.id,
            workflow_definition=GRAPH_V2,
        )

        await db_session.discard_workflow_draft(workflow.id)

        reloaded = await db_session.get_workflow(workflow.id)
        assert reloaded.released_definition_id == released_before

    async def test_discard_when_no_draft_raises(self, db_session, workflow_with_v1):
        """Discarding without a draft raises ``ValueError`` mentioning 'no draft'."""
        workflow, _ = workflow_with_v1

        with pytest.raises(ValueError, match="[Nn]o draft"):
            await db_session.discard_workflow_draft(workflow.id)

    async def test_new_draft_after_discard_gets_next_version_number(
        self, db_session, workflow_with_v1
    ):
        """A draft saved after discarding the V2 draft is numbered V2 again
        (the discarded V2 was deleted and never published)."""
        workflow, _ = workflow_with_v1
        await db_session.save_workflow_draft(
            workflow_id=workflow.id,
            workflow_definition=GRAPH_V2,
        )
        await db_session.discard_workflow_draft(workflow.id)

        replacement = await db_session.save_workflow_draft(
            workflow_id=workflow.id,
            workflow_definition=GRAPH_V3,
        )

        # Reusing the number is acceptable because V2 never went live.
        assert replacement.version_number == 2
# ---------------------------------------------------------------------------
# Reverting to an archived version
# ---------------------------------------------------------------------------
class TestRevert:
    """Reverting copies an archived snapshot into a new draft."""

    async def _publish_v2(self, db_session, workflow):
        """Helper: save GRAPH_V2 with full config as a draft and publish it.

        Returns whatever ``publish_workflow_draft`` returns for that draft.
        """
        snapshot = dict(
            workflow_id=workflow.id,
            workflow_definition=GRAPH_V2,
            workflow_configurations=CONFIG_V2,
            template_context_variables=TEMPLATE_VARS_V2,
        )
        await db_session.save_workflow_draft(**snapshot)
        published_v2 = await db_session.publish_workflow_draft(workflow.id)
        return published_v2

    async def test_revert_creates_draft_from_archived(
        self, db_session, workflow_with_v1
    ):
        """Reverting to archived V1 yields a draft whose graph equals GRAPH_V1."""
        workflow, _ = workflow_with_v1

        # Capture V1's definition id while it is still the only version.
        initial_versions = await db_session.get_workflow_versions(workflow.id)
        first_id = initial_versions[0].id

        # Publishing V2 archives V1.
        await self._publish_v2(db_session, workflow)

        reverted = await db_session.revert_to_version(workflow.id, first_id)

        assert reverted.status == "draft"
        assert reverted.workflow_json == GRAPH_V1

    async def test_revert_preserves_all_snapshot_fields(
        self, db_session, workflow_with_v1
    ):
        """Revert carries over graph, configurations, and template variables."""
        workflow, _ = workflow_with_v1

        # V2 is published with the full configuration snapshot ...
        second = await self._publish_v2(db_session, workflow)

        # ... and then archived by publishing V3.
        await db_session.save_workflow_draft(
            workflow_id=workflow.id,
            workflow_definition=GRAPH_V3,
        )
        await db_session.publish_workflow_draft(workflow.id)

        reverted = await db_session.revert_to_version(workflow.id, second.id)

        assert reverted.workflow_json == GRAPH_V2
        assert reverted.workflow_configurations == CONFIG_V2
        assert reverted.template_context_variables == TEMPLATE_VARS_V2

    async def test_revert_when_draft_exists_raises(self, db_session, workflow_with_v1):
        """Reverting while a draft exists raises ``ValueError`` — discard first."""
        workflow, _ = workflow_with_v1
        initial_versions = await db_session.get_workflow_versions(workflow.id)
        first_id = initial_versions[0].id
        await self._publish_v2(db_session, workflow)

        # An open draft blocks the revert.
        await db_session.save_workflow_draft(
            workflow_id=workflow.id,
            workflow_definition=GRAPH_V3,
        )

        with pytest.raises(ValueError, match="[Dd]raft.*exists"):
            await db_session.revert_to_version(workflow.id, first_id)

    async def test_revert_does_not_change_released_pointer(
        self, db_session, workflow_with_v1
    ):
        """Revert only creates a draft; the released pointer stays on V2."""
        workflow, _ = workflow_with_v1
        initial_versions = await db_session.get_workflow_versions(workflow.id)
        first_id = initial_versions[0].id
        second = await self._publish_v2(db_session, workflow)

        await db_session.revert_to_version(workflow.id, first_id)

        reloaded = await db_session.get_workflow(workflow.id)
        assert reloaded.released_definition_id == second.id  # still V2
# ---------------------------------------------------------------------------
# Version listing & ordering
# ---------------------------------------------------------------------------
class TestVersionListing:
    """Listing returns every version, newest first, each with a status."""

    async def test_versions_ordered_by_version_number_desc(
        self, db_session, workflow_with_v1
    ):
        """Versions should be returned newest first.

        Setup leaves three versions (V1 archived, V2 published, V3 draft), so
        the listing is pinned to ``[3, 2, 1]``. Comparing the list only with
        its own sorted copy would also pass for an empty or single-element
        result and therefore would not prove anything about ordering.
        """
        workflow, user = workflow_with_v1
        await db_session.save_workflow_draft(
            workflow_id=workflow.id,
            workflow_definition=GRAPH_V2,
        )
        await db_session.publish_workflow_draft(workflow.id)
        await db_session.save_workflow_draft(
            workflow_id=workflow.id,
            workflow_definition=GRAPH_V3,
        )

        versions = await db_session.get_workflow_versions(workflow.id)
        version_numbers = [v.version_number for v in versions]

        assert version_numbers == [3, 2, 1], (
            f"Expected versions newest first, got {version_numbers}"
        )

    async def test_versions_include_status(self, db_session, workflow_with_v1):
        """Each version should have an explicit status."""
        workflow, user = workflow_with_v1
        await db_session.save_workflow_draft(
            workflow_id=workflow.id,
            workflow_definition=GRAPH_V2,
        )
        await db_session.publish_workflow_draft(workflow.id)
        await db_session.save_workflow_draft(
            workflow_id=workflow.id,
            workflow_definition=GRAPH_V3,
        )

        versions = await db_session.get_workflow_versions(workflow.id)
        statuses = {v.version_number: v.status for v in versions}

        assert statuses == {1: "archived", 2: "published", 3: "draft"}
# ---------------------------------------------------------------------------
# Version data stored on definition, not workflow
# ---------------------------------------------------------------------------
class TestVersionDataOnDefinition:
    """Configuration and template variables are read from the definition rows."""

    async def test_configs_stored_on_definition(self, db_session, workflow_with_v1):
        """The saved draft exposes the configurations and template variables
        that were passed in."""
        workflow, _ = workflow_with_v1

        saved = await db_session.save_workflow_draft(
            workflow_id=workflow.id,
            workflow_definition=GRAPH_V2,
            workflow_configurations=CONFIG_V2,
            template_context_variables=TEMPLATE_VARS_V2,
        )

        assert saved.workflow_configurations == CONFIG_V2
        assert saved.template_context_variables == TEMPLATE_VARS_V2

    async def test_different_versions_have_different_configs(
        self, db_session, workflow_with_v1
    ):
        """V1 and V2 keep independent configuration values."""
        workflow, _ = workflow_with_v1
        await db_session.save_workflow_draft(
            workflow_id=workflow.id,
            workflow_definition=GRAPH_V2,
            workflow_configurations=CONFIG_V2,
        )
        await db_session.publish_workflow_draft(workflow.id)

        all_versions = await db_session.get_workflow_versions(workflow.id)
        config_for = {}
        for version in all_versions:
            config_for[version.version_number] = version.workflow_configurations

        assert config_for[1] != config_for[2]
# ---------------------------------------------------------------------------
# Run creation uses published (or draft for testing)
# ---------------------------------------------------------------------------
class TestRunDefinitionBinding:
    """Which definition a new run is bound to: published by default, draft on request."""

    async def test_campaign_run_uses_published_version(
        self, db_session, workflow_with_v1
    ):
        """A run created without ``use_draft`` binds to the published version."""
        workflow, owner = workflow_with_v1

        # An unpublished draft exists alongside the published V1.
        await db_session.save_workflow_draft(
            workflow_id=workflow.id,
            workflow_definition=GRAPH_V2,
        )

        # Simulates campaign dispatch.
        campaign_run = await db_session.create_workflow_run(
            name="Campaign Run",
            workflow_id=workflow.id,
            mode="webrtc",
            user_id=owner.id,
        )

        # The run must point at published V1, not at the V2 draft.
        all_versions = await db_session.get_workflow_versions(workflow.id)
        live = next(filter(lambda v: v.status == "published", all_versions))
        assert campaign_run.definition_id == live.id

    async def test_test_run_uses_draft_if_exists(self, db_session, workflow_with_v1):
        """A run created with ``use_draft=True`` binds to the draft version."""
        workflow, owner = workflow_with_v1
        pending = await db_session.save_workflow_draft(
            workflow_id=workflow.id,
            workflow_definition=GRAPH_V2,
        )

        trial_run = await db_session.create_workflow_run(
            name="Test Run",
            workflow_id=workflow.id,
            mode="webrtc",  # test mode
            user_id=owner.id,
            use_draft=True,
        )

        assert trial_run.definition_id == pending.id