mirror of
https://github.com/dograh-hq/dograh.git
synced 2026-06-07 07:55:16 +02:00
* chore: rename PipelineTask to PipelineWorker * fix: fix tests * chore: update pipecat submodule * fix: fix anyio same task cancellation scope
290 lines
11 KiB
Python
290 lines
11 KiB
Python
"""Integration test for the text-greeting flow through ``_run_pipeline``.
|
|
|
|
Drives the full pipeline produced by ``_run_pipeline`` against the test
|
|
database with a workflow whose start node has a text greeting configured.
|
|
The flow under test:
|
|
|
|
1. ``maybe_trigger_initial_response`` (in ``event_handlers.py``) sees a
|
|
text greeting and queues ``TTSSpeakFrame(greeting)``.
|
|
2. ``MockTTSService`` synthesises audio for the greeting; the real
|
|
``MediaSender`` machinery in ``MockOutputTransport`` emits
|
|
``BotStartedSpeakingFrame`` and ``BotStoppedSpeakingFrame``.
|
|
3. The TTS service emits an ``LLMAssistantPushAggregationFrame`` after
|
|
``TTSStoppedFrame``, so the greeting is appended to the assistant
|
|
context by ``LLMAssistantAggregator``.
|
|
4. We then push a ``TranscriptionFrame`` into the pipeline. After the
|
|
user-turn-stop timeout, ``LLMUserAggregator`` pushes a context frame
|
|
to the LLM, ``MockLLMService`` returns an ``end_call`` tool call, and
|
|
the engine's transition function moves to the end node and calls
|
|
``end_call_with_reason``.
|
|
5. ``on_pipeline_finished`` records the run as COMPLETED.
|
|
|
|
External boundaries are patched via ``patch_run_pipeline_externals``
|
|
from the shared helpers module. Preconfigured ``MockLLMService`` /
|
|
``MockTTSService`` instances are passed in so the end_call response is
|
|
deterministic and the synthesised audio length is short.
|
|
"""
|
|
|
|
import asyncio
|
|
|
|
import pytest
|
|
from pipecat.frames.frames import TranscriptionFrame
|
|
from pipecat.tests.mock_transport import MockTransport
|
|
from pipecat.transports.base_transport import TransportParams
|
|
from pipecat.utils.time import time_now_iso8601
|
|
|
|
from api.enums import WorkflowRunMode, WorkflowRunState
|
|
from api.services.pipecat.audio_config import create_audio_config
|
|
from api.services.pipecat.run_pipeline import _run_pipeline
|
|
from api.services.pipecat.worker_runner import wait_for_pipeline_worker_started
|
|
from api.tests.integrations._run_pipeline_helpers import (
|
|
create_workflow_run_rows,
|
|
patch_run_pipeline_externals,
|
|
)
|
|
from pipecat.tests import MockLLMService, MockTTSService
|
|
|
|
# Text configured as the start node's greeting; the test later looks for this
# exact string in the assistant context.
GREETING_TEXT = (
    "Thanks for calling Happy Feet, this is Sarah. "
    "How can I help you today?"
)

# Start node: carries the text greeting and the prompt for the first turn.
_START_NODE = {
    "id": "start",
    "type": "startCall",
    "position": {"x": 0, "y": 0},
    "data": {
        "name": "Start",
        "prompt": "You are Sarah. Help the caller and end the call when they ask.",
        "is_start": True,
        "allow_interrupt": False,
        "add_global_prompt": False,
        "greeting": GREETING_TEXT,
        "greeting_type": "text",
    },
}

# End node: target of the single "End Call" edge.
_END_NODE = {
    "id": "end",
    "type": "endCall",
    "position": {"x": 0, "y": 200},
    "data": {
        "name": "End",
        "prompt": "End the call politely.",
        "is_end": True,
        "allow_interrupt": False,
        "add_global_prompt": False,
    },
}

# The only transition in the graph: start -> end.
_START_TO_END_EDGE = {
    "id": "start-end",
    "source": "start",
    "target": "end",
    "data": {"label": "End Call", "condition": "When the user wants to end."},
}

# Minimal two-node workflow used by this test module.
WORKFLOW_DEFINITION = {
    "nodes": [_START_NODE, _END_NODE],
    "edges": [_START_TO_END_EDGE],
}
|
|
|
|
# Hard cap on the entire test. Without this, a hung pipeline would keep the
# pytest worker alive indefinitely (the harness has no pytest-timeout plugin).
# Expressed in seconds; the test function passes it as the ``timeout`` of the
# ``asyncio.wait_for`` call that wraps the whole test body.
TEST_HARD_TIMEOUT_SECONDS = 25.0
|
|
|
|
|
|
@pytest.fixture
async def workflow_run_setup(db_session, async_session):
    """Seed the org/user/user_configuration/workflow/workflow_run rows.

    Delegates to ``create_workflow_run_rows`` using ``WORKFLOW_DEFINITION``,
    whose start node is configured with a text greeting, and returns whatever
    that helper returns.
    """
    seed_options = {
        "workflow_definition": WORKFLOW_DEFINITION,
        "name_prefix": "Text Greeting Integration",
        "provider_id_suffix": "text-greeting",
    }
    created_rows = await create_workflow_run_rows(
        db_session, async_session, **seed_options
    )
    return created_rows
|
|
|
|
|
|
def _greeting_in_assistant_context(context) -> bool:
    """Tell whether some assistant message in ``context`` contains the greeting.

    Only dict messages whose ``role`` is ``"assistant"`` are inspected; a
    missing or empty ``content`` is treated as an empty string.
    """
    return any(
        GREETING_TEXT in (entry.get("content") or "")
        for entry in context.get_messages()
        if isinstance(entry, dict) and entry.get("role") == "assistant"
    )
|
|
|
|
|
|
def _find_processor_by_class_name(pipeline_task, class_name: str):
|
|
"""Walk every processor reachable from the task's pipeline (including nested
|
|
sub-pipelines) and return the first one whose class name matches."""
|
|
visited: set[int] = set()
|
|
stack = [pipeline_task._pipeline]
|
|
while stack:
|
|
processor = stack.pop()
|
|
if id(processor) in visited:
|
|
continue
|
|
visited.add(id(processor))
|
|
if processor.__class__.__name__ == class_name:
|
|
return processor
|
|
sub = getattr(processor, "_processors", None)
|
|
if sub:
|
|
stack.extend(sub)
|
|
return None
|
|
|
|
|
|
async def _wait_for(predicate, *, timeout: float, interval: float = 0.05) -> bool:
|
|
"""Poll ``predicate`` (sync callable returning bool) until it returns True
|
|
or the timeout elapses. Returns the final predicate value."""
|
|
deadline = asyncio.get_event_loop().time() + timeout
|
|
while asyncio.get_event_loop().time() < deadline:
|
|
if predicate():
|
|
return True
|
|
await asyncio.sleep(interval)
|
|
return predicate()
|
|
|
|
|
|
async def _run_test_body(workflow_run_setup, db_session) -> None:
    """Drive ``_run_pipeline`` through greeting -> user transcript -> end_call.

    Starts ``_run_pipeline`` as a background task with mocked LLM/TTS and a
    ``MockTransport``, waits for the greeting to show up in the assistant
    context, queues a ``TranscriptionFrame``, waits for the run task to
    finish, and then asserts on the workflow run row read back through
    ``db_session``.

    Args:
        workflow_run_setup: Unpacked as ``(workflow_run, user, workflow)``.
        db_session: Used to re-read the workflow run via
            ``get_workflow_run_by_id``.
    """
    workflow_run, user, workflow = workflow_run_setup

    # Prepare the LLM with one step: the end_call function call.
    # Edge label "End Call" maps to function name "end_call".
    end_call_chunks = MockLLMService.create_function_call_chunks(
        function_name="end_call",
        arguments={},
        tool_call_id="call_end_1",
    )
    llm = MockLLMService(mock_steps=[end_call_chunks], chunk_delay=0.001)

    # Short audio greeting so the bot finishes speaking quickly in tests.
    tts = MockTTSService(mock_audio_duration_ms=50, frame_delay=0)

    transport = MockTransport(
        TransportParams(audio_in_enabled=True, audio_out_enabled=True)
    )

    # Handed to patch_run_pipeline_externals; the code below expects the
    # pipeline task to appear as its first element once it has been created.
    captured_task: list = []
    audio_config = create_audio_config(WorkflowRunMode.SMALLWEBRTC.value)
    # Stays None until the task is captured so the ``finally`` block can tell
    # whether there is anything to cancel.
    pipeline_task = None

    try:
        with patch_run_pipeline_externals(captured_task, llm=llm, tts=tts):
            run_coro = _run_pipeline(
                transport=transport,
                workflow_id=workflow.id,
                workflow_run_id=workflow_run.id,
                user_id=user.id,
                audio_config=audio_config,
                user_provider_id=user.provider_id,
            )
            run_task = asyncio.create_task(run_coro)

            # Poll (60 x 0.05s, i.e. roughly 3s at most) until the pipeline
            # task has been captured or the run ended early.
            for _ in range(60):
                if captured_task or run_task.done():
                    break
                await asyncio.sleep(0.05)
            if run_task.done() and not captured_task:
                # The run finished without creating a pipeline task: calling
                # result() re-raises its exception, if any, so the real
                # failure is reported instead of the generic assert below.
                run_task.result()
            assert captured_task, "create_pipeline_task was never invoked"
            pipeline_task = captured_task[0]

            await wait_for_pipeline_worker_started(
                pipeline_task, timeout=3.0, run_task=run_task
            )

            # Locate the assistant aggregator's LLM context (downstream of TTS).
            # The PipelineWorker wraps the user's pipeline inside another Pipeline,
            # so we walk the tree recursively.
            assistant_aggregator = _find_processor_by_class_name(
                pipeline_task, "LLMAssistantAggregator"
            )
            assert assistant_aggregator is not None, (
                "LLMAssistantAggregator not found in pipeline"
            )
            context = assistant_aggregator.context

            # Wait for the greeting to be appended to the assistant context. The
            # TTSSpeakFrame -> audio frames -> BotStoppedSpeaking -> assistant
            # aggregation push chain runs through the real pipeline.
            appeared = await _wait_for(
                lambda: _greeting_in_assistant_context(context), timeout=5.0
            )
            assert appeared, (
                "Greeting was not appended to the assistant context. "
                f"Messages: {context.get_messages()}"
            )

            # The LLM must not have been invoked yet — the greeting bypasses
            # the LLM entirely (goes straight to TTS via TTSSpeakFrame).
            assert llm.get_current_step() == 0, (
                f"LLM should not have run yet; current_step={llm.get_current_step()}"
            )

            # Now simulate the user replying. SpeechTimeoutUserTurnStopStrategy
            # (default 0.6s) ends the user turn, which triggers an LLM run;
            # the LLM returns end_call; the transition function moves to the
            # end node and ends the call.
            await pipeline_task.queue_frame(
                TranscriptionFrame(
                    text="I want to end the call now please.",
                    user_id="test-user",
                    timestamp=time_now_iso8601(),
                )
            )

            # Wait for the run to complete.
            await asyncio.wait_for(run_task, timeout=10.0)

        # Outside the patch ctx so the assertions exercise real DB state.
        # The first LLM run produces the end_call; the engine then transitions
        # to the End node and triggers a second generation (which is empty —
        # mock_steps[1] is unset). What matters is that at least one run
        # happened, i.e. the user transcript actually drove the LLM.
        assert llm.get_current_step() >= 1, (
            f"Expected at least one LLM generation; got step={llm.get_current_step()}"
        )

        # Re-read the run row and check the persisted outcome.
        refreshed = await db_session.get_workflow_run_by_id(workflow_run.id)
        assert refreshed.is_completed is True
        assert refreshed.state == WorkflowRunState.COMPLETED.value
        nodes_visited = refreshed.gathered_context.get("nodes_visited", [])
        assert "Start" in nodes_visited
        assert "End" in nodes_visited
    finally:
        # Best-effort cleanup so a partially-run pipeline doesn't leak tasks
        # past the test boundary.
        # NOTE(review): ``run_task`` itself is never cancelled here. If the
        # test fails while ``pipeline_task`` is still None, the background
        # run may outlive this function — confirm whether that is acceptable.
        if pipeline_task is not None and not pipeline_task.has_finished():
            try:
                await asyncio.wait_for(pipeline_task.cancel(), timeout=3.0)
            except Exception:
                # Cleanup errors (including the 3s wait_for timeout) are
                # ignored so they cannot replace the test's own failure.
                pass
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_text_greeting_speaks_then_user_transcript_triggers_end_call(
    workflow_run_setup, db_session
):
    """End-to-end check of the text-greeting flow.

    - ``maybe_trigger_initial_response`` queues ``TTSSpeakFrame`` for the
      start-node text greeting.
    - ``MockTTSService`` synthesises audio; ``MockOutputTransport`` emits
      bot speaking events; the assistant aggregator appends the greeting
      to the context after the TTS turn ends.
    - A ``TranscriptionFrame`` is pushed into the pipeline. After the user
      turn stop timeout, ``MockLLMService`` returns an ``end_call`` tool
      call which transitions to the end node and ends the run.

    The scenario itself lives in ``_run_test_body``; this wrapper only bounds
    it by ``TEST_HARD_TIMEOUT_SECONDS`` so that a hung pipeline surfaces as a
    test failure instead of wedging the test runner.
    """
    scenario = _run_test_body(workflow_run_setup, db_session)
    try:
        await asyncio.wait_for(scenario, timeout=TEST_HARD_TIMEOUT_SECONDS)
    except asyncio.TimeoutError as timeout_error:
        # Translate the timeout into an assertion failure with a hint for
        # whoever has to debug the hang.
        failure_message = (
            f"Test exceeded hard timeout of {TEST_HARD_TIMEOUT_SECONDS}s — "
            "pipeline likely hung. Check earlier debug logs for the last frame "
            "to reach the pipeline."
        )
        raise AssertionError(failure_message) from timeout_error
|