dograh/api/tests/integrations/test_run_pipeline.py
Abhishek a54ab519b8
chore: refactor file upload mechanism to avoid NFS dependency (#496)
* chore: refactor file upload mechanism to avoid NFS dependency

* add regression test for deregistration of calls

* fix: fix minio upload issue

* fix: make transcript upload async
2026-07-03 20:01:52 +05:30

209 lines
8 KiB
Python

"""Integration tests for ``api.services.pipecat.run_pipeline._run_pipeline``.
Drives the actual ``_run_pipeline`` against the test database with real
DB rows (organization, user, user configuration, workflow, workflow run)
and pipecat's real ``MockTransport`` / ``Pipeline`` / ``PipelineWorker``.
The only patches are for things that talk to genuinely external systems;
those are applied via ``patch_run_pipeline_externals`` from the shared
helpers module.
Verifies that the wiring done by ``_run_pipeline`` (in particular
``register_event_handlers``) produces the right behaviour end-to-end:
``maybe_trigger_initial_response`` fires (``engine.set_node`` runs), and
on shutdown the workflow run is persisted with the expected state,
completion flag, and ``gathered_context`` entries.
"""
import asyncio
import pytest
from pipecat.tests.mock_transport import MockTransport
from pipecat.transports.base_transport import TransportParams
from api.enums import WorkflowRunMode, WorkflowRunState
from api.services.pipecat import active_calls
from api.services.pipecat.audio_config import create_audio_config
from api.services.pipecat.run_pipeline import _run_pipeline
from api.services.pipecat.worker_runner import wait_for_pipeline_worker_started
from api.tests.integrations._run_pipeline_helpers import (
create_workflow_run_rows,
patch_run_pipeline_externals,
)
WORKFLOW_DEFINITION = {
"nodes": [
{
"id": "start",
"type": "startCall",
"position": {"x": 0, "y": 0},
"data": {
"name": "Start",
"prompt": "You are a helpful assistant. Greet the user briefly.",
"is_start": True,
"allow_interrupt": False,
"add_global_prompt": False,
},
},
{
"id": "end",
"type": "endCall",
"position": {"x": 0, "y": 200},
"data": {
"name": "End",
"prompt": "End the call politely.",
"is_end": True,
"allow_interrupt": False,
"add_global_prompt": False,
},
},
],
"edges": [
{
"id": "start-end",
"source": "start",
"target": "end",
"data": {"label": "End", "condition": "When the user wants to end."},
}
],
}
@pytest.fixture
async def workflow_run_setup(db_session, async_session):
"""Create org/user/user_configuration/workflow/workflow_run rows in the
test database. Returns (workflow_run, user, workflow)."""
return await create_workflow_run_rows(
db_session,
async_session,
workflow_definition=WORKFLOW_DEFINITION,
name_prefix="Event Handler Integration",
provider_id_suffix="event-handlers",
)
@pytest.mark.asyncio
async def test_run_pipeline_fires_initial_response_and_completes_run(
workflow_run_setup, db_session
):
"""End-to-end: _run_pipeline boots, register_event_handlers wires up,
on_pipeline_started + on_client_connected both fire, the initial
response is triggered (set_node), and on_pipeline_finished updates
the workflow_run row to COMPLETED."""
workflow_run, user, workflow = workflow_run_setup
transport = MockTransport(
TransportParams(audio_in_enabled=True, audio_out_enabled=True)
)
captured_task: list = []
audio_config = create_audio_config(WorkflowRunMode.SMALLWEBRTC.value)
with patch_run_pipeline_externals(captured_task):
run_coro = _run_pipeline(
transport=transport,
workflow_id=workflow.id,
workflow_run_id=workflow_run.id,
user_id=user.id,
audio_config=audio_config,
user_provider_id=user.provider_id,
)
run_task = asyncio.create_task(run_coro)
# Wait until create_pipeline_task is invoked. Surface any
# exception from _run_pipeline immediately rather than swallowing
# it during the wait loop.
for _ in range(60):
if captured_task or run_task.done():
break
await asyncio.sleep(0.05)
if run_task.done() and not captured_task:
run_task.result() # re-raise the failure
assert captured_task, "create_pipeline_task was never invoked"
pipeline_task = captured_task[0]
await wait_for_pipeline_worker_started(
pipeline_task, timeout=3.0, run_task=run_task
)
# Let the initial response handler (set_node, queue LLMContextFrame)
# complete before tearing things down.
await asyncio.sleep(0.1)
await pipeline_task.cancel()
await asyncio.wait_for(run_task, timeout=5.0)
# Verify the run was completed end-to-end via the real on_pipeline_finished
# handler — DB side effects, not mock assertions.
refreshed = await db_session.get_workflow_run_by_id(workflow_run.id)
assert refreshed.is_completed is True
assert refreshed.state == WorkflowRunState.COMPLETED.value
# set_node("start") populates "nodes_visited" via _gathered_context, and
# on_pipeline_finished merges call_tags into gathered_context.
assert "Start" in refreshed.gathered_context.get("nodes_visited", [])
assert "call_tags" in refreshed.gathered_context
@pytest.mark.asyncio
async def test_call_stays_registered_for_drain_until_artifacts_uploaded(
workflow_run_setup, monkeypatch
):
"""The active-call registry must not drop a run while its artifacts are
still uploading: deploy draining polls the registry and SIGTERMs the
worker at zero, which would kill in-flight uploads.
The ordering rests on pipecat waiting for spawned ``on_pipeline_finished``
handler tasks before ``PipelineWorker.run()`` — and therefore
``run_pipeline_worker()`` — returns, with ``unregister_active_call`` in
the ``finally`` after that. This test pins the guarantee: a slow upload
samples the registry mid-flight and must still see the call registered.
"""
workflow_run, user, workflow = workflow_run_setup
active_calls._active_run_ids.clear()
run_task_ref: list[asyncio.Task] = []
observed: dict = {}
async def slow_upload(workflow_run_id, **_kwargs):
# Give _run_pipeline a chance to (incorrectly) return and unregister
# while the upload is still in flight, then sample the registry.
await asyncio.sleep(0.2)
observed["count_during_upload"] = active_calls.active_call_count()
observed["run_task_done_during_upload"] = run_task_ref[0].done()
monkeypatch.setattr(
"api.services.pipecat.event_handlers.upload_workflow_run_artifacts",
slow_upload,
)
transport = MockTransport(
TransportParams(audio_in_enabled=True, audio_out_enabled=True)
)
captured_task: list = []
audio_config = create_audio_config(WorkflowRunMode.SMALLWEBRTC.value)
with patch_run_pipeline_externals(captured_task):
run_task = asyncio.create_task(
_run_pipeline(
transport=transport,
workflow_id=workflow.id,
workflow_run_id=workflow_run.id,
user_id=user.id,
audio_config=audio_config,
user_provider_id=user.provider_id,
)
)
run_task_ref.append(run_task)
for _ in range(60):
if captured_task or run_task.done():
break
await asyncio.sleep(0.05)
if run_task.done() and not captured_task:
run_task.result() # re-raise the failure
assert captured_task, "create_pipeline_task was never invoked"
pipeline_task = captured_task[0]
await wait_for_pipeline_worker_started(
pipeline_task, timeout=3.0, run_task=run_task
)
assert active_calls.active_call_count() == 1
await pipeline_task.cancel()
await asyncio.wait_for(run_task, timeout=5.0)
assert observed, "upload_workflow_run_artifacts was never called"
assert observed["count_during_upload"] == 1
assert observed["run_task_done_during_upload"] is False
assert active_calls.active_call_count() == 0