mirror of
https://github.com/dograh-hq/dograh.git
synced 2026-06-28 08:49:42 +02:00
feat: add ultravox realtime and fix signature issue in telephony
- Add UltraVox realtime - Fix signature issue on telephony
This commit is contained in:
parent
9135c2da13
commit
ea0cac63cd
24 changed files with 2082 additions and 133 deletions
459
api/tests/test_ultravox_realtime_wrapper.py
Normal file
459
api/tests/test_ultravox_realtime_wrapper.py
Normal file
|
|
@ -0,0 +1,459 @@
|
|||
from types import SimpleNamespace
|
||||
from unittest.mock import AsyncMock, call
|
||||
|
||||
import pytest
|
||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.frames.frames import LLMMessagesAppendFrame, TTSSpeakFrame
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.frame_processor import FrameDirection
|
||||
from websockets.exceptions import ConnectionClosedError
|
||||
from websockets.frames import Close
|
||||
|
||||
from api.schemas.user_configuration import UserConfiguration
|
||||
from api.services.configuration.registry import UltravoxRealtimeLLMConfiguration
|
||||
from api.services.pipecat.realtime.ultravox_realtime import (
|
||||
_RESUMPTION_USER_MESSAGE,
|
||||
DograhUltravoxOneShotInputParams,
|
||||
DograhUltravoxRealtimeLLMService,
|
||||
)
|
||||
from api.services.pipecat.service_factory import create_realtime_llm_service
|
||||
|
||||
|
||||
class _ClosingSocket:
|
||||
def __init__(self, exc):
|
||||
self._exc = exc
|
||||
|
||||
def __aiter__(self):
|
||||
return self
|
||||
|
||||
async def __anext__(self):
|
||||
raise self._exc
|
||||
|
||||
|
||||
class _MessageSocket:
|
||||
def __init__(self, messages):
|
||||
self._messages = iter(messages)
|
||||
|
||||
def __aiter__(self):
|
||||
return self
|
||||
|
||||
async def __anext__(self):
|
||||
try:
|
||||
return next(self._messages)
|
||||
except StopIteration:
|
||||
raise StopAsyncIteration
|
||||
|
||||
|
||||
def _make_service() -> DograhUltravoxRealtimeLLMService:
|
||||
service = DograhUltravoxRealtimeLLMService(
|
||||
params=DograhUltravoxOneShotInputParams(
|
||||
api_key="test-key",
|
||||
model="ultravox-v0.7",
|
||||
output_medium="voice",
|
||||
),
|
||||
settings=DograhUltravoxRealtimeLLMService.Settings(
|
||||
model="ultravox-v0.7",
|
||||
output_medium="voice",
|
||||
),
|
||||
)
|
||||
service.stop_all_metrics = AsyncMock()
|
||||
service.cancel_task = AsyncMock()
|
||||
service.push_error = AsyncMock()
|
||||
return service
|
||||
|
||||
|
||||
def _tool_schema() -> ToolsSchema:
|
||||
return ToolsSchema(
|
||||
standard_tools=[
|
||||
FunctionSchema(
|
||||
name="transition_to_next_node",
|
||||
description="Move to the next workflow node",
|
||||
properties={"reason": {"type": "string"}},
|
||||
required=[],
|
||||
)
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_tts_greeting_triggers_initial_connect():
|
||||
service = _make_service()
|
||||
service._connect_call = AsyncMock()
|
||||
|
||||
await service.process_frame(
|
||||
TTSSpeakFrame("Hello there", append_to_context=True),
|
||||
FrameDirection.DOWNSTREAM,
|
||||
)
|
||||
|
||||
service._connect_call.assert_awaited_once()
|
||||
assert service._connect_call.await_args.kwargs["greeting_text"] == "Hello there"
|
||||
assert service._connect_call.await_args.kwargs["agent_speaks_first"] is True
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_initial_context_connects_without_replay():
|
||||
service = _make_service()
|
||||
service._connect_call = AsyncMock()
|
||||
context = LLMContext()
|
||||
|
||||
await service._handle_context(context)
|
||||
|
||||
service._connect_call.assert_awaited_once()
|
||||
assert service._connect_call.await_args.kwargs["initial_messages"] is None
|
||||
assert service._connect_call.await_args.kwargs["agent_speaks_first"] is True
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_system_instruction_update_marks_reconnect_required():
|
||||
service = _make_service()
|
||||
service._has_connected_once = True
|
||||
|
||||
changed = await service._update_settings(
|
||||
DograhUltravoxRealtimeLLMService.Settings(system_instruction="new instruction")
|
||||
)
|
||||
|
||||
assert "system_instruction" in changed
|
||||
assert service._reconnect_required is True
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_system_instruction_change_reconnects_with_full_initial_messages():
|
||||
service = _make_service()
|
||||
service._socket = object()
|
||||
service._has_connected_once = True
|
||||
service._call_system_instruction = "old instruction"
|
||||
service._reconnect_required = True
|
||||
service._settings.system_instruction = "new instruction"
|
||||
service._reconnect_with_context = AsyncMock()
|
||||
|
||||
context = LLMContext(
|
||||
messages=[
|
||||
{"role": "user", "content": "I want to hear the pricing."},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "Let me check that for you.",
|
||||
"tool_calls": [
|
||||
{
|
||||
"id": "call-transition",
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "transition_to_next_node",
|
||||
"arguments": '{"reason":"pricing requested"}',
|
||||
},
|
||||
}
|
||||
],
|
||||
},
|
||||
{
|
||||
"role": "tool",
|
||||
"tool_call_id": "call-transition",
|
||||
"content": '{"status":"done"}',
|
||||
},
|
||||
],
|
||||
tools=_tool_schema(),
|
||||
)
|
||||
|
||||
await service._handle_context(context)
|
||||
|
||||
service._reconnect_with_context.assert_awaited_once()
|
||||
initial_messages = service._reconnect_with_context.await_args.kwargs[
|
||||
"initial_messages"
|
||||
]
|
||||
assert initial_messages == [
|
||||
{
|
||||
"role": "MESSAGE_ROLE_USER",
|
||||
"text": "I want to hear the pricing.",
|
||||
},
|
||||
{
|
||||
"role": "MESSAGE_ROLE_AGENT",
|
||||
"text": "Let me check that for you.",
|
||||
},
|
||||
{
|
||||
"role": "MESSAGE_ROLE_TOOL_CALL",
|
||||
"text": "",
|
||||
"invocationId": "call-transition",
|
||||
"toolName": "transition_to_next_node",
|
||||
},
|
||||
{
|
||||
"role": "MESSAGE_ROLE_TOOL_RESULT",
|
||||
"text": '{"status":"done"}',
|
||||
"invocationId": "call-transition",
|
||||
"toolName": "transition_to_next_node",
|
||||
},
|
||||
]
|
||||
assert "call-transition" in service._completed_tool_calls
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_tool_context_update_does_not_reconnect_when_system_instruction_is_unchanged():
|
||||
service = _make_service()
|
||||
service._socket = object()
|
||||
service._call_system_instruction = "same instruction"
|
||||
service._settings.system_instruction = "same instruction"
|
||||
service._reconnect_with_context = AsyncMock()
|
||||
service._send_tool_result = AsyncMock()
|
||||
|
||||
context = LLMContext(
|
||||
messages=[
|
||||
{
|
||||
"role": "tool",
|
||||
"tool_call_id": "call-transition",
|
||||
"content": '{"status":"done"}',
|
||||
},
|
||||
],
|
||||
tools=_tool_schema(),
|
||||
)
|
||||
|
||||
await service._handle_context(context)
|
||||
|
||||
service._reconnect_with_context.assert_not_awaited()
|
||||
service._send_tool_result.assert_awaited_once_with(
|
||||
"call-transition",
|
||||
'{"status":"done"}',
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_messages_append_frame_sends_user_text():
|
||||
service = _make_service()
|
||||
service._socket = object()
|
||||
service._call_started = True
|
||||
service._send_user_text = AsyncMock()
|
||||
|
||||
await service._handle_messages_append(
|
||||
LLMMessagesAppendFrame(
|
||||
[{"role": "user", "content": "Are you still there?"}],
|
||||
run_llm=True,
|
||||
)
|
||||
)
|
||||
|
||||
service._send_user_text.assert_awaited_once_with("Are you still there?")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_messages_append_frame_queues_user_text_until_call_started():
|
||||
service = _make_service()
|
||||
service._socket = object()
|
||||
service._call_started = False
|
||||
service._send_user_text = AsyncMock()
|
||||
|
||||
await service._handle_messages_append(
|
||||
LLMMessagesAppendFrame(
|
||||
[{"role": "user", "content": "Are you still there?"}],
|
||||
run_llm=True,
|
||||
)
|
||||
)
|
||||
|
||||
assert service._pending_user_text_messages == ["Are you still there?"]
|
||||
service._send_user_text.assert_not_awaited()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_call_started_flushes_pending_user_text_messages():
|
||||
service = _make_service()
|
||||
service._pending_user_text_messages = [
|
||||
"First queued message",
|
||||
"Second queued message",
|
||||
]
|
||||
service._send_user_text = AsyncMock()
|
||||
service._socket = _MessageSocket(['{"type":"call_started","callId":"call-123"}'])
|
||||
|
||||
await service._receive_messages()
|
||||
|
||||
assert service._call_started is True
|
||||
assert service._pending_user_text_messages == []
|
||||
assert service._send_user_text.await_args_list == [
|
||||
call("First queued message"),
|
||||
call("Second queued message"),
|
||||
]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_completed_input_transcription_is_broadcast_as_finalized():
|
||||
service = _make_service()
|
||||
service.broadcast_frame = AsyncMock()
|
||||
service._last_user_id = "caller-1"
|
||||
|
||||
await service._handle_user_transcript("Hello there")
|
||||
|
||||
service.broadcast_frame.assert_awaited_once()
|
||||
assert service.broadcast_frame.await_args.args[0].__name__ == "TranscriptionFrame"
|
||||
assert service.broadcast_frame.await_args.kwargs["text"] == "Hello there"
|
||||
assert service.broadcast_frame.await_args.kwargs["finalized"] is True
|
||||
|
||||
|
||||
def test_build_one_shot_params_uses_explicit_greeting_text():
|
||||
service = _make_service()
|
||||
|
||||
params = service._build_one_shot_params(
|
||||
greeting_text="Welcome to Dograh",
|
||||
initial_messages=None,
|
||||
agent_speaks_first=True,
|
||||
)
|
||||
|
||||
assert params.extra["firstSpeakerSettings"] == {
|
||||
"agent": {"text": "Welcome to Dograh"}
|
||||
}
|
||||
|
||||
|
||||
def test_build_one_shot_params_includes_initial_messages():
|
||||
service = _make_service()
|
||||
service._settings.system_instruction = "Base instruction"
|
||||
|
||||
params = service._build_one_shot_params(
|
||||
greeting_text=None,
|
||||
initial_messages=[
|
||||
{"role": "MESSAGE_ROLE_USER", "text": "User asked a question."},
|
||||
{"role": "MESSAGE_ROLE_TOOL_RESULT", "text": '{"status":"done"}'},
|
||||
],
|
||||
agent_speaks_first=True,
|
||||
)
|
||||
|
||||
assert params.extra["initialMessages"] == [
|
||||
{"role": "MESSAGE_ROLE_USER", "text": "User asked a question."},
|
||||
{"role": "MESSAGE_ROLE_TOOL_RESULT", "text": '{"status":"done"}'},
|
||||
{"role": "MESSAGE_ROLE_USER", "text": _RESUMPTION_USER_MESSAGE},
|
||||
]
|
||||
assert params.system_prompt == "Base instruction"
|
||||
|
||||
|
||||
def test_build_one_shot_params_without_tool_result_does_not_add_resumption_user_message():
|
||||
service = _make_service()
|
||||
service._settings.system_instruction = "Base instruction"
|
||||
|
||||
params = service._build_one_shot_params(
|
||||
greeting_text=None,
|
||||
initial_messages=[
|
||||
{"role": "MESSAGE_ROLE_USER", "text": "User asked a question."},
|
||||
{"role": "MESSAGE_ROLE_AGENT", "text": "Assistant replied."},
|
||||
],
|
||||
agent_speaks_first=False,
|
||||
)
|
||||
|
||||
assert params.system_prompt == "Base instruction"
|
||||
|
||||
|
||||
def test_should_agent_speak_first_when_history_ends_with_tool_result():
|
||||
service = _make_service()
|
||||
|
||||
assert (
|
||||
service._should_agent_speak_first(
|
||||
[
|
||||
{"role": "MESSAGE_ROLE_USER", "text": "Hello"},
|
||||
{"role": "MESSAGE_ROLE_TOOL_RESULT", "text": '{"status":"done"}'},
|
||||
]
|
||||
)
|
||||
is True
|
||||
)
|
||||
|
||||
|
||||
def test_should_not_force_agent_speaks_first_when_history_ends_with_agent():
|
||||
service = _make_service()
|
||||
|
||||
assert (
|
||||
service._should_agent_speak_first(
|
||||
[{"role": "MESSAGE_ROLE_AGENT", "text": "How else can I help?"}]
|
||||
)
|
||||
is False
|
||||
)
|
||||
|
||||
|
||||
def test_should_add_resumption_user_message_only_when_history_ends_with_tool_result():
|
||||
service = _make_service()
|
||||
|
||||
assert (
|
||||
service._should_add_resumption_user_message(
|
||||
[{"role": "MESSAGE_ROLE_TOOL_RESULT", "text": '{"status":"done"}'}]
|
||||
)
|
||||
is True
|
||||
)
|
||||
assert (
|
||||
service._should_add_resumption_user_message(
|
||||
[{"role": "MESSAGE_ROLE_AGENT", "text": "Assistant replied."}]
|
||||
)
|
||||
is False
|
||||
)
|
||||
|
||||
|
||||
def test_to_selected_tools_includes_registered_timeout():
|
||||
service = _make_service()
|
||||
service.register_function(
|
||||
"transition_to_next_node",
|
||||
AsyncMock(),
|
||||
timeout_secs=5.5,
|
||||
)
|
||||
|
||||
selected_tools = service._to_selected_tools(_tool_schema())
|
||||
|
||||
assert selected_tools == [
|
||||
{
|
||||
"temporaryTool": {
|
||||
"modelToolName": "transition_to_next_node",
|
||||
"description": "Move to the next workflow node",
|
||||
"dynamicParameters": [
|
||||
{
|
||||
"name": "reason",
|
||||
"location": "PARAMETER_LOCATION_BODY",
|
||||
"schema": {"type": "string"},
|
||||
"required": False,
|
||||
}
|
||||
],
|
||||
"client": {},
|
||||
"timeout": "5.5s",
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_receive_messages_ignores_benign_websocket_close():
|
||||
service = _make_service()
|
||||
service._socket = _ClosingSocket(
|
||||
ConnectionClosedError(None, Close(1000, "OK"), None)
|
||||
)
|
||||
|
||||
await service._receive_messages()
|
||||
|
||||
service.push_error.assert_not_awaited()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_receive_messages_reports_unexpected_websocket_close():
|
||||
service = _make_service()
|
||||
service._socket = _ClosingSocket(
|
||||
ConnectionClosedError(None, Close(1011, "internal error"), None)
|
||||
)
|
||||
|
||||
await service._receive_messages()
|
||||
|
||||
service.push_error.assert_awaited_once()
|
||||
|
||||
|
||||
def test_factory_creates_dograh_ultravox_realtime_service():
|
||||
user_config = UserConfiguration(
|
||||
is_realtime=True,
|
||||
realtime=UltravoxRealtimeLLMConfiguration(
|
||||
provider="ultravox_realtime",
|
||||
api_key="ultra-key",
|
||||
model="ultravox-v0.7",
|
||||
voice="Mark",
|
||||
),
|
||||
)
|
||||
|
||||
service = create_realtime_llm_service(
|
||||
user_config,
|
||||
audio_config=SimpleNamespace(),
|
||||
)
|
||||
|
||||
assert isinstance(service, DograhUltravoxRealtimeLLMService)
|
||||
assert service._params.voice == "Mark"
|
||||
|
||||
|
||||
def test_ultravox_realtime_configuration_defaults_to_mark_voice():
|
||||
config = UltravoxRealtimeLLMConfiguration(
|
||||
provider="ultravox_realtime",
|
||||
api_key="ultra-key",
|
||||
model="ultravox-v0.7",
|
||||
)
|
||||
|
||||
assert config.voice == "Mark"
|
||||
Loading…
Add table
Add a link
Reference in a new issue