mirror of
https://github.com/dograh-hq/dograh.git
synced 2026-06-22 08:38:13 +02:00
fix: migrate from custom audio recorder to native AudioBuffer (#115)
* fix: update to pipecat VM Detector * fix: refactor to remove audio synchronizer * feat: add speechmatics as STT
This commit is contained in:
parent
31521008cf
commit
edf0fa4fbc
12 changed files with 193 additions and 591 deletions
|
|
@ -83,29 +83,31 @@ class LoopTalkPipelineBuilder:
|
|||
|
||||
logger.debug(f"Created services for {role}: STT={stt}, LLM={llm}, TTS={tts}")
|
||||
|
||||
audio_buffer, audio_synchronizer, transcript, context = (
|
||||
create_pipeline_components(audio_config)
|
||||
)
|
||||
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
|
||||
# Get workflow graph
|
||||
workflow_graph = WorkflowGraph(
|
||||
ReactFlowDTO.model_validate(workflow.workflow_definition_with_fallback)
|
||||
)
|
||||
|
||||
# Create engine
|
||||
# Create engine first (needed for create_pipeline_components)
|
||||
engine = PipecatEngine(
|
||||
task=None, # Will be set after creating the task
|
||||
llm=llm,
|
||||
context=context,
|
||||
tts=tts,
|
||||
workflow=workflow_graph,
|
||||
call_context_vars={},
|
||||
audio_buffer=audio_buffer,
|
||||
workflow_run_id=None, # LoopTalk doesn't have workflow runs
|
||||
)
|
||||
|
||||
# Create pipeline components with audio configuration and engine
|
||||
audio_buffer, transcript, context = create_pipeline_components(
|
||||
audio_config, engine
|
||||
)
|
||||
|
||||
# Set the context and audio_buffer after creation
|
||||
engine.set_context(context)
|
||||
engine.set_audio_buffer(audio_buffer)
|
||||
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
|
||||
# Create STT mute filter
|
||||
stt_mute_filter = STTMuteFilter(
|
||||
config=STTMuteConfig(
|
||||
|
|
@ -124,19 +126,13 @@ class LoopTalkPipelineBuilder:
|
|||
user_context_aggregator = context_aggregator.user()
|
||||
assistant_context_aggregator = context_aggregator.assistant()
|
||||
|
||||
# Register processors with synchronizer for merged audio
|
||||
audio_synchronizer.register_processors(
|
||||
audio_buffer.input(), audio_buffer.output()
|
||||
)
|
||||
|
||||
# Get audio streamer for real-time streaming
|
||||
audio_streamer = get_or_create_audio_streamer(str(test_session_id), role)
|
||||
|
||||
# Create pipeline
|
||||
# Create pipeline with AudioBufferProcessor after transport.output()
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
audio_buffer.input(), # Record input audio
|
||||
audio_streamer, # Stream audio to connected clients
|
||||
stt_mute_filter,
|
||||
stt,
|
||||
|
|
@ -146,7 +142,7 @@ class LoopTalkPipelineBuilder:
|
|||
pipeline_engine_callback_processor,
|
||||
tts,
|
||||
transport.output(),
|
||||
audio_buffer.output(), # Record output audio
|
||||
audio_buffer, # AudioBufferProcessor - records both input and output audio
|
||||
transcript.assistant(),
|
||||
assistant_context_aggregator,
|
||||
]
|
||||
|
|
@ -157,13 +153,12 @@ class LoopTalkPipelineBuilder:
|
|||
task = create_pipeline_task(pipeline, conversation_id, audio_config)
|
||||
|
||||
# Set the task on the engine
|
||||
engine.task = task
|
||||
engine.set_task(task)
|
||||
|
||||
return {
|
||||
"task": task,
|
||||
"engine": engine,
|
||||
"audio_buffer": audio_buffer,
|
||||
"audio_synchronizer": audio_synchronizer,
|
||||
"transcript": transcript,
|
||||
"assistant_context_aggregator": assistant_context_aggregator,
|
||||
"audio_streamer": audio_streamer,
|
||||
|
|
|
|||
|
|
@ -245,7 +245,6 @@ class LoopTalkTestOrchestrator:
|
|||
engine = pipeline_info["engine"]
|
||||
task = pipeline_info["task"]
|
||||
audio_buffer = pipeline_info["audio_buffer"]
|
||||
audio_synchronizer = pipeline_info["audio_synchronizer"]
|
||||
transcript = pipeline_info["transcript"]
|
||||
assistant_context_aggregator = pipeline_info["assistant_context_aggregator"]
|
||||
|
||||
|
|
@ -255,7 +254,6 @@ class LoopTalkTestOrchestrator:
|
|||
logger.debug(f"LoopTalk {role} client connected - initializing workflow")
|
||||
# Start audio recording
|
||||
await audio_buffer.start_recording()
|
||||
await audio_synchronizer.start_recording()
|
||||
await engine.initialize()
|
||||
|
||||
@transport.event_handler("on_client_disconnected")
|
||||
|
|
@ -263,7 +261,6 @@ class LoopTalkTestOrchestrator:
|
|||
logger.debug(f"LoopTalk {role} client disconnected")
|
||||
# Stop audio recording
|
||||
await audio_buffer.stop_recording()
|
||||
await audio_synchronizer.stop_recording()
|
||||
|
||||
# Handle disconnect propagation - stop the other agent too
|
||||
await self.session_manager.handle_agent_disconnect(
|
||||
|
|
@ -274,11 +271,11 @@ class LoopTalkTestOrchestrator:
|
|||
|
||||
# Register custom audio and transcript handlers for LoopTalk
|
||||
await self._register_looptalk_handlers(
|
||||
audio_synchronizer, transcript, test_session_id, role
|
||||
audio_buffer, transcript, test_session_id, role
|
||||
)
|
||||
|
||||
async def _register_looptalk_handlers(
|
||||
self, audio_synchronizer, transcript, test_session_id: int, role: str
|
||||
self, audio_buffer, transcript, test_session_id: int, role: str
|
||||
):
|
||||
"""Register LoopTalk-specific handlers for audio and transcript recording"""
|
||||
|
||||
|
|
@ -288,9 +285,9 @@ class LoopTalkTestOrchestrator:
|
|||
audio_metadata = {"sample_rate": None, "num_channels": None}
|
||||
|
||||
# Audio handler - writes directly to PCM file
|
||||
@audio_synchronizer.event_handler("on_merged_audio")
|
||||
async def on_merged_audio(_, pcm, sample_rate, num_channels):
|
||||
if not pcm:
|
||||
@audio_buffer.event_handler("on_audio_data")
|
||||
async def on_audio_data(buffer, audio, sample_rate, num_channels):
|
||||
if not audio:
|
||||
return
|
||||
|
||||
# Store metadata on first write
|
||||
|
|
@ -301,7 +298,7 @@ class LoopTalkTestOrchestrator:
|
|||
# Append PCM data to temporary file
|
||||
try:
|
||||
with open(paths["temp_audio"], "ab") as f:
|
||||
f.write(pcm)
|
||||
f.write(audio)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"Failed to write audio for {role} in session {test_session_id}: {e}"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue