mirror of
https://github.com/dograh-hq/dograh.git
synced 2026-06-19 08:28:10 +02:00
feat: persist split user and bot audio
This commit is contained in:
parent
dd3f2e7323
commit
3d1886c450
30 changed files with 1322 additions and 253 deletions
|
|
@ -944,6 +944,7 @@ class DograhTTSService(BaseTTSConfiguration):
|
|||
voice: str = Field(
|
||||
default="default",
|
||||
description="Voice preset.",
|
||||
json_schema_extra={"allow_custom_input": True},
|
||||
)
|
||||
speed: float = Field(default=1.0, ge=0.5, le=2.0, description="Speed of the voice.")
|
||||
|
||||
|
|
|
|||
|
|
@ -9,8 +9,8 @@ from api.services.integrations import IntegrationRuntimeSession
|
|||
from api.services.pipecat.audio_config import AudioConfig
|
||||
from api.services.pipecat.audio_playback import play_audio_loop
|
||||
from api.services.pipecat.in_memory_buffers import (
|
||||
InMemoryAudioBuffer,
|
||||
InMemoryLogsBuffer,
|
||||
InMemoryRecordingBuffers,
|
||||
)
|
||||
from api.services.pipecat.pipeline_metrics_aggregator import PipelineMetricsAggregator
|
||||
from api.services.pipecat.tracing_config import get_trace_url
|
||||
|
|
@ -40,11 +40,11 @@ async def _capture_call_event(
|
|||
"workflow_run_id": workflow_run_id,
|
||||
"workflow_id": workflow_run.workflow_id if workflow_run else None,
|
||||
"call_type": workflow_run.mode if workflow_run else None,
|
||||
"call_direction": (workflow_run.initial_context or {}).get(
|
||||
"direction", "outbound"
|
||||
)
|
||||
if workflow_run
|
||||
else None,
|
||||
"call_direction": (
|
||||
(workflow_run.initial_context or {}).get("direction", "outbound")
|
||||
if workflow_run
|
||||
else None
|
||||
),
|
||||
}
|
||||
if extra_properties:
|
||||
properties.update(extra_properties)
|
||||
|
|
@ -73,7 +73,7 @@ def register_event_handlers(
|
|||
"""Register all event handlers for transport and task events.
|
||||
|
||||
Returns:
|
||||
in_memory_audio_buffer for use by other handlers.
|
||||
In-memory recording buffers for use by other handlers.
|
||||
"""
|
||||
# Initialize in-memory buffers with proper audio configuration
|
||||
sample_rate = audio_config.pipeline_sample_rate if audio_config else 16000
|
||||
|
|
@ -84,7 +84,7 @@ def register_event_handlers(
|
|||
f"with sample_rate={sample_rate}Hz, channels={num_channels}"
|
||||
)
|
||||
|
||||
in_memory_audio_buffer = InMemoryAudioBuffer(
|
||||
in_memory_audio_buffers = InMemoryRecordingBuffers(
|
||||
workflow_run_id=workflow_run_id,
|
||||
sample_rate=sample_rate,
|
||||
num_channels=num_channels,
|
||||
|
|
@ -363,14 +363,32 @@ def register_event_handlers(
|
|||
|
||||
# Write buffers to temp files and enqueue combined processing task
|
||||
audio_temp_path = None
|
||||
user_audio_temp_path = None
|
||||
bot_audio_temp_path = None
|
||||
transcript_temp_path = None
|
||||
|
||||
try:
|
||||
if not in_memory_audio_buffer.is_empty:
|
||||
audio_temp_path = await in_memory_audio_buffer.write_to_temp_file()
|
||||
if not in_memory_audio_buffers.mixed.is_empty:
|
||||
audio_temp_path = (
|
||||
await in_memory_audio_buffers.mixed.write_to_temp_file()
|
||||
)
|
||||
else:
|
||||
logger.debug("Audio buffer is empty, skipping upload")
|
||||
|
||||
if not in_memory_audio_buffers.user.is_empty:
|
||||
user_audio_temp_path = (
|
||||
await in_memory_audio_buffers.user.write_to_temp_file()
|
||||
)
|
||||
else:
|
||||
logger.debug("User audio buffer is empty, skipping upload")
|
||||
|
||||
if not in_memory_audio_buffers.bot.is_empty:
|
||||
bot_audio_temp_path = (
|
||||
await in_memory_audio_buffers.bot.write_to_temp_file()
|
||||
)
|
||||
else:
|
||||
logger.debug("Bot audio buffer is empty, skipping upload")
|
||||
|
||||
transcript_temp_path = in_memory_logs_buffer.write_transcript_to_temp_file()
|
||||
if not transcript_temp_path:
|
||||
logger.debug("No transcript events in logs buffer, skipping upload")
|
||||
|
|
@ -385,16 +403,18 @@ def register_event_handlers(
|
|||
workflow_run_id,
|
||||
audio_temp_path,
|
||||
transcript_temp_path,
|
||||
user_audio_temp_path,
|
||||
bot_audio_temp_path,
|
||||
)
|
||||
|
||||
# Return the buffer so it can be passed to other handlers
|
||||
return in_memory_audio_buffer
|
||||
return in_memory_audio_buffers
|
||||
|
||||
|
||||
def register_audio_data_handler(
|
||||
audio_buffer: AudioBufferProcessor,
|
||||
workflow_run_id,
|
||||
in_memory_buffer: InMemoryAudioBuffer,
|
||||
in_memory_buffers: InMemoryRecordingBuffers,
|
||||
):
|
||||
"""Register event handler for audio data"""
|
||||
logger.info(f"Registering audio data handler for workflow run {workflow_run_id}")
|
||||
|
|
@ -404,9 +424,19 @@ def register_audio_data_handler(
|
|||
if not audio:
|
||||
return
|
||||
|
||||
# Use in-memory buffer
|
||||
try:
|
||||
await in_memory_buffer.append(audio)
|
||||
await in_memory_buffers.mixed.append(audio)
|
||||
except MemoryError as e:
|
||||
logger.error(f"Memory buffer full: {e}")
|
||||
# Could implement overflow to disk here if needed
|
||||
logger.error(f"Mixed audio buffer full: {e}")
|
||||
|
||||
@audio_buffer.event_handler("on_track_audio_data")
|
||||
async def on_track_audio_data(
|
||||
buffer, user_audio, bot_audio, sample_rate, num_channels
|
||||
):
|
||||
try:
|
||||
if user_audio:
|
||||
await in_memory_buffers.user.append(user_audio)
|
||||
if bot_audio:
|
||||
await in_memory_buffers.bot.append(bot_audio)
|
||||
except MemoryError as e:
|
||||
logger.error(f"Track audio buffer full: {e}")
|
||||
|
|
|
|||
|
|
@ -75,6 +75,27 @@ class InMemoryAudioBuffer:
|
|||
return self._total_size
|
||||
|
||||
|
||||
class InMemoryRecordingBuffers:
|
||||
"""Holds the mixed recording plus aligned user and bot mono tracks."""
|
||||
|
||||
def __init__(self, workflow_run_id: int, sample_rate: int, num_channels: int = 1):
|
||||
self.mixed = InMemoryAudioBuffer(
|
||||
workflow_run_id=workflow_run_id,
|
||||
sample_rate=sample_rate,
|
||||
num_channels=num_channels,
|
||||
)
|
||||
self.user = InMemoryAudioBuffer(
|
||||
workflow_run_id=workflow_run_id,
|
||||
sample_rate=sample_rate,
|
||||
num_channels=1,
|
||||
)
|
||||
self.bot = InMemoryAudioBuffer(
|
||||
workflow_run_id=workflow_run_id,
|
||||
sample_rate=sample_rate,
|
||||
num_channels=1,
|
||||
)
|
||||
|
||||
|
||||
class InMemoryLogsBuffer:
|
||||
"""Buffer real-time feedback events in memory during a call, then save to workflow run logs."""
|
||||
|
||||
|
|
|
|||
|
|
@ -718,6 +718,8 @@ class TriggerNodeData(BaseNodeData):
|
|||
"rsvp": "{{gathered_context.rsvp}}",
|
||||
"duration": "{{cost_info.call_duration_seconds}}",
|
||||
"recording_url": "{{recording_url}}",
|
||||
"user_recording_url": "{{user_recording_url}}",
|
||||
"bot_recording_url": "{{bot_recording_url}}",
|
||||
"transcript_url": "{{transcript_url}}",
|
||||
},
|
||||
},
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue