dograh/api/services/pipecat/in_memory_buffers.py

179 lines
6.6 KiB
Python
Raw Normal View History

2025-09-09 14:37:32 +05:30
import asyncio
import tempfile
import wave
from datetime import UTC, datetime
from typing import List, Optional
2025-09-09 14:37:32 +05:30
from loguru import logger
from api.services.pipecat.realtime_feedback_events import (
realtime_feedback_event_sort_key,
stamp_realtime_feedback_event,
)
2026-03-11 17:57:04 +05:30
from api.utils.transcript import generate_transcript_text as _generate_transcript_text
from pipecat.utils.enums import RealtimeFeedbackType
2025-09-09 14:37:32 +05:30
class InMemoryAudioBuffer:
    """Buffer audio data in memory during a call, then write to temp file on disconnect.

    PCM chunks are collected under an asyncio lock and capped at a fixed total
    size. The buffered audio can be flushed to a temporary WAV file on demand.
    """

    def __init__(self, workflow_run_id: int, sample_rate: int, num_channels: int = 1):
        """Create an empty audio buffer.

        Args:
            workflow_run_id: Identifier included in log messages.
            sample_rate: Frame rate written into the WAV header.
            num_channels: Channel count written into the WAV header.
        """
        self._workflow_run_id = workflow_run_id
        self._sample_rate = sample_rate
        self._num_channels = num_channels
        self._chunks: List[bytes] = []
        self._lock = asyncio.Lock()
        self._total_size = 0
        self._max_size = 100 * 1024 * 1024  # 100MB limit

    async def append(self, pcm_data: bytes):
        """Append PCM audio data to the buffer.

        Args:
            pcm_data: Raw PCM bytes to store.

        Raises:
            MemoryError: If adding ``pcm_data`` would push the buffer past its
                size limit. The chunk is not stored in that case.
        """
        async with self._lock:
            if self._total_size + len(pcm_data) > self._max_size:
                logger.error(
                    f"Audio buffer size limit exceeded for workflow {self._workflow_run_id}. "
                    f"Current: {self._total_size}, Attempted to add: {len(pcm_data)}"
                )
                raise MemoryError("Audio buffer size limit exceeded")

            self._chunks.append(pcm_data)
            self._total_size += len(pcm_data)
            logger.trace(
                f"Appended {len(pcm_data)} bytes to audio buffer. Total size: {self._total_size}"
            )

    async def write_to_temp_file(self) -> str:
        """Write audio data to a temporary WAV file and return the path.

        The file is created with ``delete=False``, so it stays on disk after
        this method returns; removing it is left to the caller.

        Returns:
            Filesystem path of the WAV file that was written.
        """
        async with self._lock:
            # Reserve a unique path and release the handle straight away.
            # ``wave.open`` reopens the file by name below, so keeping this
            # handle around would leak a descriptor (and on Windows would
            # prevent the second open altogether).
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
                temp_path = temp_file.name

            logger.debug(
                f"Writing audio buffer to temp file {temp_path} for workflow {self._workflow_run_id}"
            )

            # Write WAV header and PCM data
            with wave.open(temp_path, "wb") as wf:
                wf.setnchannels(self._num_channels)
                wf.setsampwidth(2)  # 16-bit audio
                wf.setframerate(self._sample_rate)

                # ``writeframesraw`` skips the per-call header rewrite that
                # ``writeframes`` performs; the header is corrected once when
                # the writer is closed at the end of the ``with`` block.
                for chunk in self._chunks:
                    wf.writeframesraw(chunk)

            logger.info(
                f"Successfully wrote {self._total_size} bytes of audio to {temp_path}"
            )
            return temp_path

    @property
    def is_empty(self) -> bool:
        """Check if the buffer is empty."""
        return not self._chunks

    @property
    def size(self) -> int:
        """Get the total size of buffered data in bytes."""
        return self._total_size
class InMemoryLogsBuffer:
"""Buffer real-time feedback events in memory during a call, then save to workflow run logs."""
def __init__(self, workflow_run_id: int):
self._workflow_run_id = workflow_run_id
self._events: List[dict] = []
self._turn_counter = 0
self._current_node_id: Optional[str] = None
self._current_node_name: Optional[str] = None
def set_current_node(self, node_id: str, node_name: str):
"""Set the current node ID and name to be injected into subsequent events."""
self._current_node_id = node_id
self._current_node_name = node_name
@property
def current_node_id(self) -> Optional[str]:
"""Get the current node ID."""
return self._current_node_id
@property
def current_node_name(self) -> Optional[str]:
"""Get the current node name."""
return self._current_node_name
async def append(self, event: dict):
"""Append a feedback event to the buffer with timestamp and current node."""
timestamped_event = stamp_realtime_feedback_event(
event,
timestamp=datetime.now(UTC).isoformat(),
turn=self._turn_counter,
node_id=self._current_node_id,
node_name=self._current_node_name,
)
self._events.append(timestamped_event)
logger.trace(
f"Appended event {event.get('type')} to logs buffer for workflow {self._workflow_run_id}"
)
def increment_turn(self):
"""Increment turn counter (called on user transcription completion)."""
self._turn_counter += 1
logger.trace(
f"Incremented turn counter to {self._turn_counter} for workflow {self._workflow_run_id}"
)
def _sorted_events(self) -> List[dict]:
# Stable sort by the realtime (payload) timestamp when available, falling
# back to the buffer-append timestamp. Python's sort is stable, so events
# sharing a key retain their original insertion order — this keeps
# consecutive bot-text chunks of a single turn contiguous.
return sorted(self._events, key=realtime_feedback_event_sort_key)
def get_events(self) -> List[dict]:
"""Get all events for final storage, ordered by realtime timestamp."""
return self._sorted_events()
def contains_user_speech(self) -> bool:
"""Return True if any final user transcription event has non-empty text."""
for event in self._events:
if (
event.get("type") == RealtimeFeedbackType.USER_TRANSCRIPTION.value
and event.get("payload", {}).get("final") is True
and event.get("payload", {}).get("text")
):
return True
return False
def generate_transcript_text(self) -> str:
"""Generate transcript text from logged events.
Filters for rtf-user-transcription (final) and rtf-bot-text events,
2026-03-11 17:57:04 +05:30
formats them as '[timestamp] user/assistant: text\\n'.
"""
return _generate_transcript_text(self._sorted_events())
def write_transcript_to_temp_file(self) -> Optional[str]:
"""Write transcript to a temporary text file and return the path.
Returns None if there are no transcript events.
"""
content = self.generate_transcript_text()
if not content:
return None
temp_file = tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False)
logger.debug(
f"Writing transcript to temp file {temp_file.name} for workflow {self._workflow_run_id}"
)
temp_file.write(content)
temp_file.close()
logger.info(
f"Successfully wrote {len(content)} chars of transcript to {temp_file.name}"
)
return temp_file.name
@property
def is_empty(self) -> bool:
"""Check if the buffer is empty."""
return len(self._events) == 0