dograh/api/services/pipecat/audio_transcript_buffers.py

123 lines
4.4 KiB
Python
Raw Normal View History

2025-09-09 14:37:32 +05:30
import asyncio
import re
import tempfile
import wave
from typing import List
from loguru import logger
class InMemoryAudioBuffer:
    """Buffer audio data in memory during a call, then write to temp file on disconnect.

    PCM chunks are collected in a list and guarded by an ``asyncio.Lock`` so
    that ``append`` and ``write_to_temp_file`` never interleave. The total
    number of buffered bytes is capped (100 MiB); exceeding the cap raises
    ``MemoryError`` instead of growing without bound.
    """

    def __init__(self, workflow_run_id: int, sample_rate: int, num_channels: int = 1):
        """Create an empty buffer.

        Args:
            workflow_run_id: Identifier used only in log messages.
            sample_rate: Frame rate written into the WAV header.
            num_channels: Channel count written into the WAV header.
        """
        self._workflow_run_id = workflow_run_id
        self._sample_rate = sample_rate
        self._num_channels = num_channels
        self._chunks: List[bytes] = []
        self._lock = asyncio.Lock()
        self._total_size = 0
        self._max_size = 100 * 1024 * 1024  # 100MB limit

    async def append(self, pcm_data: bytes) -> None:
        """Append PCM audio data to the buffer.

        Args:
            pcm_data: Raw PCM bytes to store.

        Raises:
            MemoryError: If adding ``pcm_data`` would push the buffered total
                past the size limit. The chunk is not stored in that case.
        """
        async with self._lock:
            chunk_size = len(pcm_data)
            if self._total_size + chunk_size > self._max_size:
                logger.error(
                    f"Audio buffer size limit exceeded for workflow {self._workflow_run_id}. "
                    f"Current: {self._total_size}, Attempted to add: {chunk_size}"
                )
                raise MemoryError("Audio buffer size limit exceeded")
            self._chunks.append(pcm_data)
            self._total_size += chunk_size
            logger.trace(
                f"Appended {chunk_size} bytes to audio buffer. Total size: {self._total_size}"
            )

    async def write_to_temp_file(self) -> str:
        """Write audio data to a temporary WAV file and return the path.

        The file is created with ``delete=False``, so the caller owns the
        returned path and is responsible for removing it. Samples are written
        with a sample width of 2 bytes (16-bit audio).

        Returns:
            Filesystem path of the WAV file that was written.
        """
        # NOTE(review): the file I/O below is synchronous and runs while the
        # lock is held inside a coroutine; for large buffers consider moving
        # it off the event loop.
        async with self._lock:
            # Keep a single handle open for the whole write and let the
            # ``with`` block close it. The previous code left the
            # NamedTemporaryFile handle open (descriptor leak) and re-opened
            # the same path by name, which also fails on Windows.
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
                path = temp_file.name
                logger.debug(
                    f"Writing audio buffer to temp file {path} for workflow {self._workflow_run_id}"
                )
                # Write WAV header and PCM data
                with wave.open(temp_file, "wb") as wf:
                    wf.setnchannels(self._num_channels)
                    wf.setsampwidth(2)  # 16-bit audio
                    wf.setframerate(self._sample_rate)
                    # writeframesraw() skips the per-call header patch that
                    # writeframes() performs; the header sizes are fixed up
                    # once when the wave writer is closed.
                    for chunk in self._chunks:
                        wf.writeframesraw(chunk)
            logger.info(
                f"Successfully wrote {self._total_size} bytes of audio to {path}"
            )
            return path

    @property
    def is_empty(self) -> bool:
        """Check if the buffer is empty (no chunks have been appended)."""
        return not self._chunks

    @property
    def size(self) -> int:
        """Get the total size of buffered data, in bytes."""
        return self._total_size
class InMemoryTranscriptBuffer:
    """Buffer transcript data in memory during a call, then write to temp file on disconnect.

    Transcript fragments are stored in arrival order and concatenated without
    any separator when flushed, so each fragment is expected to carry its own
    line ending if one is wanted.
    """

    # Compiled regex to identify user speech lines, e.g.
    #   [2025-06-29T12:34:56.789+00:00] user: hello
    # NOTE(review): the pattern only accepts millisecond precision and a
    # "+HH:MM" offset; timestamps with a negative offset or "Z" will not
    # match - confirm against whatever produces these lines.
    _USER_SPEECH_RE: re.Pattern[str] = re.compile(
        r"^\[\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}\+\d{2}:\d{2}\] user: .+"
    )

    def __init__(self, workflow_run_id: int):
        """Create an empty transcript buffer.

        Args:
            workflow_run_id: Identifier used only in log messages.
        """
        self._workflow_run_id = workflow_run_id
        self._lines: List[str] = []
        self._lock = asyncio.Lock()

    async def append(self, transcript: str) -> None:
        """Append transcript text to the buffer.

        Args:
            transcript: Text fragment to store; it is kept verbatim.
        """
        async with self._lock:
            self._lines.append(transcript)
            logger.trace(
                f"Appended transcript line to buffer for workflow {self._workflow_run_id}"
            )

    async def write_to_temp_file(self) -> str:
        """Write transcript to a temporary text file and return the path.

        The file is created with ``delete=False``, so the caller owns the
        returned path and is responsible for removing it.

        Returns:
            Filesystem path of the text file that was written.
        """
        async with self._lock:
            content = "".join(self._lines)
            # Explicit UTF-8 so transcripts containing non-ASCII text do not
            # depend on the platform's default encoding, and a ``with`` block
            # so the handle is closed even if the write fails.
            with tempfile.NamedTemporaryFile(
                mode="w", suffix=".txt", delete=False, encoding="utf-8"
            ) as temp_file:
                path = temp_file.name
                logger.debug(
                    f"Writing transcript buffer to temp file {path} for workflow {self._workflow_run_id}"
                )
                temp_file.write(content)
            logger.info(
                f"Successfully wrote {len(content)} chars of transcript to {path}"
            )
            return path

    @property
    def is_empty(self) -> bool:
        """Check if the buffer is empty (no transcript has been appended)."""
        return not self._lines

    def contains_user_speech(self) -> bool:
        """Return True if any buffered transcript line matches the user speech pattern."""
        return any(self._USER_SPEECH_RE.match(line) for line in self._lines)