mirror of
https://github.com/dograh-hq/dograh.git
synced 2026-07-04 10:52:17 +02:00
fix: fix interruption handling for Gemini Live
1. Fixes #236 2. Fix run_inference for variable extraction for Gemini Live
This commit is contained in:
parent
14e6f29f2f
commit
e31b38122e
12 changed files with 48 additions and 15 deletions
|
|
@ -120,9 +120,21 @@ class InMemoryLogsBuffer:
|
|||
f"Incremented turn counter to {self._turn_counter} for workflow {self._workflow_run_id}"
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _event_sort_key(event: dict) -> str:
|
||||
payload_ts = event.get("payload", {}).get("timestamp")
|
||||
return payload_ts or event.get("timestamp", "")
|
||||
|
||||
def _sorted_events(self) -> List[dict]:
|
||||
# Stable sort by the realtime (payload) timestamp when available, falling
|
||||
# back to the buffer-append timestamp. Python's sort is stable, so events
|
||||
# sharing a key retain their original insertion order — this keeps
|
||||
# consecutive bot-text chunks of a single turn contiguous.
|
||||
return sorted(self._events, key=self._event_sort_key)
|
||||
|
||||
def get_events(self) -> List[dict]:
|
||||
"""Get all events for final storage."""
|
||||
return self._events
|
||||
"""Get all events for final storage, ordered by realtime timestamp."""
|
||||
return self._sorted_events()
|
||||
|
||||
def contains_user_speech(self) -> bool:
|
||||
"""Return True if any final user transcription event has non-empty text."""
|
||||
|
|
@ -141,7 +153,7 @@ class InMemoryLogsBuffer:
|
|||
Filters for rtf-user-transcription (final) and rtf-bot-text events,
|
||||
formats them as '[timestamp] user/assistant: text\\n'.
|
||||
"""
|
||||
return _generate_transcript_text(self._events)
|
||||
return _generate_transcript_text(self._sorted_events())
|
||||
|
||||
def write_transcript_to_temp_file(self) -> Optional[str]:
|
||||
"""Write transcript to a temporary text file and return the path.
|
||||
|
|
|
|||
|
|
@ -616,10 +616,15 @@ async def _run_pipeline(
|
|||
llm = create_realtime_llm_service(user_config, audio_config)
|
||||
stt = None
|
||||
tts = None
|
||||
# Realtime services don't implement run_inference, so create a
|
||||
# separate text LLM for variable extraction and other out-of-band
|
||||
# inference calls.
|
||||
inference_llm = create_llm_service(user_config)
|
||||
else:
|
||||
stt = create_stt_service(user_config, audio_config, keyterms=keyterms)
|
||||
tts = create_tts_service(user_config, audio_config)
|
||||
llm = create_llm_service(user_config)
|
||||
inference_llm = None
|
||||
|
||||
workflow_graph = WorkflowGraph(ReactFlowDTO.model_validate(run_workflow_json))
|
||||
|
||||
|
|
@ -703,9 +708,15 @@ async def _run_pipeline(
|
|||
context_compaction_enabled = (workflow.workflow_configurations or {}).get(
|
||||
"context_compaction_enabled", False
|
||||
)
|
||||
# Context compaction doesn't apply in realtime mode: the speech-to-speech
|
||||
# service manages its own conversation state server-side.
|
||||
if is_realtime and context_compaction_enabled:
|
||||
logger.info("Disabling context_compaction_enabled for realtime workflow run")
|
||||
context_compaction_enabled = False
|
||||
|
||||
engine = PipecatEngine(
|
||||
llm=llm,
|
||||
inference_llm=inference_llm,
|
||||
workflow=workflow_graph,
|
||||
call_context_vars=merged_call_context_vars,
|
||||
workflow_run_id=workflow_run_id,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue