mirror of
https://github.com/dograh-hq/dograh.git
synced 2026-06-16 08:25:18 +02:00
feat: simplify pipecat engine execution (#54)
This commit is contained in:
parent
99a768f291
commit
6ce25a589c
20 changed files with 52 additions and 1405 deletions
|
|
@ -24,6 +24,9 @@ from api.services.workflow.dto import ReactFlowDTO
|
|||
from api.services.workflow.pipecat_engine import PipecatEngine
|
||||
from api.services.workflow.workflow import WorkflowGraph
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
)
|
||||
from pipecat.processors.filters.stt_mute_filter import (
|
||||
STTMuteConfig,
|
||||
STTMuteFilter,
|
||||
|
|
@ -83,7 +86,8 @@ class LoopTalkPipelineBuilder:
|
|||
audio_buffer, audio_synchronizer, transcript, context = (
|
||||
create_pipeline_components(audio_config)
|
||||
)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
context_aggregator = LLMContextAggregatorPair(context)
|
||||
|
||||
# Get workflow graph
|
||||
workflow_graph = WorkflowGraph(
|
||||
|
|
@ -113,7 +117,6 @@ class LoopTalkPipelineBuilder:
|
|||
pipeline_engine_callback_processor = PipelineEngineCallbacksProcessor(
|
||||
max_call_duration_seconds=300,
|
||||
max_duration_end_task_callback=engine.create_max_duration_callback(),
|
||||
llm_generated_text_callback=engine.create_llm_generated_text_callback(),
|
||||
generation_started_callback=engine.create_generation_started_callback(),
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -272,14 +272,6 @@ class LoopTalkTestOrchestrator:
|
|||
|
||||
await task.cancel()
|
||||
|
||||
# Connect the context aggregator events to engine
|
||||
@assistant_context_aggregator.event_handler("on_push_aggregation")
|
||||
async def on_assistant_aggregator_push_context(_aggregator):
|
||||
logger.debug(
|
||||
"Assistant aggregator push context – flushing pending transitions"
|
||||
)
|
||||
await engine.flush_pending_transitions()
|
||||
|
||||
# Register custom audio and transcript handlers for LoopTalk
|
||||
await self._register_looptalk_handlers(
|
||||
audio_synchronizer, transcript, test_session_id, role
|
||||
|
|
|
|||
|
|
@ -1,69 +0,0 @@
|
|||
"""Engine Pre-Aggregator Processor
|
||||
|
||||
This processor sits before the user context aggregator in the pipeline and handles
|
||||
engine-specific callbacks for frames that need to be processed before aggregation.
|
||||
This ensures the engine can update context before the aggregator generates LLM frames.
|
||||
"""
|
||||
|
||||
from typing import Awaitable, Callable, Optional
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from api.services.pipecat.exceptions import VoicemailDetectedException
|
||||
from pipecat.frames.frames import (
|
||||
Frame,
|
||||
UserStartedSpeakingFrame,
|
||||
UserStoppedSpeakingFrame,
|
||||
)
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
|
||||
|
||||
class EnginePreAggregatorProcessor(FrameProcessor):
    """Run engine callbacks for user speaking frames ahead of aggregation.

    The processor is meant to be placed before the user context aggregator so
    that the engine gets a chance to update the LLM context before the
    aggregator reacts to the same frame.

    Every frame is forwarded in its original direction, with one exception: a
    ``UserStoppedSpeakingFrame`` whose callback raises
    ``VoicemailDetectedException`` is dropped.
    """

    def __init__(
        self,
        user_started_speaking_callback: Optional[Callable[[], Awaitable[None]]] = None,
        user_stopped_speaking_callback: Optional[Callable[[], Awaitable[None]]] = None,
        **kwargs,
    ):
        """Store the optional callbacks.

        Args:
            user_started_speaking_callback: Awaited (without arguments) when a
                ``UserStartedSpeakingFrame`` is seen. May be ``None``.
            user_stopped_speaking_callback: Awaited (without arguments) when a
                ``UserStoppedSpeakingFrame`` is seen. May be ``None``.
            **kwargs: Passed through unchanged to ``FrameProcessor.__init__``.
        """
        super().__init__(**kwargs)
        self._user_started_speaking_callback = user_started_speaking_callback
        self._user_stopped_speaking_callback = user_stopped_speaking_callback

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Invoke the matching engine callback, then forward the frame.

        Args:
            frame: The frame travelling through the pipeline.
            direction: Direction in which the frame is travelling; the frame
                is pushed onward in the same direction.
        """
        await super().process_frame(frame, direction)

        swallow_frame = False

        if isinstance(frame, UserStartedSpeakingFrame):
            await self._handle_user_started_speaking()
        elif isinstance(frame, UserStoppedSpeakingFrame):
            try:
                await self._handle_user_stopped_speaking()
            except VoicemailDetectedException:
                # Voicemail was detected: hold back the
                # UserStoppedSpeakingFrame so the user context aggregator
                # does not issue an LLM call for it.
                logger.debug("Voicemail detected, not pushing UserStoppedSpeakingFrame")
                swallow_frame = True

        if not swallow_frame:
            await self.push_frame(frame, direction)

    async def _handle_user_started_speaking(self):
        """Await the user-started-speaking callback when one is configured."""
        callback = self._user_started_speaking_callback
        if callback:
            await callback()

    async def _handle_user_stopped_speaking(self):
        """Await the user-stopped-speaking callback when one is configured."""
        callback = self._user_stopped_speaking_callback
        if callback:
            await callback()
|
||||
|
|
@ -9,7 +9,7 @@ from api.constants import (
|
|||
from api.services.pipecat.audio_config import AudioConfig
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.audio.audio_buffer_processor import AudioBuffer
|
||||
from pipecat.processors.audio.audio_synchronizer import AudioSynchronizer
|
||||
from pipecat.processors.transcript_processor import TranscriptProcessor
|
||||
|
|
@ -39,7 +39,7 @@ def create_pipeline_components(audio_config: AudioConfig, engine: "PipecatEngine
|
|||
assistant_correct_aggregation_callback=engine.create_aggregation_correction_callback()
|
||||
)
|
||||
|
||||
context = OpenAILLMContext()
|
||||
context = LLMContext()
|
||||
|
||||
return audio_buffer, audio_synchronizer, transcript, context
|
||||
|
||||
|
|
@ -58,7 +58,6 @@ def build_pipeline(
|
|||
stt_mute_filter,
|
||||
pipeline_metrics_aggregator,
|
||||
user_idle_disconnect,
|
||||
engine_pre_aggregator_processor=None,
|
||||
):
|
||||
"""Build the main pipeline with all components"""
|
||||
# Register processors with synchronizer for merged audio
|
||||
|
|
@ -69,16 +68,12 @@ def build_pipeline(
|
|||
processors = [
|
||||
transport.input(), # Transport user input
|
||||
audio_buffer.input(), # Record input audio (only processes InputAudioRawFrame)
|
||||
stt_mute_filter,
|
||||
stt, # STT can now have audio_passthrough=False
|
||||
stt_mute_filter, # STTMuteFilters don't let VAD related events pass through if muted
|
||||
user_idle_disconnect,
|
||||
transcript.user(),
|
||||
]
|
||||
|
||||
# Insert engine pre-aggregator processor if provided (before user aggregator)
|
||||
if engine_pre_aggregator_processor:
|
||||
processors.append(engine_pre_aggregator_processor)
|
||||
|
||||
processors.extend(
|
||||
[
|
||||
user_context_aggregator,
|
||||
|
|
|
|||
|
|
@ -7,7 +7,6 @@ from pipecat.frames.frames import (
|
|||
Frame,
|
||||
HeartbeatFrame,
|
||||
LLMFullResponseStartFrame,
|
||||
LLMGeneratedTextFrame,
|
||||
LLMTextFrame,
|
||||
StartFrame,
|
||||
TTSSpeakFrame,
|
||||
|
|
@ -26,7 +25,6 @@ class PipelineEngineCallbacksProcessor(FrameProcessor):
|
|||
self,
|
||||
max_call_duration_seconds: int = 300,
|
||||
max_duration_end_task_callback: Optional[Callable[[], Awaitable[None]]] = None,
|
||||
llm_generated_text_callback: Optional[Callable[[], Awaitable[None]]] = None,
|
||||
generation_started_callback: Optional[Callable[[], Awaitable[None]]] = None,
|
||||
llm_text_frame_callback: Optional[Callable[[str], Awaitable[None]]] = None,
|
||||
):
|
||||
|
|
@ -34,7 +32,6 @@ class PipelineEngineCallbacksProcessor(FrameProcessor):
|
|||
self._start_time = None
|
||||
self._max_call_duration_seconds = max_call_duration_seconds
|
||||
self._max_duration_end_task_callback = max_duration_end_task_callback
|
||||
self._llm_generated_text_callback = llm_generated_text_callback
|
||||
self._generation_started_callback = generation_started_callback
|
||||
self._llm_text_frame_callback = llm_text_frame_callback
|
||||
self._end_task_frame_pushed = False
|
||||
|
|
@ -46,8 +43,6 @@ class PipelineEngineCallbacksProcessor(FrameProcessor):
|
|||
await self._start(frame)
|
||||
elif isinstance(frame, HeartbeatFrame):
|
||||
await self._check_call_duration()
|
||||
elif isinstance(frame, LLMGeneratedTextFrame):
|
||||
await self._generated_text_frame(frame)
|
||||
elif isinstance(frame, LLMFullResponseStartFrame):
|
||||
await self._generation_started()
|
||||
elif (
|
||||
|
|
@ -74,11 +69,6 @@ class PipelineEngineCallbacksProcessor(FrameProcessor):
|
|||
"Max call duration exceeded. Skipping EndTaskFrame since already sent"
|
||||
)
|
||||
|
||||
async def _generated_text_frame(self, _: LLMGeneratedTextFrame):
|
||||
"""Handle LLMGeneratedTextFrame."""
|
||||
if self._llm_generated_text_callback is not None:
|
||||
await self._llm_generated_text_callback()
|
||||
|
||||
async def _generation_started(self):
|
||||
if self._generation_started_callback:
|
||||
await self._generation_started_callback()
|
||||
|
|
|
|||
|
|
@ -7,9 +7,6 @@ from api.db import db_client
|
|||
from api.db.models import WorkflowModel
|
||||
from api.enums import WorkflowRunMode
|
||||
from api.services.pipecat.audio_config import AudioConfig, create_audio_config
|
||||
from api.services.pipecat.engine_pre_aggregator_processor import (
|
||||
EnginePreAggregatorProcessor,
|
||||
)
|
||||
from api.services.pipecat.event_handlers import (
|
||||
register_audio_data_handler,
|
||||
register_task_event_handler,
|
||||
|
|
@ -43,6 +40,9 @@ from api.services.workflow.pipecat_engine import PipecatEngine
|
|||
from api.services.workflow.workflow import WorkflowGraph
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.processors.aggregators.llm_response import LLMAssistantAggregatorParams
|
||||
from pipecat.processors.aggregators.llm_response_universal import (
|
||||
LLMContextAggregatorPair,
|
||||
)
|
||||
from pipecat.processors.filters.stt_mute_filter import (
|
||||
STTMuteConfig,
|
||||
STTMuteFilter,
|
||||
|
|
@ -357,21 +357,14 @@ async def _run_pipeline(
|
|||
expect_stripped_words=True,
|
||||
correct_aggregation_callback=engine.create_aggregation_correction_callback(),
|
||||
)
|
||||
context_aggregator = llm.create_context_aggregator(
|
||||
context_aggregator = LLMContextAggregatorPair(
|
||||
context, assistant_params=assistant_params
|
||||
)
|
||||
|
||||
# Create engine pre-aggregator processor for speaking events
|
||||
engine_pre_aggregator_processor = EnginePreAggregatorProcessor(
|
||||
user_started_speaking_callback=engine.create_user_started_speaking_callback(),
|
||||
user_stopped_speaking_callback=engine.create_user_stopped_speaking_callback(),
|
||||
)
|
||||
|
||||
# Create usage metrics aggregator with engine's callback
|
||||
pipeline_engine_callback_processor = PipelineEngineCallbacksProcessor(
|
||||
max_call_duration_seconds=max_call_duration_seconds,
|
||||
max_duration_end_task_callback=engine.create_max_duration_callback(),
|
||||
llm_generated_text_callback=engine.create_llm_generated_text_callback(),
|
||||
generation_started_callback=engine.create_generation_started_callback(),
|
||||
llm_text_frame_callback=engine.handle_llm_text_frame,
|
||||
# Note: speaking event callbacks are now handled by pre-aggregator processor
|
||||
|
|
@ -398,11 +391,6 @@ async def _run_pipeline(
|
|||
user_context_aggregator = context_aggregator.user()
|
||||
assistant_context_aggregator = context_aggregator.assistant()
|
||||
|
||||
@assistant_context_aggregator.event_handler("on_push_aggregation")
|
||||
async def on_assistant_aggregator_push_context(_aggregator):
|
||||
logger.debug("Assistant aggregator push context – flushing pending transitions")
|
||||
await engine.flush_pending_transitions(source="context_push")
|
||||
|
||||
# Build the pipeline with the STT mute filter and context controller
|
||||
pipeline = build_pipeline(
|
||||
transport,
|
||||
|
|
@ -418,7 +406,6 @@ async def _run_pipeline(
|
|||
stt_mute_filter,
|
||||
pipeline_metrics_aggregator,
|
||||
user_idle_disconnect,
|
||||
engine_pre_aggregator_processor=engine_pre_aggregator_processor,
|
||||
)
|
||||
|
||||
# Create pipeline task with audio configuration
|
||||
|
|
|
|||
|
|
@ -14,14 +14,14 @@ from pipecat.frames.frames import (
|
|||
CancelFrame,
|
||||
EndFrame,
|
||||
FunctionCallResultProperties,
|
||||
LLMContextFrame,
|
||||
LLMFullResponseEndFrame,
|
||||
LLMFullResponseStartFrame,
|
||||
TTSSpeakFrame,
|
||||
)
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContextFrame
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.services.llm_service import FunctionCallParams
|
||||
from pipecat.services.openai.llm import OpenAILLMContext
|
||||
from pipecat.transports.base_transport import BaseTransport
|
||||
from pipecat.utils.enums import EndTaskReason
|
||||
|
||||
|
|
@ -63,7 +63,7 @@ class PipecatEngine:
|
|||
*,
|
||||
task: Optional[PipelineTask] = None,
|
||||
llm: Optional["LLMService"] = None,
|
||||
context: Optional[OpenAILLMContext] = None,
|
||||
context: Optional[LLMContext] = None,
|
||||
tts: Optional[Any] = None,
|
||||
transport: Optional[BaseTransport] = None,
|
||||
workflow: WorkflowGraph,
|
||||
|
|
@ -82,7 +82,6 @@ class PipecatEngine:
|
|||
self._workflow_run_id = workflow_run_id
|
||||
self._initialized = False
|
||||
self._client_disconnected = False
|
||||
self._pending_function_calls = 0
|
||||
self._current_node: Optional[Node] = None
|
||||
self._gathered_context: dict = {}
|
||||
self._user_response_timeout_task: Optional[asyncio.Task] = None
|
||||
|
|
@ -102,29 +101,9 @@ class PipecatEngine:
|
|||
self._voicemail_detector = None
|
||||
self._voicemail_detection_task: Optional[asyncio.Task] = None
|
||||
|
||||
# This transition is generated by the llm as part of tool call. This can
|
||||
# also be accompanied with some content which can be played using TTS. If the
|
||||
# bot is interrupted, we would cancel this transition (we do cancel this currently when
|
||||
# the next generation starts in handle_generation_started callback handler.)
|
||||
self._pending_generated_transition_after_context_push: Optional[
|
||||
Callable[[], Awaitable[None]]
|
||||
] = None
|
||||
|
||||
# This is the transition which is typically a programmatic transition, and does not go as a
|
||||
# tool call to LLM. This is not interrupted by the user and is done on context push
|
||||
self._pending_control_transition_after_context_push: Optional[
|
||||
Callable[[], Awaitable[None]]
|
||||
] = None
|
||||
|
||||
# Flag to determine if the current llm generation has a text completion
|
||||
self._defer_context_push: bool = False
|
||||
|
||||
# Lazy loaded built-in function schemas
|
||||
self._builtin_function_schemas: Optional[list[dict]] = None
|
||||
|
||||
# Flag to control whether to queue context frame
|
||||
self._queue_context_frame: bool = True
|
||||
|
||||
# Track current LLM reference text for TTS aggregation correction
|
||||
self._current_llm_reference_text: str = ""
|
||||
|
||||
|
|
@ -211,23 +190,15 @@ class PipecatEngine:
|
|||
|
||||
async def _create_transition_func(self, name: str, transition_to_node: str):
|
||||
async def transition_func(function_call_params: FunctionCallParams) -> None:
|
||||
"""Inner function that handles the actual tool invocation."""
|
||||
"""Inner function that handles the node change tool calls"""
|
||||
try:
|
||||
# Track pending function call
|
||||
self._pending_function_calls += 1
|
||||
logger.debug(
|
||||
f"Function call pending: {function_call_params.function_name} (total: {self._pending_function_calls})"
|
||||
)
|
||||
|
||||
# For edge functions, prevent LLM completion until transition (run_llm=False)
|
||||
# For node functions, allow immediate completion (run_llm=True)
|
||||
async def on_context_updated() -> None:
|
||||
"""
|
||||
Framework will run this function after the function call result has been updated in the context.
|
||||
pipecat framework will run this function after the function call result has been updated in the context.
|
||||
This way, when we do set_node from within this function, and go for LLM completion with updated
|
||||
system prompts, the context is updated with function call result.
|
||||
"""
|
||||
self._pending_function_calls -= 1
|
||||
# Perform variable extraction before transitioning to new node
|
||||
await self._perform_variable_extraction_if_needed(
|
||||
self._current_node
|
||||
|
|
@ -241,41 +212,14 @@ class PipecatEngine:
|
|||
on_context_updated=on_context_updated,
|
||||
)
|
||||
|
||||
async def _invoke_result_callback():
|
||||
"""
|
||||
Functions are executed immediately when they come from LLM as part of text completion.
|
||||
But, if the LLM completion also has some text, we would want to not call the function if the user interrupts the speech.
|
||||
We would also not want the function to be added to context, so that the LLM can call the function again. Hence, we
|
||||
defer the function invocation until we receive on_context_updated callback, i.e the bot has finished speaking
|
||||
the text that was generated.
|
||||
"""
|
||||
await function_call_params.result_callback(
|
||||
result, properties=properties
|
||||
)
|
||||
|
||||
if self._defer_context_push:
|
||||
"""
|
||||
We set the flag to _defer_context_push when we receive text in the current generation from LLM.
|
||||
This is set in the handle_llm_generated_text callback handler.
|
||||
"""
|
||||
logger.debug(
|
||||
"Deferring transition function result until context push"
|
||||
)
|
||||
# Only one deferred transition should exist at any time.
|
||||
# Overwrite if one is somehow already set (unexpected).
|
||||
self._pending_generated_transition_after_context_push = (
|
||||
_invoke_result_callback
|
||||
)
|
||||
else:
|
||||
"""
|
||||
If there was no text in the current generation, and we only had function call,
|
||||
lets invoke the result callback, so that framework can call on_context_updated and
|
||||
we can do switch node.
|
||||
"""
|
||||
await _invoke_result_callback()
|
||||
# Call results callback from the pipecat framework
|
||||
# so that a new llm generation can be triggered if
|
||||
# required
|
||||
await function_call_params.result_callback(
|
||||
result, properties=properties
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Error in transition function {name}: {str(e)}")
|
||||
self._pending_function_calls = 0
|
||||
error_result = {"status": "error", "error": str(e)}
|
||||
await function_call_params.result_callback(error_result)
|
||||
|
||||
|
|
@ -362,27 +306,6 @@ class PipecatEngine:
|
|||
]
|
||||
)
|
||||
|
||||
async def _setup_static_start_node_transition(self, node: Node) -> None:
    """Register the deferred hop away from a static start node.

    Does nothing when the node has no outgoing edges or when it is flagged
    with ``wait_for_user_response``. Otherwise a coroutine function that
    moves the engine to the target of the first outgoing edge is stored in
    ``_pending_control_transition_after_context_push``.

    Args:
        node: The static start node whose follow-up transition is prepared.
    """
    edges = node.out_edges
    if not edges:
        return

    # The target is resolved before the wait flag is inspected, matching the
    # original order of evaluation.
    target_id = edges[0].target

    if node.wait_for_user_response:
        return

    async def _deferred_static_transition():
        # Failures are logged rather than propagated to whoever runs the
        # deferred transition.
        try:
            await self.set_node(target_id)
        except Exception as exc:
            logger.error(
                f"Error executing deferred static node transition to {target_id}: {exc}"
            )

    self._pending_control_transition_after_context_push = _deferred_static_transition
|
||||
|
||||
async def _perform_variable_extraction_if_needed(
|
||||
self, previous_node: Optional[Node]
|
||||
) -> None:
|
||||
|
|
@ -441,17 +364,7 @@ class PipecatEngine:
|
|||
functions,
|
||||
) = await self._compose_system_message_functions_for_node(node)
|
||||
await self._update_llm_context(system_message, functions)
|
||||
|
||||
# Queue context frame if needed
|
||||
if self._queue_context_frame:
|
||||
await self.task.queue_frame(OpenAILLMContextFrame(self.context))
|
||||
else:
|
||||
logger.debug(
|
||||
f"Not queueing context frame for node: {node.name} as _queue_context_frame is False"
|
||||
)
|
||||
|
||||
# Reset _queue_context_frame as default behavior
|
||||
self._queue_context_frame = True
|
||||
await self.task.queue_frame(LLMContextFrame(self.context))
|
||||
|
||||
async def set_node(self, node_id: str):
|
||||
"""
|
||||
|
|
@ -525,12 +438,7 @@ class PipecatEngine:
|
|||
await asyncio.sleep(delay_duration)
|
||||
|
||||
if node.is_static:
|
||||
# Queue TTS for static start node
|
||||
formatted_prompt = self._format_prompt(node.prompt)
|
||||
await self._queue_tts_response(formatted_prompt)
|
||||
|
||||
# Set up deferred transition for static start nodes
|
||||
await self._setup_static_start_node_transition(node)
|
||||
raise ValueError("Static nodes are not supported!")
|
||||
else:
|
||||
# Start generation for non-static start node
|
||||
await self._setup_llm_context_and_start_generation(node)
|
||||
|
|
@ -538,66 +446,24 @@ class PipecatEngine:
|
|||
async def _handle_end_node(self, node: Node) -> None:
|
||||
"""Handle end node execution."""
|
||||
if node.is_static:
|
||||
# Queue TTS for static end node
|
||||
formatted_prompt = self._format_prompt(node.prompt)
|
||||
await self._queue_tts_response(formatted_prompt)
|
||||
raise ValueError("Static nodes are not supported!")
|
||||
else:
|
||||
# Start generation for non-static end node
|
||||
await self._setup_llm_context_and_start_generation(node)
|
||||
|
||||
# If this end node has extraction enabled, perform extraction immediately
|
||||
if node.extraction_enabled and node.extraction_variables:
|
||||
await self._perform_variable_extraction_if_needed(node)
|
||||
|
||||
# TODO: Extract disposition code from extracted variables
|
||||
# Defer send_end_task_frame using _pending_control_transition_after_context_push
|
||||
|
||||
# Decide the end-task reason dynamically depending on call_disposition.
|
||||
async def _deferred_end_task():
|
||||
# call_disposition is the disposition which is generated from
|
||||
# llm call based on the conversation so far.
|
||||
# TODO: Make this more generic based on configuration or llm prompting
|
||||
disposition = self._gathered_context.get("call_disposition")
|
||||
if disposition == "XFER":
|
||||
reason = EndTaskReason.USER_QUALIFIED.value
|
||||
else:
|
||||
reason = EndTaskReason.USER_DISQUALIFIED.value
|
||||
await self.send_end_task_frame(reason)
|
||||
|
||||
self._pending_control_transition_after_context_push = _deferred_end_task
|
||||
await self.send_end_task_frame(EndTaskReason.USER_QUALIFIED.value)
|
||||
|
||||
async def _handle_agent_node(self, node: Node) -> None:
|
||||
"""Handle agent node execution."""
|
||||
if node.is_static:
|
||||
# Queue TTS for static agent node
|
||||
formatted_prompt = self._format_prompt(node.prompt)
|
||||
await self._queue_tts_response(formatted_prompt)
|
||||
|
||||
# Set up deferred transition for static agent nodes
|
||||
await self._setup_agent_node_transition(node)
|
||||
raise ValueError("Static nodes are not supported!")
|
||||
else:
|
||||
# Set context and functions for non-static agent node
|
||||
await self._setup_llm_context_and_start_generation(node)
|
||||
|
||||
async def _setup_agent_node_transition(self, node: Node) -> None:
    """Register the deferred hop away from a static agent node.

    When the node has at least one outgoing edge, a coroutine function that
    moves the engine to the first edge's target is stored in
    ``_pending_control_transition_after_context_push``. Nodes without
    outgoing edges are left alone.

    Args:
        node: The static agent node whose follow-up transition is prepared.
    """
    edges = node.out_edges
    if not edges:
        return

    target_id = edges[0].target

    async def _deferred_static_transition():
        # Failures are logged rather than propagated to whoever runs the
        # deferred transition.
        try:
            await self.set_node(target_id)
        except Exception as exc:
            logger.error(
                f"Error executing deferred static node transition to {target_id}: {exc}"
            )

    self._pending_control_transition_after_context_push = _deferred_static_transition
|
||||
|
||||
async def send_end_task_frame(
|
||||
self,
|
||||
reason: str,
|
||||
|
|
@ -640,7 +506,7 @@ class PipecatEngine:
|
|||
# Store the mapped disconnect reason
|
||||
self._gathered_context["call_disposition"] = mapped_disposition
|
||||
|
||||
# TODO: Generalise this, currently tailored to Kapil's use case
|
||||
# TODO: Generalise this
|
||||
self._gathered_context["address"] = ", ".join(
|
||||
[
|
||||
self._call_context_vars.get("address1", ""),
|
||||
|
|
@ -759,55 +625,6 @@ class PipecatEngine:
|
|||
|
||||
return system_message, functions
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Pending transition handling
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def flush_pending_transitions(self, *, source: str = "context_push"):
    """Run and clear whichever deferred transitions are currently stored.

    The generated transition is handled first, then the control transition.
    Each slot is reset to ``None`` before its callback is awaited, and an
    exception raised by a callback is logged instead of propagated.

    Args:
        source: Trigger that caused the flush. Only ``"context_push"`` (the
            assistant context aggregator completed a push) is accepted.

    Raises:
        ValueError: If ``source`` is anything other than ``"context_push"``.
    """
    if source != "context_push":
        raise ValueError("Invalid flush source – expected 'context_push'")

    pending_total = sum(
        slot is not None
        for slot in (
            self._pending_generated_transition_after_context_push,
            self._pending_control_transition_after_context_push,
        )
    )
    if pending_total == 0:
        # Nothing has been deferred.
        return

    logger.debug(
        f"Flushing {pending_total} pending transition(s) after {source.replace('_', ' ')}"
    )

    generated_cb = self._pending_generated_transition_after_context_push
    if generated_cb is not None:
        self._pending_generated_transition_after_context_push = None
        try:
            await generated_cb()
        except Exception as exc:  # pragma: no cover
            logger.error(f"Error executing deferred transition: {exc}")

    # The control slot is read only now, after the generated callback has
    # run, so its value at this point is what gets executed.
    control_cb = self._pending_control_transition_after_context_push
    if control_cb is not None:
        logger.debug("Executing control transition after context push")
        self._pending_control_transition_after_context_push = None
        try:
            await control_cb()
        except Exception as exc:  # pragma: no cover
            logger.error(f"Error executing deferred static node transition: {exc}")
|
||||
|
||||
def create_should_mute_callback(self) -> Callable[[STTMuteFilter], Awaitable[bool]]:
|
||||
"""
|
||||
This callback is called by STTMuteFilter to determine if the STT should be muted.
|
||||
|
|
@ -828,15 +645,6 @@ class PipecatEngine:
|
|||
"""
|
||||
return engine_callbacks.create_max_duration_callback(self)
|
||||
|
||||
def create_llm_generated_text_callback(self):
|
||||
"""
|
||||
This callback is called when some text is generated by the LLM.
|
||||
We use this to defer the result_callback of the node transition functions if
|
||||
there is set_node called along with some text generated. This way, we will
|
||||
have the context sent in the next generation from new node.
|
||||
"""
|
||||
return engine_callbacks.create_llm_generated_text_callback(self)
|
||||
|
||||
def create_generation_started_callback(self):
|
||||
"""
|
||||
This callback is called when a new generation starts.
|
||||
|
|
@ -844,26 +652,12 @@ class PipecatEngine:
|
|||
"""
|
||||
return engine_callbacks.create_generation_started_callback(self)
|
||||
|
||||
def create_user_stopped_speaking_callback(self):
|
||||
"""
|
||||
This callback is called when the user stops speaking.
|
||||
We use this to handle transitions when wait_for_user_response is enabled.
|
||||
"""
|
||||
return engine_callbacks.create_user_stopped_speaking_callback(self)
|
||||
|
||||
def create_user_started_speaking_callback(self):
|
||||
"""
|
||||
This callback is called when the user starts speaking.
|
||||
We use this to handle wait_for_user_greeting functionality.
|
||||
"""
|
||||
return engine_callbacks.create_user_started_speaking_callback(self)
|
||||
|
||||
def create_aggregation_correction_callback(self) -> Callable[[str], str]:
|
||||
"""Create a callback that corrects corrupted aggregation using reference text."""
|
||||
return engine_callbacks.create_aggregation_correction_callback(self)
|
||||
|
||||
def set_context(self, context: OpenAILLMContext) -> None:
|
||||
"""Set the OpenAI LLM context.
|
||||
def set_context(self, context: LLMContext) -> None:
|
||||
"""Set the LLM context.
|
||||
|
||||
This allows setting the context after the engine has been created,
|
||||
which is useful when the context needs to be created after the engine.
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@ import re
|
|||
from typing import TYPE_CHECKING, Awaitable, Callable
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.frames.frames import (
|
||||
LLMFullResponseEndFrame,
|
||||
LLMFullResponseStartFrame,
|
||||
|
|
@ -23,9 +24,8 @@ from pipecat.processors.filters.stt_mute_filter import STTMuteFilter
|
|||
from pipecat.utils.enums import EndTaskReason
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pipecat.processors.user_idle_processor import UserIdleProcessor
|
||||
|
||||
from api.services.workflow.pipecat_engine import PipecatEngine
|
||||
from pipecat.processors.user_idle_processor import UserIdleProcessor
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -114,23 +114,6 @@ def create_max_duration_callback(engine: "PipecatEngine"):
|
|||
return handle_max_duration
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# LLM-generated-text handling
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def create_llm_generated_text_callback(engine: "PipecatEngine"):
|
||||
"""Return a callback invoked when the LLM emits text (not only tool calls)."""
|
||||
|
||||
async def handle_llm_generated_text(): # noqa: D401
|
||||
logger.debug(
|
||||
"Generation has text content in current response - deferring context push from set_node"
|
||||
)
|
||||
engine._defer_context_push = True
|
||||
|
||||
return handle_llm_generated_text
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Generation-started handling
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -140,96 +123,13 @@ def create_generation_started_callback(engine: "PipecatEngine"):
|
|||
"""Return a callback that resets flags at the start of each LLM generation."""
|
||||
|
||||
async def handle_generation_started(): # noqa: D401
|
||||
logger.debug("LLM generation started - resetting defer flags and tool counters")
|
||||
engine._defer_context_push = False
|
||||
engine._pending_function_calls = 0
|
||||
engine._pending_generated_transition_after_context_push = None
|
||||
logger.debug("LLM generation started in callback processor")
|
||||
# Clear reference text from previous generation
|
||||
engine._current_llm_reference_text = ""
|
||||
|
||||
return handle_generation_started
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# User-stopped-speaking handling
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def create_user_stopped_speaking_callback(engine: "PipecatEngine"):
    """Build the coroutine the pipeline awaits when the user stops speaking.

    The returned callback acts only while the engine's current node is a
    start node with ``wait_for_user_response`` set and at least one outgoing
    edge. In that case it:

    - cancels the user-response timeout task if it has not finished yet and
      clears the reference to it,
    - sets ``engine._queue_context_frame`` to ``False``,
    - moves the engine to the target of the first outgoing edge.

    Args:
        engine: The engine whose state the callback reads and updates.

    Returns:
        An argument-less coroutine function.
    """

    async def handle_user_stopped_speaking():
        node = engine._current_node
        waiting_on_start_node = (
            node
            and node.is_start
            and node.wait_for_user_response
            and node.out_edges
        )
        if not waiting_on_start_node:
            return

        # The user answered, so a still-running timeout is no longer needed.
        timeout_task = engine._user_response_timeout_task
        if timeout_task and not timeout_task.done():
            logger.debug("Cancelling user response timeout - user responded")
            timeout_task.cancel()
            engine._user_response_timeout_task = None

        next_node_id = node.out_edges[0].target
        logger.debug(
            f"User stopped speaking after wait_for_user_response - transitioning to: {next_node_id}"
        )

        # Do not queue a context frame from set_node: the user context
        # aggregator pushes it. Here only the next node's functions and
        # prompts are installed into the context.
        engine._queue_context_frame = False

        await engine.set_node(next_node_id)

    return handle_user_stopped_speaking
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# User-started-speaking handling
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def create_user_started_speaking_callback(engine: "PipecatEngine"):
    """Build the coroutine to run when the user starts speaking.

    When the current node is a start node with ``wait_for_user_response``
    set and the user-response timeout task is still running, the callback
    cancels that task. The engine keeps its reference to the cancelled task;
    it is not reset to ``None`` here.

    Args:
        engine: Engine whose current node and timeout task are inspected.

    Returns:
        A zero-argument coroutine function.
    """

    async def handle_user_started_speaking():
        node = engine._current_node

        # Only start nodes that are waiting for the user are relevant.
        if not (node and node.is_start and node.wait_for_user_response):
            return

        # No timer, or one that already finished: nothing to cancel.
        timeout_task = engine._user_response_timeout_task
        if not timeout_task or timeout_task.done():
            return

        logger.debug(
            "User started speaking during wait_for_user_response - cancelling timeout timer"
        )
        # Deliberately keep the reference; the user-stopped-speaking handler
        # is the one that clears it and performs the transition.
        timeout_task.cancel()

    return handle_user_started_speaking
|
||||
|
||||
|
||||
def create_aggregation_correction_callback(engine: "PipecatEngine"):
|
||||
"""Create a callback that uses engine's reference text to correct corrupted aggregation."""
|
||||
|
||||
|
|
|
|||
|
|
@ -2,16 +2,10 @@ from __future__ import annotations
|
|||
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from google.genai.types import (
|
||||
Content,
|
||||
Part,
|
||||
)
|
||||
from api.utils.template_renderer import render_template
|
||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.services.google.llm import GoogleLLMContext
|
||||
from pipecat.services.openai.llm import OpenAILLMContext
|
||||
|
||||
from api.utils.template_renderer import render_template
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
|
||||
__all__ = [
|
||||
"get_function_schema",
|
||||
|
|
@ -44,7 +38,7 @@ def get_function_schema(
|
|||
|
||||
|
||||
def update_llm_context(
|
||||
context: OpenAILLMContext,
|
||||
context: LLMContext,
|
||||
system_message: Dict[str, Any],
|
||||
functions: List[FunctionSchema],
|
||||
) -> None:
|
||||
|
|
@ -59,21 +53,6 @@ def update_llm_context(
|
|||
# associated with the current LLM service can convert them to the correct
|
||||
# provider-specific representation when required.
|
||||
tools_schema = ToolsSchema(standard_tools=functions)
|
||||
|
||||
if isinstance(context, GoogleLLMContext):
|
||||
context.system_message = system_message["content"]
|
||||
|
||||
if functions:
|
||||
# Lets only call set_tools if we have functions, else Gemini will
|
||||
# throw an exception
|
||||
context.set_tools(tools_schema)
|
||||
|
||||
if context.messages[-1].role != "user":
|
||||
# Google expects the last message should end with user message
|
||||
context.add_message(Content(role="user", parts=[Part(text="...")]))
|
||||
return
|
||||
|
||||
# In case of OpenAILLMContext, replace the system message with incoming system message
|
||||
previous_interactions = context.messages
|
||||
|
||||
# Filter out old system messages but keep user/assistant/function content.
|
||||
|
|
|
|||
|
|
@ -7,11 +7,11 @@ from typing import TYPE_CHECKING, Any, List
|
|||
from loguru import logger
|
||||
from openai import AsyncOpenAI
|
||||
from opentelemetry import trace
|
||||
from pipecat.services.openai.llm import OpenAILLMContext
|
||||
from pipecat.utils.tracing.service_attributes import add_llm_span_attributes
|
||||
|
||||
from api.services.pipecat.tracing_config import is_tracing_enabled
|
||||
from api.services.workflow.dto import ExtractionVariableDTO
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.utils.tracing.service_attributes import add_llm_span_attributes
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from api.services.workflow.pipecat_engine import PipecatEngine
|
||||
|
|
@ -139,7 +139,7 @@ class VariableExtractionManager:
|
|||
f"{conversation_history}"
|
||||
)
|
||||
|
||||
extraction_context = OpenAILLMContext()
|
||||
extraction_context = LLMContext()
|
||||
extraction_messages = [
|
||||
{"role": "system", "content": system_prompt},
|
||||
{"role": "user", "content": user_prompt},
|
||||
|
|
@ -171,7 +171,7 @@ class VariableExtractionManager:
|
|||
service_name="OpenAILLMService",
|
||||
model=self._model,
|
||||
operation_name="variable_extraction",
|
||||
messages=json.dumps(extraction_messages),
|
||||
messages=extraction_messages,
|
||||
output=llm_response,
|
||||
stream=False,
|
||||
parameters={"temperature": 0.0, "response_format": "json_object"},
|
||||
|
|
|
|||
|
|
@ -44,8 +44,6 @@ class Node:
|
|||
self.extraction_prompt = data.extraction_prompt
|
||||
self.extraction_variables = data.extraction_variables
|
||||
self.add_global_prompt = data.add_global_prompt
|
||||
self.wait_for_user_response = data.wait_for_user_response
|
||||
self.wait_for_user_response_timeout = data.wait_for_user_response_timeout
|
||||
self.detect_voicemail = data.detect_voicemail
|
||||
self.delayed_start = data.delayed_start
|
||||
self.delayed_start_duration = data.delayed_start_duration
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue