feat: simplify pipecat engine execution (#54)

This commit is contained in:
Abhishek 2025-11-15 17:38:27 +05:30 committed by GitHub
parent 99a768f291
commit 6ce25a589c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
20 changed files with 52 additions and 1405 deletions

View file

@ -24,6 +24,9 @@ from api.services.workflow.dto import ReactFlowDTO
from api.services.workflow.pipecat_engine import PipecatEngine
from api.services.workflow.workflow import WorkflowGraph
from pipecat.pipeline.pipeline import Pipeline
from pipecat.processors.aggregators.llm_response_universal import (
LLMContextAggregatorPair,
)
from pipecat.processors.filters.stt_mute_filter import (
STTMuteConfig,
STTMuteFilter,
@ -83,7 +86,8 @@ class LoopTalkPipelineBuilder:
audio_buffer, audio_synchronizer, transcript, context = (
create_pipeline_components(audio_config)
)
context_aggregator = llm.create_context_aggregator(context)
context_aggregator = LLMContextAggregatorPair(context)
# Get workflow graph
workflow_graph = WorkflowGraph(
@ -113,7 +117,6 @@ class LoopTalkPipelineBuilder:
pipeline_engine_callback_processor = PipelineEngineCallbacksProcessor(
max_call_duration_seconds=300,
max_duration_end_task_callback=engine.create_max_duration_callback(),
llm_generated_text_callback=engine.create_llm_generated_text_callback(),
generation_started_callback=engine.create_generation_started_callback(),
)

View file

@ -272,14 +272,6 @@ class LoopTalkTestOrchestrator:
await task.cancel()
# Connect the context aggregator events to engine
@assistant_context_aggregator.event_handler("on_push_aggregation")
async def on_assistant_aggregator_push_context(_aggregator):
logger.debug(
"Assistant aggregator push context flushing pending transitions"
)
await engine.flush_pending_transitions()
# Register custom audio and transcript handlers for LoopTalk
await self._register_looptalk_handlers(
audio_synchronizer, transcript, test_session_id, role

View file

@ -1,69 +0,0 @@
"""Engine Pre-Aggregator Processor
This processor sits before the user context aggregator in the pipeline and handles
engine-specific callbacks for frames that need to be processed before aggregation.
This ensures the engine can update context before the aggregator generates LLM frames.
"""
from typing import Awaitable, Callable, Optional
from loguru import logger
from api.services.pipecat.exceptions import VoicemailDetectedException
from pipecat.frames.frames import (
Frame,
UserStartedSpeakingFrame,
UserStoppedSpeakingFrame,
)
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
class EnginePreAggregatorProcessor(FrameProcessor):
"""
Processor that handles engine callbacks before user context aggregation.
This processor is positioned before the user context aggregator to ensure
the engine can update LLM context before aggregation occurs.
"""
def __init__(
self,
user_started_speaking_callback: Optional[Callable[[], Awaitable[None]]] = None,
user_stopped_speaking_callback: Optional[Callable[[], Awaitable[None]]] = None,
**kwargs,
):
super().__init__(**kwargs)
self._user_started_speaking_callback = user_started_speaking_callback
self._user_stopped_speaking_callback = user_stopped_speaking_callback
async def process_frame(self, frame: Frame, direction: FrameDirection):
await super().process_frame(frame, direction)
# Handle frames that need engine processing before aggregation
if isinstance(frame, UserStartedSpeakingFrame):
await self._handle_user_started_speaking()
elif isinstance(frame, UserStoppedSpeakingFrame):
try:
await self._handle_user_stopped_speaking()
except VoicemailDetectedException:
# We have detected voicemail, lets not
# forward the UserStoppedSpeakingFrame, so that
# we don't issue an llm call from user context
# aggregator
logger.debug("Voicemail detected, not pushing UserStoppedSpeakingFrame")
return
# Always push the frame downstream
await self.push_frame(frame, direction)
async def _handle_user_started_speaking(self):
"""Handle UserStartedSpeakingFrame before aggregation."""
if self._user_started_speaking_callback:
# logger.debug("Engine pre-aggregator: User started speaking")
await self._user_started_speaking_callback()
async def _handle_user_stopped_speaking(self):
"""Handle UserStoppedSpeakingFrame before aggregation."""
if self._user_stopped_speaking_callback:
# logger.debug("Engine pre-aggregator: User stopped speaking")
await self._user_stopped_speaking_callback()

View file

@ -9,7 +9,7 @@ from api.constants import (
from api.services.pipecat.audio_config import AudioConfig
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.processors.aggregators.llm_context import LLMContext
from pipecat.processors.audio.audio_buffer_processor import AudioBuffer
from pipecat.processors.audio.audio_synchronizer import AudioSynchronizer
from pipecat.processors.transcript_processor import TranscriptProcessor
@ -39,7 +39,7 @@ def create_pipeline_components(audio_config: AudioConfig, engine: "PipecatEngine
assistant_correct_aggregation_callback=engine.create_aggregation_correction_callback()
)
context = OpenAILLMContext()
context = LLMContext()
return audio_buffer, audio_synchronizer, transcript, context
@ -58,7 +58,6 @@ def build_pipeline(
stt_mute_filter,
pipeline_metrics_aggregator,
user_idle_disconnect,
engine_pre_aggregator_processor=None,
):
"""Build the main pipeline with all components"""
# Register processors with synchronizer for merged audio
@ -69,16 +68,12 @@ def build_pipeline(
processors = [
transport.input(), # Transport user input
audio_buffer.input(), # Record input audio (only processes InputAudioRawFrame)
stt_mute_filter,
stt, # STT can now have audio_passthrough=False
stt_mute_filter, # STTMuteFilters don't let VAD related events pass through if muted
user_idle_disconnect,
transcript.user(),
]
# Insert engine pre-aggregator processor if provided (before user aggregator)
if engine_pre_aggregator_processor:
processors.append(engine_pre_aggregator_processor)
processors.extend(
[
user_context_aggregator,

View file

@ -7,7 +7,6 @@ from pipecat.frames.frames import (
Frame,
HeartbeatFrame,
LLMFullResponseStartFrame,
LLMGeneratedTextFrame,
LLMTextFrame,
StartFrame,
TTSSpeakFrame,
@ -26,7 +25,6 @@ class PipelineEngineCallbacksProcessor(FrameProcessor):
self,
max_call_duration_seconds: int = 300,
max_duration_end_task_callback: Optional[Callable[[], Awaitable[None]]] = None,
llm_generated_text_callback: Optional[Callable[[], Awaitable[None]]] = None,
generation_started_callback: Optional[Callable[[], Awaitable[None]]] = None,
llm_text_frame_callback: Optional[Callable[[str], Awaitable[None]]] = None,
):
@ -34,7 +32,6 @@ class PipelineEngineCallbacksProcessor(FrameProcessor):
self._start_time = None
self._max_call_duration_seconds = max_call_duration_seconds
self._max_duration_end_task_callback = max_duration_end_task_callback
self._llm_generated_text_callback = llm_generated_text_callback
self._generation_started_callback = generation_started_callback
self._llm_text_frame_callback = llm_text_frame_callback
self._end_task_frame_pushed = False
@ -46,8 +43,6 @@ class PipelineEngineCallbacksProcessor(FrameProcessor):
await self._start(frame)
elif isinstance(frame, HeartbeatFrame):
await self._check_call_duration()
elif isinstance(frame, LLMGeneratedTextFrame):
await self._generated_text_frame(frame)
elif isinstance(frame, LLMFullResponseStartFrame):
await self._generation_started()
elif (
@ -74,11 +69,6 @@ class PipelineEngineCallbacksProcessor(FrameProcessor):
"Max call duration exceeded. Skipping EndTaskFrame since already sent"
)
async def _generated_text_frame(self, _: LLMGeneratedTextFrame):
"""Handle LLMGeneratedTextFrame."""
if self._llm_generated_text_callback is not None:
await self._llm_generated_text_callback()
async def _generation_started(self):
if self._generation_started_callback:
await self._generation_started_callback()

View file

@ -7,9 +7,6 @@ from api.db import db_client
from api.db.models import WorkflowModel
from api.enums import WorkflowRunMode
from api.services.pipecat.audio_config import AudioConfig, create_audio_config
from api.services.pipecat.engine_pre_aggregator_processor import (
EnginePreAggregatorProcessor,
)
from api.services.pipecat.event_handlers import (
register_audio_data_handler,
register_task_event_handler,
@ -43,6 +40,9 @@ from api.services.workflow.pipecat_engine import PipecatEngine
from api.services.workflow.workflow import WorkflowGraph
from pipecat.pipeline.runner import PipelineRunner
from pipecat.processors.aggregators.llm_response import LLMAssistantAggregatorParams
from pipecat.processors.aggregators.llm_response_universal import (
LLMContextAggregatorPair,
)
from pipecat.processors.filters.stt_mute_filter import (
STTMuteConfig,
STTMuteFilter,
@ -357,21 +357,14 @@ async def _run_pipeline(
expect_stripped_words=True,
correct_aggregation_callback=engine.create_aggregation_correction_callback(),
)
context_aggregator = llm.create_context_aggregator(
context_aggregator = LLMContextAggregatorPair(
context, assistant_params=assistant_params
)
# Create engine pre-aggregator processor for speaking events
engine_pre_aggregator_processor = EnginePreAggregatorProcessor(
user_started_speaking_callback=engine.create_user_started_speaking_callback(),
user_stopped_speaking_callback=engine.create_user_stopped_speaking_callback(),
)
# Create usage metrics aggregator with engine's callback
pipeline_engine_callback_processor = PipelineEngineCallbacksProcessor(
max_call_duration_seconds=max_call_duration_seconds,
max_duration_end_task_callback=engine.create_max_duration_callback(),
llm_generated_text_callback=engine.create_llm_generated_text_callback(),
generation_started_callback=engine.create_generation_started_callback(),
llm_text_frame_callback=engine.handle_llm_text_frame,
# Note: speaking event callbacks are now handled by pre-aggregator processor
@ -398,11 +391,6 @@ async def _run_pipeline(
user_context_aggregator = context_aggregator.user()
assistant_context_aggregator = context_aggregator.assistant()
@assistant_context_aggregator.event_handler("on_push_aggregation")
async def on_assistant_aggregator_push_context(_aggregator):
logger.debug("Assistant aggregator push context flushing pending transitions")
await engine.flush_pending_transitions(source="context_push")
# Build the pipeline with the STT mute filter and context controller
pipeline = build_pipeline(
transport,
@ -418,7 +406,6 @@ async def _run_pipeline(
stt_mute_filter,
pipeline_metrics_aggregator,
user_idle_disconnect,
engine_pre_aggregator_processor=engine_pre_aggregator_processor,
)
# Create pipeline task with audio configuration

View file

@ -14,14 +14,14 @@ from pipecat.frames.frames import (
CancelFrame,
EndFrame,
FunctionCallResultProperties,
LLMContextFrame,
LLMFullResponseEndFrame,
LLMFullResponseStartFrame,
TTSSpeakFrame,
)
from pipecat.pipeline.task import PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContextFrame
from pipecat.processors.aggregators.llm_context import LLMContext
from pipecat.services.llm_service import FunctionCallParams
from pipecat.services.openai.llm import OpenAILLMContext
from pipecat.transports.base_transport import BaseTransport
from pipecat.utils.enums import EndTaskReason
@ -63,7 +63,7 @@ class PipecatEngine:
*,
task: Optional[PipelineTask] = None,
llm: Optional["LLMService"] = None,
context: Optional[OpenAILLMContext] = None,
context: Optional[LLMContext] = None,
tts: Optional[Any] = None,
transport: Optional[BaseTransport] = None,
workflow: WorkflowGraph,
@ -82,7 +82,6 @@ class PipecatEngine:
self._workflow_run_id = workflow_run_id
self._initialized = False
self._client_disconnected = False
self._pending_function_calls = 0
self._current_node: Optional[Node] = None
self._gathered_context: dict = {}
self._user_response_timeout_task: Optional[asyncio.Task] = None
@ -102,29 +101,9 @@ class PipecatEngine:
self._voicemail_detector = None
self._voicemail_detection_task: Optional[asyncio.Task] = None
# This transition is generated by the llm as part of tool call. This can
# also be accompanied with some content which can be played using TTS. If the
# bot is interrupted, we would cancel this transition (we do cancel this currently when
# the next generation starts in handle_generation_started callback handler.)
self._pending_generated_transition_after_context_push: Optional[
Callable[[], Awaitable[None]]
] = None
# This is the transtion which is typically programmatic transition, and not goes as
# tool call to LLM. This is not interrupted by the user and is done on context push
self._pending_control_transition_after_context_push: Optional[
Callable[[], Awaitable[None]]
] = None
# Flag to determine if the current llm generation has a text completion
self._defer_context_push: bool = False
# Lazy loaded built-in function schemas
self._builtin_function_schemas: Optional[list[dict]] = None
# Flag to control whether to queue context frame
self._queue_context_frame: bool = True
# Track current LLM reference text for TTS aggregation correction
self._current_llm_reference_text: str = ""
@ -211,23 +190,15 @@ class PipecatEngine:
async def _create_transition_func(self, name: str, transition_to_node: str):
async def transition_func(function_call_params: FunctionCallParams) -> None:
"""Inner function that handles the actual tool invocation."""
"""Inner function that handles the node change tool calls"""
try:
# Track pending function call
self._pending_function_calls += 1
logger.debug(
f"Function call pending: {function_call_params.function_name} (total: {self._pending_function_calls})"
)
# For edge functions, prevent LLM completion until transition (run_llm=False)
# For node functions, allow immediate completion (run_llm=True)
async def on_context_updated() -> None:
"""
Framework will run this function after the function call result has been updated in the context.
pipecat framework will run this function after the function call result has been updated in the context.
This way, when we do set_node from within this function, and go for LLM completion with updated
system prompts, the context is updated with function call result.
"""
self._pending_function_calls -= 1
# Perform variable extraction before transitioning to new node
await self._perform_variable_extraction_if_needed(
self._current_node
@ -241,41 +212,14 @@ class PipecatEngine:
on_context_updated=on_context_updated,
)
async def _invoke_result_callback():
"""
Functions are executed immediately when they come from LLM as part of text completion.
But, if the LLM completion also has some text, we would want to not call the function if the user interrupts the speech.
We would also not want the function to be added to context, so that the LLM can call the function again. Hence, we
defer the function invocation until we receive on_context_updated callback, i.e the bot has finished speaking
the text that was generated.
"""
await function_call_params.result_callback(
result, properties=properties
)
if self._defer_context_push:
"""
We set the flag to _defer_context_push when we receive text in the current generation from LLM.
This is set in the handle_llm_generated_text callback handler.
"""
logger.debug(
"Deferring transition function result until context push"
)
# Only one deferred transition should exist at any time.
# Overwrite if one is somehow already set (unexpected).
self._pending_generated_transition_after_context_push = (
_invoke_result_callback
)
else:
"""
If there was no text in the current generation, and we only had function call,
lets invoke the result callback, so that framework can call on_context_updated and
we can do switch node.
"""
await _invoke_result_callback()
# Call results callback from the pipecat framework
# so that a new llm generation can be triggred if
# required
await function_call_params.result_callback(
result, properties=properties
)
except Exception as e:
logger.error(f"Error in transition function {name}: {str(e)}")
self._pending_function_calls = 0
error_result = {"status": "error", "error": str(e)}
await function_call_params.result_callback(error_result)
@ -362,27 +306,6 @@ class PipecatEngine:
]
)
async def _setup_static_start_node_transition(self, node: Node) -> None:
"""Set up the deferred transition for static start nodes."""
if not node.out_edges:
return
next_node_id = node.out_edges[0].target
if not node.wait_for_user_response:
# Normal static start node - transition immediately after context push
async def _deferred_static_transition():
try:
await self.set_node(next_node_id)
except Exception as exc:
logger.error(
f"Error executing deferred static node transition to {next_node_id}: {exc}"
)
self._pending_control_transition_after_context_push = (
_deferred_static_transition
)
async def _perform_variable_extraction_if_needed(
self, previous_node: Optional[Node]
) -> None:
@ -441,17 +364,7 @@ class PipecatEngine:
functions,
) = await self._compose_system_message_functions_for_node(node)
await self._update_llm_context(system_message, functions)
# Queue context frame if needed
if self._queue_context_frame:
await self.task.queue_frame(OpenAILLMContextFrame(self.context))
else:
logger.debug(
f"Not queueing context frame for node: {node.name} as _queue_context_frame is False"
)
# Reset _queue_context_frame as default behavior
self._queue_context_frame = True
await self.task.queue_frame(LLMContextFrame(self.context))
async def set_node(self, node_id: str):
"""
@ -525,12 +438,7 @@ class PipecatEngine:
await asyncio.sleep(delay_duration)
if node.is_static:
# Queue TTS for static start node
formatted_prompt = self._format_prompt(node.prompt)
await self._queue_tts_response(formatted_prompt)
# Set up deferred transition for static start nodes
await self._setup_static_start_node_transition(node)
raise ValueError("Static nodes are not supported!")
else:
# Start generation for non-static start node
await self._setup_llm_context_and_start_generation(node)
@ -538,66 +446,24 @@ class PipecatEngine:
async def _handle_end_node(self, node: Node) -> None:
"""Handle end node execution."""
if node.is_static:
# Queue TTS for static end node
formatted_prompt = self._format_prompt(node.prompt)
await self._queue_tts_response(formatted_prompt)
raise ValueError("Static nodes are not supported!")
else:
# Start generation for non-static end node
await self._setup_llm_context_and_start_generation(node)
# If this end node has extraction enabled, perform extraction immediately
if node.extraction_enabled and node.extraction_variables:
await self._perform_variable_extraction_if_needed(node)
# TODO: Extract disposition code from extracted variables
# Defer send_end_task_frame using _pending_control_transition_after_context_push
# Decide the end-task reason dynamically depending on call_disposition.
async def _deferred_end_task():
# call_disposition is the disposition which is generated from
# llm call based on the conversation so far.
# TODO: Make this more generic based on configuration or llm prompting
disposition = self._gathered_context.get("call_disposition")
if disposition == "XFER":
reason = EndTaskReason.USER_QUALIFIED.value
else:
reason = EndTaskReason.USER_DISQUALIFIED.value
await self.send_end_task_frame(reason)
self._pending_control_transition_after_context_push = _deferred_end_task
await self.send_end_task_frame(EndTaskReason.USER_QUALIFIED.value)
async def _handle_agent_node(self, node: Node) -> None:
"""Handle agent node execution."""
if node.is_static:
# Queue TTS for static agent node
formatted_prompt = self._format_prompt(node.prompt)
await self._queue_tts_response(formatted_prompt)
# Set up deferred transition for static agent nodes
await self._setup_agent_node_transition(node)
raise ValueError("Static nodes are not supported!")
else:
# Set context and functions for non-static agent node
await self._setup_llm_context_and_start_generation(node)
async def _setup_agent_node_transition(self, node: Node) -> None:
"""Set up the deferred transition for static agent nodes."""
if not node.out_edges:
return
next_node_id = node.out_edges[0].target
async def _deferred_static_transition():
try:
await self.set_node(next_node_id)
except Exception as exc:
logger.error(
f"Error executing deferred static node transition to {next_node_id}: {exc}"
)
self._pending_control_transition_after_context_push = (
_deferred_static_transition
)
async def send_end_task_frame(
self,
reason: str,
@ -640,7 +506,7 @@ class PipecatEngine:
# Store the mapped disconnect reason
self._gathered_context["call_disposition"] = mapped_disposition
# TODO: Generalise this, currently tailored to Kapil's use case
# TODO: Generalise this
self._gathered_context["address"] = ", ".join(
[
self._call_context_vars.get("address1", ""),
@ -759,55 +625,6 @@ class PipecatEngine:
return system_message, functions
# ------------------------------------------------------------------
# Pending transition handling
# ------------------------------------------------------------------
async def flush_pending_transitions(self, *, source: str = "context_push"):
"""Execute and clear any pending transitions.
Args:
source: Indicates the trigger that caused this flush:
- "context_push": the assistant context aggregator completed a push.
"""
if source != "context_push":
raise ValueError("Invalid flush source expected 'context_push'")
len_pending_functions = 0
if self._pending_generated_transition_after_context_push is not None:
len_pending_functions += 1
if self._pending_control_transition_after_context_push is not None:
len_pending_functions += 1
# Nothing to do
if len_pending_functions == 0:
return
logger.debug(
f"Flushing {len_pending_functions} pending transition(s) after {source.replace('_', ' ')}"
)
# Generated transition
if self._pending_generated_transition_after_context_push is not None:
pending_cb = self._pending_generated_transition_after_context_push
self._pending_generated_transition_after_context_push = None
try:
await pending_cb()
except Exception as exc: # pragma: no cover
logger.error(f"Error executing deferred transition: {exc}")
# Control transition (context push)
if self._pending_control_transition_after_context_push is not None:
logger.debug("Executing control transition after context push")
static_cb = self._pending_control_transition_after_context_push
self._pending_control_transition_after_context_push = None
try:
await static_cb()
except Exception as exc: # pragma: no cover
logger.error(f"Error executing deferred static node transition: {exc}")
def create_should_mute_callback(self) -> Callable[[STTMuteFilter], Awaitable[bool]]:
"""
This callback is called by STTMuteFilter to determine if the STT should be muted.
@ -828,15 +645,6 @@ class PipecatEngine:
"""
return engine_callbacks.create_max_duration_callback(self)
def create_llm_generated_text_callback(self):
"""
This callback is called when some text is generated by the LLM.
We use this to defer the result_callback of the node transition functions if
there is set_node called along with some text generated. This way, we will
have the context sent in the next generation from new node.
"""
return engine_callbacks.create_llm_generated_text_callback(self)
def create_generation_started_callback(self):
"""
This callback is called when a new generation starts.
@ -844,26 +652,12 @@ class PipecatEngine:
"""
return engine_callbacks.create_generation_started_callback(self)
def create_user_stopped_speaking_callback(self):
"""
This callback is called when the user stops speaking.
We use this to handle transitions when wait_for_user_response is enabled.
"""
return engine_callbacks.create_user_stopped_speaking_callback(self)
def create_user_started_speaking_callback(self):
"""
This callback is called when the user starts speaking.
We use this to handle wait_for_user_greeting functionality.
"""
return engine_callbacks.create_user_started_speaking_callback(self)
def create_aggregation_correction_callback(self) -> Callable[[str], str]:
"""Create a callback that corrects corrupted aggregation using reference text."""
return engine_callbacks.create_aggregation_correction_callback(self)
def set_context(self, context: OpenAILLMContext) -> None:
"""Set the OpenAI LLM context.
def set_context(self, context: LLMContext) -> None:
"""Set the LLM context.
This allows setting the context after the engine has been created,
which is useful when the context needs to be created after the engine.

View file

@ -14,6 +14,7 @@ import re
from typing import TYPE_CHECKING, Awaitable, Callable
from loguru import logger
from pipecat.frames.frames import (
LLMFullResponseEndFrame,
LLMFullResponseStartFrame,
@ -23,9 +24,8 @@ from pipecat.processors.filters.stt_mute_filter import STTMuteFilter
from pipecat.utils.enums import EndTaskReason
if TYPE_CHECKING:
from pipecat.processors.user_idle_processor import UserIdleProcessor
from api.services.workflow.pipecat_engine import PipecatEngine
from pipecat.processors.user_idle_processor import UserIdleProcessor
# ---------------------------------------------------------------------------
@ -114,23 +114,6 @@ def create_max_duration_callback(engine: "PipecatEngine"):
return handle_max_duration
# ---------------------------------------------------------------------------
# LLM-generated-text handling
# ---------------------------------------------------------------------------
def create_llm_generated_text_callback(engine: "PipecatEngine"):
"""Return a callback invoked when the LLM emits text (not only tool calls)."""
async def handle_llm_generated_text(): # noqa: D401
logger.debug(
"Generation has text content in current response - deferring context push from set_node"
)
engine._defer_context_push = True
return handle_llm_generated_text
# ---------------------------------------------------------------------------
# Generation-started handling
# ---------------------------------------------------------------------------
@ -140,96 +123,13 @@ def create_generation_started_callback(engine: "PipecatEngine"):
"""Return a callback that resets flags at the start of each LLM generation."""
async def handle_generation_started(): # noqa: D401
logger.debug("LLM generation started - resetting defer flags and tool counters")
engine._defer_context_push = False
engine._pending_function_calls = 0
engine._pending_generated_transition_after_context_push = None
logger.debug("LLM generation started in callback processor")
# Clear reference text from previous generation
engine._current_llm_reference_text = ""
return handle_generation_started
# ---------------------------------------------------------------------------
# User-stopped-speaking handling
# ---------------------------------------------------------------------------
def create_user_stopped_speaking_callback(engine: "PipecatEngine"):
"""Return a callback that handles when the user stops speaking.
According to simplified flow:
- For start nodes with wait_for_user_response=True:
- Cancel timeout task if still active
- Transition to next node with _queue_context_frame=False
"""
async def handle_user_stopped_speaking():
# Only handle if current node is a start node with wait_for_user_response
if (
engine._current_node
and engine._current_node.is_start
and engine._current_node.wait_for_user_response
and engine._current_node.out_edges
):
# Cancel timeout task if it's still active
if (
engine._user_response_timeout_task
and not engine._user_response_timeout_task.done()
):
logger.debug("Cancelling user response timeout - user responded")
engine._user_response_timeout_task.cancel()
engine._user_response_timeout_task = None
# Transition to next node
next_node_id = engine._current_node.out_edges[0].target
logger.debug(
f"User stopped speaking after wait_for_user_response - transitioning to: {next_node_id}"
)
# Set flag to not queue context frame since
# it will be pushed by user context aggregator
# we are just setting the context with next node's
# functions and prompts
engine._queue_context_frame = False
# Transition to next node
await engine.set_node(next_node_id)
return handle_user_stopped_speaking
# ---------------------------------------------------------------------------
# User-started-speaking handling
# ---------------------------------------------------------------------------
def create_user_started_speaking_callback(engine: "PipecatEngine"):
"""Return a callback that handles when the user starts speaking.
According to simplified flow:
- For start nodes with wait_for_user_response=True:
- Cancel the timeout timer if it exists (but don't set to None)
"""
async def handle_user_started_speaking():
# Only handle if current node is a start node with wait_for_user_response
if (
engine._current_node
and engine._current_node.is_start
and engine._current_node.wait_for_user_response
and engine._user_response_timeout_task
and not engine._user_response_timeout_task.done()
):
logger.debug(
"User started speaking during wait_for_user_response - cancelling timeout timer"
)
engine._user_response_timeout_task.cancel()
# Don't set to None here - let user_stopped_speaking handle the transition
return handle_user_started_speaking
def create_aggregation_correction_callback(engine: "PipecatEngine"):
"""Create a callback that uses engine's reference text to correct corrupted aggregation."""

View file

@ -2,16 +2,10 @@ from __future__ import annotations
from typing import Any, Dict, List
from google.genai.types import (
Content,
Part,
)
from api.utils.template_renderer import render_template
from pipecat.adapters.schemas.function_schema import FunctionSchema
from pipecat.adapters.schemas.tools_schema import ToolsSchema
from pipecat.services.google.llm import GoogleLLMContext
from pipecat.services.openai.llm import OpenAILLMContext
from api.utils.template_renderer import render_template
from pipecat.processors.aggregators.llm_context import LLMContext
__all__ = [
"get_function_schema",
@ -44,7 +38,7 @@ def get_function_schema(
def update_llm_context(
context: OpenAILLMContext,
context: LLMContext,
system_message: Dict[str, Any],
functions: List[FunctionSchema],
) -> None:
@ -59,21 +53,6 @@ def update_llm_context(
# associated with the current LLM service can convert them to the correct
# provider-specific representation when required.
tools_schema = ToolsSchema(standard_tools=functions)
if isinstance(context, GoogleLLMContext):
context.system_message = system_message["content"]
if functions:
# Lets only call set_tools if we have functions, else Gemini will
# throw an exception
context.set_tools(tools_schema)
if context.messages[-1].role != "user":
# Google expects the last message should end with user message
context.add_message(Content(role="user", parts=[Part(text="...")]))
return
# In case of OpenAILLMContext, replace the system message with incoming system message
previous_interactions = context.messages
# Filter out old system messages but keep user/assistant/function content.

View file

@ -7,11 +7,11 @@ from typing import TYPE_CHECKING, Any, List
from loguru import logger
from openai import AsyncOpenAI
from opentelemetry import trace
from pipecat.services.openai.llm import OpenAILLMContext
from pipecat.utils.tracing.service_attributes import add_llm_span_attributes
from api.services.pipecat.tracing_config import is_tracing_enabled
from api.services.workflow.dto import ExtractionVariableDTO
from pipecat.processors.aggregators.llm_context import LLMContext
from pipecat.utils.tracing.service_attributes import add_llm_span_attributes
if TYPE_CHECKING:
from api.services.workflow.pipecat_engine import PipecatEngine
@ -139,7 +139,7 @@ class VariableExtractionManager:
f"{conversation_history}"
)
extraction_context = OpenAILLMContext()
extraction_context = LLMContext()
extraction_messages = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt},
@ -171,7 +171,7 @@ class VariableExtractionManager:
service_name="OpenAILLMService",
model=self._model,
operation_name="variable_extraction",
messages=json.dumps(extraction_messages),
messages=extraction_messages,
output=llm_response,
stream=False,
parameters={"temperature": 0.0, "response_format": "json_object"},

View file

@ -44,8 +44,6 @@ class Node:
self.extraction_prompt = data.extraction_prompt
self.extraction_variables = data.extraction_variables
self.add_global_prompt = data.add_global_prompt
self.wait_for_user_response = data.wait_for_user_response
self.wait_for_user_response_timeout = data.wait_for_user_response_timeout
self.detect_voicemail = data.detect_voicemail
self.delayed_start = data.delayed_start
self.delayed_start_duration = data.delayed_start_duration