mirror of
https://github.com/dograh-hq/dograh.git
synced 2026-07-01 08:59:46 +02:00
feat: add hybrid text + recording functionality in agents (#191)
* feat: add recording feature in agents * chore: pin pipecat version * feat: show usage in UI * chore: update pipecat
This commit is contained in:
parent
f075bcb623
commit
494c60d774
43 changed files with 2865 additions and 397 deletions
|
|
@ -5,6 +5,7 @@ from api.services.workflow.disposition_mapper import (
|
|||
get_organization_id_from_workflow_run,
|
||||
)
|
||||
from api.services.workflow.workflow import Node, WorkflowGraph
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.frames.frames import (
|
||||
BotStartedSpeakingFrame,
|
||||
BotStoppedSpeakingFrame,
|
||||
|
|
@ -16,6 +17,7 @@ from pipecat.frames.frames import (
|
|||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.services.llm_service import FunctionCallParams
|
||||
from pipecat.services.settings import LLMSettings
|
||||
from pipecat.utils.enums import EndTaskReason
|
||||
|
||||
if TYPE_CHECKING:
|
||||
|
|
@ -31,18 +33,19 @@ import asyncio
|
|||
from loguru import logger
|
||||
|
||||
from api.services.workflow import pipecat_engine_callbacks as engine_callbacks
|
||||
from api.services.workflow.pipecat_engine_custom_tools import CustomToolManager
|
||||
from api.services.workflow.pipecat_engine_utils import (
|
||||
from api.services.workflow.pipecat_engine_context_composer import (
|
||||
compose_functions_for_node,
|
||||
compose_system_prompt_for_node,
|
||||
)
|
||||
from api.services.workflow.pipecat_engine_custom_tools import (
|
||||
CustomToolManager,
|
||||
get_function_schema,
|
||||
render_template,
|
||||
update_llm_context,
|
||||
)
|
||||
from api.services.workflow.pipecat_engine_variable_extractor import (
|
||||
VariableExtractionManager,
|
||||
)
|
||||
from api.services.workflow.tools.calculator import get_calculator_tools, safe_calculator
|
||||
from api.services.workflow.tools.knowledge_base import (
|
||||
get_knowledge_base_tool,
|
||||
retrieve_from_knowledge_base,
|
||||
)
|
||||
from api.services.workflow.tools.timezone import (
|
||||
|
|
@ -50,6 +53,7 @@ from api.services.workflow.tools.timezone import (
|
|||
get_current_time,
|
||||
get_time_tools,
|
||||
)
|
||||
from api.utils.template_renderer import render_template
|
||||
|
||||
|
||||
class PipecatEngine:
|
||||
|
|
@ -68,6 +72,7 @@ class PipecatEngine:
|
|||
embeddings_api_key: Optional[str] = None,
|
||||
embeddings_model: Optional[str] = None,
|
||||
embeddings_base_url: Optional[str] = None,
|
||||
has_recordings: bool = False,
|
||||
):
|
||||
self.task = task
|
||||
self.llm = llm
|
||||
|
|
@ -113,6 +118,10 @@ class PipecatEngine:
|
|||
# Audio configuration (set via set_audio_config from _run_pipeline)
|
||||
self._audio_config = None
|
||||
|
||||
# True when the workflow has active recordings; enables recording
|
||||
# response mode instructions on all nodes for in-context learning.
|
||||
self._has_recordings: bool = has_recordings
|
||||
|
||||
async def _get_organization_id(self) -> Optional[int]:
|
||||
"""Get and cache the organization ID from workflow run."""
|
||||
if self._custom_tool_manager:
|
||||
|
|
@ -194,15 +203,14 @@ class PipecatEngine:
|
|||
logger.error(f"Error initializing {self.__class__.__name__}: {e}")
|
||||
raise
|
||||
|
||||
def _get_function_schema(self, function_name: str, description: str):
|
||||
"""Thin wrapper around utils.get_function_schema for backwards compatibility."""
|
||||
async def _update_llm_context(self, system_prompt: str, functions: list[dict]):
|
||||
"""Update LLM settings with the composed system prompt and tool list."""
|
||||
|
||||
return get_function_schema(function_name, description)
|
||||
await self.llm._update_settings(LLMSettings(system_instruction=system_prompt))
|
||||
|
||||
async def _update_llm_context(self, system_message: dict, functions: list[dict]):
|
||||
"""Delegate context update to the shared workflow.utils implementation."""
|
||||
|
||||
update_llm_context(self.context, system_message, functions)
|
||||
if functions:
|
||||
tools_schema = ToolsSchema(standard_tools=functions)
|
||||
self.context.set_tools(tools_schema)
|
||||
|
||||
def _format_prompt(self, prompt: str) -> str:
|
||||
"""Delegate prompt formatting to the shared workflow.utils implementation."""
|
||||
|
|
@ -473,12 +481,19 @@ class PipecatEngine:
|
|||
if node.document_uuids:
|
||||
await self._register_knowledge_base_function(node.document_uuids)
|
||||
|
||||
# Set up system message and functions
|
||||
(
|
||||
system_message,
|
||||
functions,
|
||||
) = await self._compose_system_message_functions_for_node(node)
|
||||
await self._update_llm_context(system_message, functions)
|
||||
# Compose prompt and functions via the context composer module
|
||||
system_prompt = compose_system_prompt_for_node(
|
||||
node=node,
|
||||
workflow=self.workflow,
|
||||
format_prompt=self._format_prompt,
|
||||
has_recordings=self._has_recordings,
|
||||
)
|
||||
functions = await compose_functions_for_node(
|
||||
node=node,
|
||||
builtin_function_schemas=self.builtin_function_schemas,
|
||||
custom_tool_manager=self._custom_tool_manager,
|
||||
)
|
||||
await self._update_llm_context(system_prompt, functions)
|
||||
|
||||
async def set_node(self, node_id: str):
|
||||
"""
|
||||
|
|
@ -610,62 +625,6 @@ class PipecatEngine:
|
|||
)
|
||||
await self.task.queue_frame(frame_to_push)
|
||||
|
||||
async def _compose_system_message_functions_for_node(
|
||||
self, node: "Node"
|
||||
) -> tuple[list[dict], list[dict]]:
|
||||
"""Generate the system messages and function schemas for the given node.
|
||||
|
||||
This performs the same formatting logic used when entering a node but
|
||||
does **not** register the functions with the LLM; callers are
|
||||
responsible for that.
|
||||
"""
|
||||
|
||||
global_prompt = ""
|
||||
if self.workflow.global_node_id and node.add_global_prompt:
|
||||
global_node = self.workflow.nodes[self.workflow.global_node_id]
|
||||
global_prompt = self._format_prompt(global_node.prompt)
|
||||
|
||||
functions: list[dict] = []
|
||||
|
||||
# Add built-in function schemas (calculator and timezone tools)
|
||||
functions.extend(self.builtin_function_schemas)
|
||||
|
||||
# Add knowledge base retrieval tool if node has documents
|
||||
if node.document_uuids:
|
||||
kb_tool_def = get_knowledge_base_tool(node.document_uuids)
|
||||
kb_schema = get_function_schema(
|
||||
kb_tool_def["function"]["name"],
|
||||
kb_tool_def["function"]["description"],
|
||||
properties=kb_tool_def["function"]["parameters"].get("properties", {}),
|
||||
required=kb_tool_def["function"]["parameters"].get("required", []),
|
||||
)
|
||||
functions.append(kb_schema)
|
||||
|
||||
# Add custom tools from node.tool_uuids
|
||||
if node.tool_uuids and self._custom_tool_manager:
|
||||
custom_tool_schemas = await self._custom_tool_manager.get_tool_schemas(
|
||||
node.tool_uuids
|
||||
)
|
||||
functions.extend(custom_tool_schemas)
|
||||
|
||||
# Transition functions (schema only; registration handled elsewhere)
|
||||
for outgoing_edge in node.out_edges:
|
||||
function_schema = self._get_function_schema(
|
||||
outgoing_edge.get_function_name(), outgoing_edge.condition
|
||||
)
|
||||
functions.append(function_schema)
|
||||
|
||||
formatted_node_prompt = self._format_prompt(node.prompt)
|
||||
|
||||
system_message = {
|
||||
"role": "system",
|
||||
"content": "\n\n".join(
|
||||
p for p in (global_prompt, formatted_node_prompt) if p
|
||||
),
|
||||
}
|
||||
|
||||
return system_message, functions
|
||||
|
||||
async def should_mute_user(self, frame: "Frame") -> bool:
|
||||
"""
|
||||
Callback for CallbackUserMuteStrategy to determine if the user should be muted.
|
||||
|
|
|
|||
138
api/services/workflow/pipecat_engine_context_composer.py
Normal file
138
api/services/workflow/pipecat_engine_context_composer.py
Normal file
|
|
@ -0,0 +1,138 @@
|
|||
"""System prompt and function schema composition for PipecatEngine nodes.
|
||||
|
||||
Extracts prompt and function composition logic from PipecatEngine into
|
||||
reusable functions. Defines recording response mode markers and instructions.
|
||||
"""
|
||||
|
||||
from typing import TYPE_CHECKING, Callable, Optional
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from api.services.workflow.pipecat_engine_custom_tools import CustomToolManager
|
||||
from api.services.workflow.workflow import Node, WorkflowGraph
|
||||
|
||||
from api.services.workflow.pipecat_engine_custom_tools import get_function_schema
|
||||
from api.services.workflow.tools.knowledge_base import get_knowledge_base_tool
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Recording response mode markers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
RECORDING_MARKER = "●" # Play pre-recorded audio
|
||||
TTS_MARKER = "▸" # Generate dynamic TTS text
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Recording response mode system prompt instructions
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
RECORDING_RESPONSE_MODE_INSTRUCTIONS = """\
|
||||
RESPONSE MODE INSTRUCTIONS - MANDATORY FORMAT:
|
||||
Every response you generate MUST begin with a response mode indicator.
|
||||
You have two modes for responding:
|
||||
|
||||
1. DYNAMIC SPEECH (▸): Generate text that will be converted to speech by TTS.
|
||||
Format: `▸` followed by a space and your full spoken response.
|
||||
Example: ▸ Hello! How can I help you today?
|
||||
|
||||
2. PRE-RECORDED AUDIO (●): Play a pre-recorded audio message.
|
||||
Format: `●` followed by a space and ONLY the recording_id. Nothing else.
|
||||
Example: ● rec_greeting_01
|
||||
|
||||
RULES:
|
||||
- Your response MUST start with either `▸` or `●` as the very first character.
|
||||
- For `▸` (dynamic speech): Follow with a space and your full response text.
|
||||
- For `●` (pre-recorded audio): Follow with a space and ONLY the recording_id. No other text.
|
||||
- Use `●` when a pre-recorded message matches the situation well.
|
||||
- Use `▸` when you need to generate a dynamic, contextual response.
|
||||
- NEVER mix modes in a single response. Choose one."""
|
||||
|
||||
|
||||
def compose_system_prompt_for_node(
|
||||
*,
|
||||
node: "Node",
|
||||
workflow: "WorkflowGraph",
|
||||
format_prompt: Callable[[str], str],
|
||||
has_recordings: bool,
|
||||
) -> str:
|
||||
"""Compose the full system prompt text for a workflow node.
|
||||
|
||||
Combines the global prompt, node-specific prompt, and (when recordings
|
||||
are enabled anywhere in the workflow) the recording response mode
|
||||
instructions into a single string.
|
||||
|
||||
Args:
|
||||
node: The workflow node to compose the prompt for.
|
||||
workflow: The full workflow graph (needed for global node prompt).
|
||||
format_prompt: Callable to render template variables in prompts.
|
||||
has_recordings: Whether any node in the workflow uses recordings.
|
||||
|
||||
Returns:
|
||||
The composed system prompt text.
|
||||
"""
|
||||
global_prompt = ""
|
||||
if workflow.global_node_id and node.add_global_prompt:
|
||||
global_node = workflow.nodes[workflow.global_node_id]
|
||||
global_prompt = format_prompt(global_node.prompt)
|
||||
|
||||
formatted_node_prompt = format_prompt(node.prompt)
|
||||
|
||||
parts = [p for p in (global_prompt, formatted_node_prompt) if p]
|
||||
|
||||
if has_recordings:
|
||||
parts.append(RECORDING_RESPONSE_MODE_INSTRUCTIONS)
|
||||
# TODO: Append per-node available recordings list here once
|
||||
# Node.recording_ids is populated. The list should include
|
||||
# recording_id and a short description so the LLM can choose.
|
||||
|
||||
return "\n\n".join(parts)
|
||||
|
||||
|
||||
async def compose_functions_for_node(
|
||||
*,
|
||||
node: "Node",
|
||||
builtin_function_schemas: list[dict],
|
||||
custom_tool_manager: Optional["CustomToolManager"],
|
||||
) -> list[dict]:
|
||||
"""Compose the function/tool schemas for a workflow node.
|
||||
|
||||
Gathers built-in tools, knowledge-base tools, custom tools,
|
||||
and transition function schemas into a single list.
|
||||
|
||||
Args:
|
||||
node: The workflow node to compose functions for.
|
||||
builtin_function_schemas: Pre-computed schemas for built-in tools.
|
||||
custom_tool_manager: Manager for user-defined custom tools (may be None).
|
||||
|
||||
Returns:
|
||||
A list of function schemas to register with the LLM.
|
||||
"""
|
||||
functions: list[dict] = []
|
||||
|
||||
# Built-in tools (calculator, timezone)
|
||||
functions.extend(builtin_function_schemas)
|
||||
|
||||
# Knowledge base retrieval tool
|
||||
if node.document_uuids:
|
||||
kb_tool_def = get_knowledge_base_tool(node.document_uuids)
|
||||
kb_schema = get_function_schema(
|
||||
kb_tool_def["function"]["name"],
|
||||
kb_tool_def["function"]["description"],
|
||||
properties=kb_tool_def["function"]["parameters"].get("properties", {}),
|
||||
required=kb_tool_def["function"]["parameters"].get("required", []),
|
||||
)
|
||||
functions.append(kb_schema)
|
||||
|
||||
# Custom tools
|
||||
if node.tool_uuids and custom_tool_manager:
|
||||
custom_tool_schemas = await custom_tool_manager.get_tool_schemas(
|
||||
node.tool_uuids
|
||||
)
|
||||
functions.extend(custom_tool_schemas)
|
||||
|
||||
# Transition function schemas
|
||||
for outgoing_edge in node.out_edges:
|
||||
function_schema = get_function_schema(
|
||||
outgoing_edge.get_function_name(), outgoing_edge.condition
|
||||
)
|
||||
functions.append(function_schema)
|
||||
|
||||
return functions
|
||||
|
|
@ -10,7 +10,7 @@ import asyncio
|
|||
import re
|
||||
import time
|
||||
import uuid
|
||||
from typing import TYPE_CHECKING, Any, Optional
|
||||
from typing import TYPE_CHECKING, Any, Dict, List, Optional
|
||||
|
||||
from loguru import logger
|
||||
|
||||
|
|
@ -23,7 +23,6 @@ from api.services.telephony.transfer_event_protocol import TransferContext
|
|||
from api.services.workflow.disposition_mapper import (
|
||||
get_organization_id_from_workflow_run,
|
||||
)
|
||||
from api.services.workflow.pipecat_engine_utils import get_function_schema
|
||||
from api.services.workflow.tools.custom_tool import (
|
||||
execute_http_tool,
|
||||
tool_to_function_schema,
|
||||
|
|
@ -42,6 +41,29 @@ if TYPE_CHECKING:
|
|||
from api.services.workflow.pipecat_engine import PipecatEngine
|
||||
|
||||
|
||||
def get_function_schema(
|
||||
function_name: str,
|
||||
description: str,
|
||||
*,
|
||||
properties: Dict[str, Any] | None = None,
|
||||
required: List[str] | None = None,
|
||||
) -> FunctionSchema:
|
||||
"""Create a FunctionSchema definition that can later be transformed into
|
||||
the provider-specific format (OpenAI, Gemini, etc.).
|
||||
|
||||
The helper keeps the public signature backward-compatible – callers that
|
||||
only pass ``function_name`` and ``description`` continue to work and will
|
||||
define a parameter-less function.
|
||||
"""
|
||||
|
||||
return FunctionSchema(
|
||||
name=function_name,
|
||||
description=description,
|
||||
properties=properties or {},
|
||||
required=required or [],
|
||||
)
|
||||
|
||||
|
||||
class CustomToolManager:
|
||||
"""Manager for custom tool registration and execution.
|
||||
|
||||
|
|
|
|||
|
|
@ -1,68 +0,0 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from api.utils.template_renderer import render_template
|
||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
|
||||
__all__ = [
|
||||
"get_function_schema",
|
||||
"update_llm_context",
|
||||
"render_template",
|
||||
]
|
||||
|
||||
|
||||
def get_function_schema(
|
||||
function_name: str,
|
||||
description: str,
|
||||
*,
|
||||
properties: Dict[str, Any] | None = None,
|
||||
required: List[str] | None = None,
|
||||
) -> FunctionSchema:
|
||||
"""Create a FunctionSchema definition that can later be transformed into
|
||||
the provider-specific format (OpenAI, Gemini, etc.).
|
||||
|
||||
The helper keeps the public signature backward-compatible – callers that
|
||||
only pass ``function_name`` and ``description`` continue to work and will
|
||||
define a parameter-less function.
|
||||
"""
|
||||
|
||||
return FunctionSchema(
|
||||
name=function_name,
|
||||
description=description,
|
||||
properties=properties or {},
|
||||
required=required or [],
|
||||
)
|
||||
|
||||
|
||||
def update_llm_context(
|
||||
context: LLMContext,
|
||||
system_message: Dict[str, Any],
|
||||
functions: List[FunctionSchema],
|
||||
) -> None:
|
||||
"""Update *context* with an up-to-date system message and tool list.
|
||||
|
||||
This helper removes any previous system messages before inserting the new
|
||||
*system_message* at the top of the conversation history and then instructs
|
||||
the LLM which *functions* (a.k.a. tools) are currently available.
|
||||
"""
|
||||
|
||||
# Wrap the provided function schemas in a ToolsSchema so that the adapter
|
||||
# associated with the current LLM service can convert them to the correct
|
||||
# provider-specific representation when required.
|
||||
tools_schema = ToolsSchema(standard_tools=functions)
|
||||
previous_interactions = context.messages
|
||||
|
||||
# Replace the first message if it's a system message, otherwise prepend.
|
||||
# Keep any system messages that appear in the middle of the conversation.
|
||||
if previous_interactions and previous_interactions[0]["role"] == "system":
|
||||
messages = [system_message] + previous_interactions[1:]
|
||||
else:
|
||||
messages = [system_message] + previous_interactions
|
||||
|
||||
context.set_messages(messages)
|
||||
|
||||
if functions:
|
||||
context.set_tools(tools_schema)
|
||||
Loading…
Add table
Add a link
Reference in a new issue