feat: add Gemini realtime and speaches integration

- Add Gemini realtime support
- Add speaches support for locally hosted LLMs
This commit is contained in:
Abhishek Kumar 2026-03-31 17:39:47 +05:30
parent 2eaaabd936
commit ee2028eb2d
19 changed files with 531 additions and 185 deletions

View file

@ -210,12 +210,17 @@ class PipecatEngine:
async def _update_llm_context(self, system_prompt: str, functions: list[dict]):
"""Update LLM settings with the composed system prompt and tool list."""
await self.llm._update_settings(LLMSettings(system_instruction=system_prompt))
if functions:
tools_schema = ToolsSchema(standard_tools=functions)
self.context.set_tools(tools_schema)
await self.llm._update_settings(LLMSettings(system_instruction=system_prompt))
# For Gemini Live, set context on the LLM before _update_settings so that
# _connect (triggered by reconnect) can read tools from it.
if hasattr(self.llm, "_context") and not self.llm._context and self.context:
self.llm._context = self.context
def _format_prompt(self, prompt: str) -> str:
"""Delegate prompt formatting to the shared workflow.utils implementation."""

View file

@ -215,13 +215,17 @@ class VariableExtractionManager:
with tracer.start_as_current_span(
"llm-variable-extraction", context=parent_ctx
) as span:
tracing_messages = [
{"role": "system", "content": system_prompt},
*extraction_messages,
]
add_llm_span_attributes(
span,
service_name=self._engine.llm.__class__.__name__,
model=model_name,
operation_name="llm-variable-extraction",
messages=extraction_messages,
output=llm_response,
messages=tracing_messages,
output=json.dumps({"content": llm_response}),
stream=False,
parameters={},
)

View file

@ -59,7 +59,14 @@ async def _generate_conversation_summary(
)
span_name = f"conversation-summary-before-{node_name}"
add_qa_span_to_trace(parent_ctx, model, messages, summary, span_name)
add_qa_span_to_trace(
parent_ctx,
model,
messages,
summary,
span_name,
CONVERSATION_SUMMARY_SYSTEM_PROMPT,
)
return summary
except Exception as e:
@ -189,7 +196,9 @@ async def run_per_node_qa_analysis(
# Trace
span_name = f"qa-node-{node_name}"
add_qa_span_to_trace(parent_ctx, model, messages, raw_response, span_name)
add_qa_span_to_trace(
parent_ctx, model, messages, raw_response, span_name, system_content
)
# Parse response
node_result: dict[str, Any] = {
@ -299,7 +308,9 @@ async def _run_whole_call_qa_analysis(
# Langfuse tracing
parent_ctx = setup_langfuse_parent_context(workflow_run)
add_qa_span_to_trace(parent_ctx, model, messages, raw_response, "qa-analysis")
add_qa_span_to_trace(
parent_ctx, model, messages, raw_response, "qa-analysis", system_content
)
return {
"node_results": {"whole_call": node_result},

View file

@ -1,5 +1,7 @@
"""LLM configuration resolution and token usage accumulation."""
import random
from api.db import db_client
from api.db.models import WorkflowRunModel
@ -57,6 +59,8 @@ async def resolve_user_llm_config(
provider = llm_config.get("provider", "openai")
api_key = llm_config.get("api_key", "")
if isinstance(api_key, list):
api_key = random.choice(api_key)
model = llm_config.get("model", "gpt-4.1")
kwargs = {}

View file

@ -166,7 +166,9 @@ async def ensure_node_summaries(
continue
# Create a Langfuse trace for this summary generation
trace_url = create_node_summary_trace(model, messages, summary_text, node_name)
trace_url = create_node_summary_trace(
model, messages, summary_text, node_name, NODE_SUMMARY_SYSTEM_PROMPT
)
entry: dict[str, Any] = {"summary": summary_text}
if trace_url:

View file

@ -1,5 +1,6 @@
"""Langfuse / OpenTelemetry tracing helpers for QA analysis."""
import json
import re
from loguru import logger
@ -70,6 +71,7 @@ def add_qa_span_to_trace(
messages: list[dict],
output: str,
span_name: str,
system_prompt: str = "",
) -> None:
"""Create a child span under the conversation trace."""
if parent_ctx is None:
@ -84,13 +86,21 @@ def add_qa_span_to_trace(
span_name,
context=parent_ctx,
) as span:
tracing_messages = (
[
{"role": "system", "content": system_prompt},
*messages,
]
if system_prompt
else messages
)
add_llm_span_attributes(
span,
service_name="OpenAILLMService",
model=model,
operation_name=span_name,
messages=messages,
output=output,
messages=tracing_messages,
output=json.dumps({"content": output}),
stream=False,
parameters={"temperature": 0},
)
@ -103,6 +113,7 @@ def create_node_summary_trace(
messages: list[dict],
output: str,
node_name: str,
system_prompt: str = "",
) -> str | None:
"""Create a standalone Langfuse trace for a node summary generation.
@ -125,13 +136,21 @@ def create_node_summary_trace(
f"node-summary-{node_name}",
context=Context(),
) as span:
tracing_messages = (
[
{"role": "system", "content": system_prompt},
*messages,
]
if system_prompt
else messages
)
add_llm_span_attributes(
span,
service_name="OpenAILLMService",
model=model,
operation_name=f"node-summary-{node_name}",
messages=messages,
output=output,
messages=tracing_messages,
output=json.dumps({"content": output}),
stream=False,
parameters={"temperature": 0},
)