feat: add gemini realtime and speaches integration

- Add Gemini realtime support
- Add Speaches support for locally hosted LLMs
2026-06-16 08:25:18 +02:00 · 2026-03-31 17:39:47 +05:30 · 2026-03-31 17:39:47 +05:30 · ee2028eb2d
commit ee2028eb2d
parent 2eaaabd936
19 changed files with 531 additions and 185 deletions
--- a/api/services/workflow/pipecat_engine.py
+++ b/api/services/workflow/pipecat_engine.py
@ -210,12 +210,17 @@ class PipecatEngine:
    async def _update_llm_context(self, system_prompt: str, functions: list[dict]):
        """Update LLM settings with the composed system prompt and tool list."""

-        await self.llm._update_settings(LLMSettings(system_instruction=system_prompt))
-
        if functions:
            tools_schema = ToolsSchema(standard_tools=functions)
            self.context.set_tools(tools_schema)

+        # For Gemini Live, set context on the LLM before _update_settings so that
+        # _connect (triggered by reconnect) can read tools from it.
+        if hasattr(self.llm, "_context") and not self.llm._context and self.context:
+            self.llm._context = self.context
+
+        await self.llm._update_settings(LLMSettings(system_instruction=system_prompt))
+
    def _format_prompt(self, prompt: str) -> str:
        """Delegate prompt formatting to the shared workflow.utils implementation."""

--- a/api/services/workflow/pipecat_engine_variable_extractor.py
+++ b/api/services/workflow/pipecat_engine_variable_extractor.py
@ -215,13 +215,17 @@ class VariableExtractionManager:
            with tracer.start_as_current_span(
                "llm-variable-extraction", context=parent_ctx
            ) as span:
+                tracing_messages = [
+                    {"role": "system", "content": system_prompt},
+                    *extraction_messages,
+                ]
                add_llm_span_attributes(
                    span,
                    service_name=self._engine.llm.__class__.__name__,
                    model=model_name,
                    operation_name="llm-variable-extraction",
-                    messages=extraction_messages,
-                    output=llm_response,
+                    messages=tracing_messages,
+                    output=json.dumps({"content": llm_response}),
                    stream=False,
                    parameters={},
                )
--- a/api/services/workflow/qa/analysis.py
+++ b/api/services/workflow/qa/analysis.py
@ -59,7 +59,14 @@ async def _generate_conversation_summary(
        )

        span_name = f"conversation-summary-before-{node_name}"
-        add_qa_span_to_trace(parent_ctx, model, messages, summary, span_name)
+        add_qa_span_to_trace(
+            parent_ctx,
+            model,
+            messages,
+            summary,
+            span_name,
+            CONVERSATION_SUMMARY_SYSTEM_PROMPT,
+        )

        return summary
    except Exception as e:
@ -189,7 +196,9 @@ async def run_per_node_qa_analysis(

        # Trace
        span_name = f"qa-node-{node_name}"
-        add_qa_span_to_trace(parent_ctx, model, messages, raw_response, span_name)
+        add_qa_span_to_trace(
+            parent_ctx, model, messages, raw_response, span_name, system_content
+        )

        # Parse response
        node_result: dict[str, Any] = {
@ -299,7 +308,9 @@ async def _run_whole_call_qa_analysis(

    # Langfuse tracing
    parent_ctx = setup_langfuse_parent_context(workflow_run)
-    add_qa_span_to_trace(parent_ctx, model, messages, raw_response, "qa-analysis")
+    add_qa_span_to_trace(
+        parent_ctx, model, messages, raw_response, "qa-analysis", system_content
+    )

    return {
        "node_results": {"whole_call": node_result},
--- a/api/services/workflow/qa/llm_config.py
+++ b/api/services/workflow/qa/llm_config.py
@ -1,5 +1,7 @@
 """LLM configuration resolution and token usage accumulation."""

+import random
+
 from api.db import db_client
 from api.db.models import WorkflowRunModel

@ -57,6 +59,8 @@ async def resolve_user_llm_config(

    provider = llm_config.get("provider", "openai")
    api_key = llm_config.get("api_key", "")
+    if isinstance(api_key, list):
+        api_key = random.choice(api_key)
    model = llm_config.get("model", "gpt-4.1")

    kwargs = {}
--- a/api/services/workflow/qa/node_summary.py
+++ b/api/services/workflow/qa/node_summary.py
@ -166,7 +166,9 @@ async def ensure_node_summaries(
            continue

        # Create a Langfuse trace for this summary generation
-        trace_url = create_node_summary_trace(model, messages, summary_text, node_name)
+        trace_url = create_node_summary_trace(
+            model, messages, summary_text, node_name, NODE_SUMMARY_SYSTEM_PROMPT
+        )

        entry: dict[str, Any] = {"summary": summary_text}
        if trace_url:
--- a/api/services/workflow/qa/tracing.py
+++ b/api/services/workflow/qa/tracing.py
@ -1,5 +1,6 @@
 """Langfuse / OpenTelemetry tracing helpers for QA analysis."""

+import json
 import re

 from loguru import logger
@ -70,6 +71,7 @@ def add_qa_span_to_trace(
    messages: list[dict],
    output: str,
    span_name: str,
+    system_prompt: str = "",
 ) -> None:
    """Create a child span under the conversation trace."""
    if parent_ctx is None:
@ -84,13 +86,21 @@ def add_qa_span_to_trace(
            span_name,
            context=parent_ctx,
        ) as span:
+            tracing_messages = (
+                [
+                    {"role": "system", "content": system_prompt},
+                    *messages,
+                ]
+                if system_prompt
+                else messages
+            )
            add_llm_span_attributes(
                span,
                service_name="OpenAILLMService",
                model=model,
                operation_name=span_name,
-                messages=messages,
-                output=output,
+                messages=tracing_messages,
+                output=json.dumps({"content": output}),
                stream=False,
                parameters={"temperature": 0},
            )
@ -103,6 +113,7 @@ def create_node_summary_trace(
    messages: list[dict],
    output: str,
    node_name: str,
+    system_prompt: str = "",
 ) -> str | None:
    """Create a standalone Langfuse trace for a node summary generation.

@ -125,13 +136,21 @@ def create_node_summary_trace(
            f"node-summary-{node_name}",
            context=Context(),
        ) as span:
+            tracing_messages = (
+                [
+                    {"role": "system", "content": system_prompt},
+                    *messages,
+                ]
+                if system_prompt
+                else messages
+            )
            add_llm_span_attributes(
                span,
                service_name="OpenAILLMService",
                model=model,
                operation_name=f"node-summary-{node_name}",
-                messages=messages,
-                output=output,
+                messages=tracing_messages,
+                output=json.dumps({"content": output}),
                stream=False,
                parameters={"temperature": 0},
            )