mirror of
https://github.com/dograh-hq/dograh.git
synced 2026-06-16 08:25:18 +02:00
feat: add gemini realtime and speaches integration
- Add Gemini Realtime support
- Add Speaches support for locally hosted LLMs
This commit is contained in:
parent
2eaaabd936
commit
ee2028eb2d
19 changed files with 531 additions and 185 deletions
|
|
@ -210,12 +210,17 @@ class PipecatEngine:
|
|||
async def _update_llm_context(self, system_prompt: str, functions: list[dict]):
|
||||
"""Update LLM settings with the composed system prompt and tool list."""
|
||||
|
||||
await self.llm._update_settings(LLMSettings(system_instruction=system_prompt))
|
||||
|
||||
if functions:
|
||||
tools_schema = ToolsSchema(standard_tools=functions)
|
||||
self.context.set_tools(tools_schema)
|
||||
|
||||
await self.llm._update_settings(LLMSettings(system_instruction=system_prompt))
|
||||
|
||||
# For Gemini Live, set context on the LLM before _update_settings so that
|
||||
# _connect (triggered by reconnect) can read tools from it.
|
||||
if hasattr(self.llm, "_context") and not self.llm._context and self.context:
|
||||
self.llm._context = self.context
|
||||
|
||||
def _format_prompt(self, prompt: str) -> str:
|
||||
"""Delegate prompt formatting to the shared workflow.utils implementation."""
|
||||
|
||||
|
|
|
|||
|
|
@ -215,13 +215,17 @@ class VariableExtractionManager:
|
|||
with tracer.start_as_current_span(
|
||||
"llm-variable-extraction", context=parent_ctx
|
||||
) as span:
|
||||
tracing_messages = [
|
||||
{"role": "system", "content": system_prompt},
|
||||
*extraction_messages,
|
||||
]
|
||||
add_llm_span_attributes(
|
||||
span,
|
||||
service_name=self._engine.llm.__class__.__name__,
|
||||
model=model_name,
|
||||
operation_name="llm-variable-extraction",
|
||||
messages=extraction_messages,
|
||||
output=llm_response,
|
||||
messages=tracing_messages,
|
||||
output=json.dumps({"content": llm_response}),
|
||||
stream=False,
|
||||
parameters={},
|
||||
)
|
||||
|
|
|
|||
|
|
@ -59,7 +59,14 @@ async def _generate_conversation_summary(
|
|||
)
|
||||
|
||||
span_name = f"conversation-summary-before-{node_name}"
|
||||
add_qa_span_to_trace(parent_ctx, model, messages, summary, span_name)
|
||||
add_qa_span_to_trace(
|
||||
parent_ctx,
|
||||
model,
|
||||
messages,
|
||||
summary,
|
||||
span_name,
|
||||
CONVERSATION_SUMMARY_SYSTEM_PROMPT,
|
||||
)
|
||||
|
||||
return summary
|
||||
except Exception as e:
|
||||
|
|
@ -189,7 +196,9 @@ async def run_per_node_qa_analysis(
|
|||
|
||||
# Trace
|
||||
span_name = f"qa-node-{node_name}"
|
||||
add_qa_span_to_trace(parent_ctx, model, messages, raw_response, span_name)
|
||||
add_qa_span_to_trace(
|
||||
parent_ctx, model, messages, raw_response, span_name, system_content
|
||||
)
|
||||
|
||||
# Parse response
|
||||
node_result: dict[str, Any] = {
|
||||
|
|
@ -299,7 +308,9 @@ async def _run_whole_call_qa_analysis(
|
|||
|
||||
# Langfuse tracing
|
||||
parent_ctx = setup_langfuse_parent_context(workflow_run)
|
||||
add_qa_span_to_trace(parent_ctx, model, messages, raw_response, "qa-analysis")
|
||||
add_qa_span_to_trace(
|
||||
parent_ctx, model, messages, raw_response, "qa-analysis", system_content
|
||||
)
|
||||
|
||||
return {
|
||||
"node_results": {"whole_call": node_result},
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
"""LLM configuration resolution and token usage accumulation."""
|
||||
|
||||
import random
|
||||
|
||||
from api.db import db_client
|
||||
from api.db.models import WorkflowRunModel
|
||||
|
||||
|
|
@ -57,6 +59,8 @@ async def resolve_user_llm_config(
|
|||
|
||||
provider = llm_config.get("provider", "openai")
|
||||
api_key = llm_config.get("api_key", "")
|
||||
if isinstance(api_key, list):
|
||||
api_key = random.choice(api_key)
|
||||
model = llm_config.get("model", "gpt-4.1")
|
||||
|
||||
kwargs = {}
|
||||
|
|
|
|||
|
|
@ -166,7 +166,9 @@ async def ensure_node_summaries(
|
|||
continue
|
||||
|
||||
# Create a Langfuse trace for this summary generation
|
||||
trace_url = create_node_summary_trace(model, messages, summary_text, node_name)
|
||||
trace_url = create_node_summary_trace(
|
||||
model, messages, summary_text, node_name, NODE_SUMMARY_SYSTEM_PROMPT
|
||||
)
|
||||
|
||||
entry: dict[str, Any] = {"summary": summary_text}
|
||||
if trace_url:
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
"""Langfuse / OpenTelemetry tracing helpers for QA analysis."""
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from loguru import logger
|
||||
|
|
@ -70,6 +71,7 @@ def add_qa_span_to_trace(
|
|||
messages: list[dict],
|
||||
output: str,
|
||||
span_name: str,
|
||||
system_prompt: str = "",
|
||||
) -> None:
|
||||
"""Create a child span under the conversation trace."""
|
||||
if parent_ctx is None:
|
||||
|
|
@ -84,13 +86,21 @@ def add_qa_span_to_trace(
|
|||
span_name,
|
||||
context=parent_ctx,
|
||||
) as span:
|
||||
tracing_messages = (
|
||||
[
|
||||
{"role": "system", "content": system_prompt},
|
||||
*messages,
|
||||
]
|
||||
if system_prompt
|
||||
else messages
|
||||
)
|
||||
add_llm_span_attributes(
|
||||
span,
|
||||
service_name="OpenAILLMService",
|
||||
model=model,
|
||||
operation_name=span_name,
|
||||
messages=messages,
|
||||
output=output,
|
||||
messages=tracing_messages,
|
||||
output=json.dumps({"content": output}),
|
||||
stream=False,
|
||||
parameters={"temperature": 0},
|
||||
)
|
||||
|
|
@ -103,6 +113,7 @@ def create_node_summary_trace(
|
|||
messages: list[dict],
|
||||
output: str,
|
||||
node_name: str,
|
||||
system_prompt: str = "",
|
||||
) -> str | None:
|
||||
"""Create a standalone Langfuse trace for a node summary generation.
|
||||
|
||||
|
|
@ -125,13 +136,21 @@ def create_node_summary_trace(
|
|||
f"node-summary-{node_name}",
|
||||
context=Context(),
|
||||
) as span:
|
||||
tracing_messages = (
|
||||
[
|
||||
{"role": "system", "content": system_prompt},
|
||||
*messages,
|
||||
]
|
||||
if system_prompt
|
||||
else messages
|
||||
)
|
||||
add_llm_span_attributes(
|
||||
span,
|
||||
service_name="OpenAILLMService",
|
||||
model=model,
|
||||
operation_name=f"node-summary-{node_name}",
|
||||
messages=messages,
|
||||
output=output,
|
||||
messages=tracing_messages,
|
||||
output=json.dumps({"content": output}),
|
||||
stream=False,
|
||||
parameters={"temperature": 0},
|
||||
)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue