feat: run per node QA

This commit is contained in:
Abhishek Kumar 2026-02-25 17:17:48 +05:30
parent a836825b83
commit c8742dbdc0
13 changed files with 1012 additions and 373 deletions

View file

@ -0,0 +1,12 @@
"""QA analysis service for post-call quality assessment.
Runs LLM-based analysis on call transcripts, traces under the same
Langfuse trace as the conversation, and returns structured results.
Supports per-node QA analysis where each agent/start node gets its own
evaluation with node purpose summary and prior conversation context.
"""
from api.services.workflow.qa.analysis import run_per_node_qa_analysis
__all__ = ["run_per_node_qa_analysis"]

View file

@ -0,0 +1,333 @@
"""Main QA analysis orchestrator — per-node and whole-call fallback."""
import json
from typing import Any
from loguru import logger
from openai import AsyncOpenAI
from api.db.models import WorkflowRunModel
from api.services.gen_ai.json_parser import parse_llm_json
from api.services.workflow.qa.conversation import (
build_conversation_structure,
format_transcript,
split_events_by_node,
)
from api.services.workflow.qa.llm_config import (
accumulate_token_usage,
resolve_llm_config,
)
from api.services.workflow.qa.metrics import compute_call_metrics
from api.services.workflow.qa.node_summary import (
CONVERSATION_SUMMARY_SYSTEM_PROMPT,
ensure_node_summaries,
get_node_summary_text,
)
from api.services.workflow.qa.tracing import (
add_qa_span_to_trace,
setup_langfuse_parent_context,
)
from api.utils.template_renderer import render_template
async def _generate_conversation_summary(
client: AsyncOpenAI,
model: str,
transcript: str,
parent_ctx,
node_name: str,
total_token_usage: dict,
) -> str:
"""Generate a summary of the conversation so far (before the current node).
Traced to Langfuse as conversation-summary-before-{node_name}.
"""
messages = [
{"role": "system", "content": CONVERSATION_SUMMARY_SYSTEM_PROMPT},
{"role": "user", "content": f"## Conversation\n{transcript}"},
]
try:
response = await client.chat.completions.create(
model=model,
messages=messages,
temperature=0,
)
summary = response.choices[0].message.content or ""
accumulate_token_usage(total_token_usage, response)
span_name = f"conversation-summary-before-{node_name}"
add_qa_span_to_trace(parent_ctx, model, messages, summary, span_name)
return summary
except Exception as e:
logger.warning(
f"Failed to generate conversation summary before {node_name}: {e}"
)
return ""
async def run_per_node_qa_analysis(
qa_node_data: dict[str, Any],
workflow_run: WorkflowRunModel,
workflow_run_id: int,
workflow_definition: dict,
definition_id: int | None,
) -> dict[str, Any]:
"""Run per-node QA analysis on a completed workflow run.
Splits the call by node, generates per-node summaries and conversation
context, then evaluates each node segment individually.
Falls back to whole-call QA if events lack node_id.
Returns:
Dict with node_results, token_usage, model
"""
logs = workflow_run.logs or {}
rtf_events = logs.get("realtime_feedback_events", [])
if not rtf_events:
logger.warning(f"No realtime_feedback_events for run {workflow_run_id}")
return {"error": "no_transcript", "node_results": {}}
# Try to split by node
node_splits = split_events_by_node(rtf_events)
if not node_splits:
# Fall back to whole-call QA
logger.info(
f"Events lack node_id for run {workflow_run_id}, falling back to whole-call QA"
)
return await _run_whole_call_qa_analysis(
qa_node_data, workflow_run, workflow_run_id
)
system_prompt = qa_node_data.get("qa_system_prompt", "")
if not system_prompt:
logger.warning("No system prompt defined for QA Node")
return {"error": "no_system_prompt", "node_results": {}}
# Resolve LLM config
model, api_key, base_url = await resolve_llm_config(qa_node_data, workflow_run)
if not api_key:
logger.warning(
f"No LLM API key configured for QA analysis on run {workflow_run_id}"
)
return {"error": "no_api_key", "node_results": {}}
# Ensure node summaries
node_summaries = await ensure_node_summaries(
workflow_definition, definition_id, workflow_run, qa_node_data
)
# Set up Langfuse tracing
parent_ctx = setup_langfuse_parent_context(workflow_run)
# Build LLM client
client_kwargs: dict[str, Any] = {"api_key": api_key}
if base_url:
client_kwargs["base_url"] = base_url
client = AsyncOpenAI(**client_kwargs)
total_token_usage: dict[str, int] = {}
node_results: dict[str, Any] = {}
prior_conversation: list[dict] = [] # Running accumulation of all prior nodes
for idx, (node_id, node_name, node_events) in enumerate(node_splits):
# Build this node's conversation and transcript
node_conversation = build_conversation_structure(node_events)
node_transcript = format_transcript(node_conversation)
if not node_transcript:
continue
# Compute per-node metrics
node_metrics = compute_call_metrics(node_events)
# Get node summary
node_summary = get_node_summary_text(node_summaries, node_id)
# Generate conversation summary from prior nodes (if not first)
previous_conversation_summary = ""
if idx > 0 and prior_conversation:
prior_transcript = format_transcript(prior_conversation)
previous_conversation_summary = await _generate_conversation_summary(
client,
model,
prior_transcript,
parent_ctx,
node_name,
total_token_usage,
)
# Substitute placeholders in the user's system prompt
template_context = {
"node_summary": node_summary,
"previous_conversation_summary": previous_conversation_summary,
"transcript": node_transcript,
"metrics": json.dumps(node_metrics, indent=2),
}
system_content = render_template(system_prompt, template_context)
messages = [
{"role": "system", "content": system_content},
{"role": "user", "content": f"## Transcript\n{node_transcript}"},
]
# Call QA LLM
try:
response = await client.chat.completions.create(
model=model,
messages=messages,
temperature=0,
)
raw_response = response.choices[0].message.content
accumulate_token_usage(total_token_usage, response)
except Exception as e:
logger.error(
f"QA LLM call failed for node '{node_name}' on run {workflow_run_id}: {e}"
)
node_results[node_id] = {
"node_name": node_name,
"error": str(e),
"tags": [],
"summary": "",
"score": None,
}
prior_conversation.extend(node_conversation)
continue
# Trace
span_name = f"qa-node-{node_name}"
add_qa_span_to_trace(parent_ctx, model, messages, raw_response, span_name)
# Parse response
node_result: dict[str, Any] = {
"node_name": node_name,
"raw_response": raw_response,
}
try:
parsed = parse_llm_json(raw_response)
node_result["tags"] = parsed.get("tags", [])
node_result["summary"] = parsed.get("summary", "")
node_result["score"] = parsed.get("call_quality_score")
node_result["overall_sentiment"] = parsed.get("overall_sentiment")
except (json.JSONDecodeError, ValueError):
node_result["tags"] = []
node_result["summary"] = ""
node_result["score"] = None
node_results[node_id] = node_result
# Append this node's conversation to running total
prior_conversation.extend(node_conversation)
result: dict[str, Any] = {
"node_results": node_results,
"model": model,
}
if total_token_usage:
result["token_usage"] = total_token_usage
return result
async def _run_whole_call_qa_analysis(
qa_node_data: dict[str, Any],
workflow_run: WorkflowRunModel,
workflow_run_id: int,
) -> dict[str, Any]:
"""Run whole-call QA analysis (fallback when events lack node_id).
Returns results wrapped in the per-node format for consistency.
"""
logs = workflow_run.logs or {}
rtf_events = logs.get("realtime_feedback_events", [])
if not rtf_events:
logger.warning(f"No realtime_feedback_events for run {workflow_run_id}")
return {"error": "no_transcript", "node_results": {}}
conversation = build_conversation_structure(rtf_events)
transcript = format_transcript(conversation)
if not transcript:
logger.warning(f"Empty transcript for run {workflow_run_id}")
return {"error": "empty_transcript", "node_results": {}}
# Compute call metrics
usage_info = workflow_run.usage_info or {}
call_duration = usage_info.get("call_duration_seconds")
metrics = compute_call_metrics(rtf_events, call_duration)
# Resolve LLM config
system_prompt = qa_node_data.get("qa_system_prompt", "")
if not system_prompt:
logger.warning("No system prompt defined for QA Node")
return {"error": "no_system_prompt", "node_results": {}}
model, api_key, base_url = await resolve_llm_config(qa_node_data, workflow_run)
if not api_key:
logger.warning(
f"No LLM API key configured for QA analysis on run {workflow_run_id}"
)
return {"error": "no_api_key", "node_results": {}}
# Build messages — substitute all placeholders with sensible defaults
template_context = {
"node_summary": "",
"previous_conversation_summary": "",
"transcript": transcript,
"metrics": json.dumps(metrics, indent=2),
}
system_content = render_template(system_prompt, template_context)
messages = [
{"role": "system", "content": system_content},
{"role": "user", "content": f"## Transcript\n{transcript}"},
]
# Call LLM
client_kwargs: dict[str, Any] = {"api_key": api_key}
if base_url:
client_kwargs["base_url"] = base_url
client = AsyncOpenAI(**client_kwargs)
try:
response = await client.chat.completions.create(
model=model,
messages=messages,
temperature=0,
)
raw_response = response.choices[0].message.content
except Exception as e:
logger.error(f"QA LLM call failed for run {workflow_run_id}: {e}")
return {"error": str(e), "node_results": {}}
# Extract token usage
token_usage: dict[str, int] = {}
accumulate_token_usage(token_usage, response)
# Parse response
node_result: dict[str, Any] = {
"node_name": "whole_call",
"raw_response": raw_response,
}
try:
parsed = parse_llm_json(raw_response)
node_result["tags"] = parsed.get("tags", [])
node_result["summary"] = parsed.get("summary", "")
node_result["score"] = parsed.get("call_quality_score")
node_result["overall_sentiment"] = parsed.get("overall_sentiment")
except (json.JSONDecodeError, ValueError):
node_result["tags"] = []
node_result["summary"] = ""
node_result["score"] = None
# Langfuse tracing
parent_ctx = setup_langfuse_parent_context(workflow_run)
add_qa_span_to_trace(parent_ctx, model, messages, raw_response, "qa-analysis")
result: dict[str, Any] = {
"node_results": {"whole_call": node_result},
"model": model,
}
if token_usage:
result["token_usage"] = token_usage
return result

View file

@ -0,0 +1,109 @@
"""Conversation building, transcript formatting, and per-node event splitting."""
from collections import OrderedDict
from datetime import datetime
from pipecat.utils.enums import RealtimeFeedbackType
def build_conversation_structure(logs: list[dict]) -> list[dict]:
"""Transform raw call logs into a conversation structure for LLM QA analysis."""
if not logs:
return []
start_time = datetime.fromisoformat(logs[0]["timestamp"])
conversation = []
for event in logs:
if event["type"] == RealtimeFeedbackType.BOT_TEXT.value:
speaker = "assistant"
utterance_text = event["payload"]["text"]
try:
event_time = datetime.fromisoformat(event["payload"]["timestamp"])
except KeyError:
event_time = datetime.fromisoformat(event["timestamp"])
elif event["type"] == RealtimeFeedbackType.USER_TRANSCRIPTION.value and event[
"payload"
].get("final", False):
speaker = "user"
utterance_text = event["payload"]["text"]
try:
event_time = datetime.fromisoformat(event["payload"]["timestamp"])
except KeyError:
event_time = datetime.fromisoformat(event["timestamp"])
elif event["type"] == RealtimeFeedbackType.FUNCTION_CALL_START.value:
speaker = "tool_call"
payload = event["payload"]
utterance_text = payload.get("function_name", "unknown")
event_time = datetime.fromisoformat(event["timestamp"])
else:
continue
time_from_start = (event_time - start_time).total_seconds()
conversation.append(
{
"time_from_start_seconds": round(time_from_start, 2),
"speaker": speaker,
"text": utterance_text,
"node_name": event.get("node_name", ""),
"turn": event.get("turn", 0),
}
)
return conversation
def format_transcript(conversation: list[dict]) -> str:
"""Format conversation structure into a readable transcript string for the LLM."""
lines = []
for entry in conversation:
if entry["speaker"] == "tool_call":
lines.append(
f"[{entry['time_from_start_seconds']:.1f}s] "
f"[tool_call]: {entry['text']}"
)
else:
lines.append(
f"[{entry['time_from_start_seconds']:.1f}s] "
f"{entry['speaker']}: {entry['text']}"
)
return "\n".join(lines)
def split_events_by_node(
rtf_events: list[dict],
) -> list[tuple[str, str, list[dict]]]:
"""Split realtime_feedback_events by node_id.
Returns an ordered list of (node_id, node_name, events) tuples.
Only includes nodes that have conversational content (BOT_TEXT or USER_TRANSCRIPTION).
"""
conversational_types = {
RealtimeFeedbackType.BOT_TEXT.value,
RealtimeFeedbackType.USER_TRANSCRIPTION.value,
}
# Preserve insertion order — first occurrence defines position
node_events: OrderedDict[str, list[dict]] = OrderedDict()
node_names: dict[str, str] = {}
for event in rtf_events:
node_id = event.get("node_id")
if not node_id:
return [] # Events lack node_id — caller should fall back
if node_id not in node_events:
node_events[node_id] = []
node_names[node_id] = event.get("node_name", "")
node_events[node_id].append(event)
# Filter to nodes with conversational content
result = []
for node_id, events in node_events.items():
has_conversation = any(e["type"] in conversational_types for e in events)
if has_conversation:
result.append((node_id, node_names[node_id], events))
return result

View file

@ -0,0 +1,98 @@
"""LLM configuration resolution and token usage accumulation."""
from api.db import db_client
from api.db.models import WorkflowRunModel
def _provider_base_url(provider: str | None, endpoint: str = "") -> str | None:
"""Return the base URL for a given LLM provider."""
if provider == "openrouter":
return "https://openrouter.ai/api/v1"
if provider == "groq":
return "https://api.groq.com/openai/v1"
if provider == "google":
return "https://generativelanguage.googleapis.com/v1beta/openai/"
if provider == "azure":
return endpoint or None
return None
async def resolve_llm_config(
qa_node_data: dict, workflow_run: WorkflowRunModel
) -> tuple[str, str, str | None]:
"""Resolve the LLM model, API key, and base URL for QA analysis.
If the QA node has its own LLM configuration (qa_use_workflow_llm=False),
use those settings directly. Otherwise, fall back to the user's configured LLM.
Returns:
(model, api_key, base_url) tuple
"""
if not qa_node_data.get("qa_use_workflow_llm", True):
return (
qa_node_data.get("qa_model"),
qa_node_data.get("qa_api_key"),
_provider_base_url(
qa_node_data.get("qa_provider"),
qa_node_data.get("qa_endpoint", ""),
),
)
# Fall back to user's configured LLM
model, api_key, base_url = await resolve_user_llm_config(workflow_run)
qa_model = qa_node_data.get("qa_model", "default")
if qa_model and qa_model != "default":
model = qa_model
return model, api_key, base_url
async def resolve_user_llm_config(
workflow_run: WorkflowRunModel,
) -> tuple[str, str, str | None]:
"""Resolve the user's configured LLM (from UserConfiguration).
Returns:
(model, api_key, base_url) tuple
"""
user_id = None
if workflow_run.workflow and workflow_run.workflow.user:
user_id = workflow_run.workflow.user.id
llm_config: dict = {}
if user_id:
user_configuration = await db_client.get_user_configurations(user_id)
llm_config = user_configuration.model_dump(exclude_none=True).get("llm", {})
provider = llm_config.get("provider", "openai")
api_key = llm_config.get("api_key", "")
model = llm_config.get("model", "gpt-4.1")
base_url = _provider_base_url(provider, llm_config.get("endpoint", ""))
if provider == "openrouter" and llm_config.get("base_url"):
base_url = llm_config["base_url"]
return model, api_key, base_url
def accumulate_token_usage(total: dict, response) -> None:
"""Add token counts from an LLM response to the running total dict."""
if not response.usage:
return
total["prompt_tokens"] = total.get("prompt_tokens", 0) + (
response.usage.prompt_tokens or 0
)
total["completion_tokens"] = total.get("completion_tokens", 0) + (
response.usage.completion_tokens or 0
)
total["total_tokens"] = total.get("total_tokens", 0) + (
response.usage.total_tokens or 0
)
total["cache_read_input_tokens"] = total.get("cache_read_input_tokens", 0) + (
getattr(response.usage, "cache_read_input_tokens", 0) or 0
)
cache_creation = getattr(response.usage, "cache_creation_input_tokens", None)
if cache_creation is not None:
total["cache_creation_input_tokens"] = (
total.get("cache_creation_input_tokens") or 0
) + cache_creation

View file

@ -0,0 +1,37 @@
"""Call metrics computation from raw event logs."""
from pipecat.utils.enums import RealtimeFeedbackType
def compute_call_metrics(
logs: list[dict], call_duration_seconds: float | None = None
) -> dict:
"""Pre-compute quantitative metrics from raw call logs."""
latencies = []
ttfb_values = []
for event in logs:
if event["type"] == RealtimeFeedbackType.LATENCY_MEASURED.value:
latencies.append(event["payload"]["latency_seconds"])
elif event["type"] == RealtimeFeedbackType.TTFB_METRIC.value:
ttfb_values.append(event["payload"]["ttfb_seconds"])
turns = set()
for event in logs:
if event["type"] in (
RealtimeFeedbackType.USER_TRANSCRIPTION.value,
RealtimeFeedbackType.BOT_TEXT.value,
):
turns.add(event.get("turn", 0))
return {
"call_duration_seconds": call_duration_seconds,
"num_turns": len(turns),
"avg_latency_seconds": (
round(sum(latencies) / len(latencies), 2) if latencies else None
),
"avg_ttfb_seconds": (
round(sum(ttfb_values) / len(ttfb_values), 2) if ttfb_values else None
),
"max_latency_seconds": round(max(latencies), 2) if latencies else None,
}

View file

@ -0,0 +1,184 @@
"""Node summary generation and caching for per-node QA analysis."""
from typing import Any
from loguru import logger
from openai import AsyncOpenAI
from api.db import db_client
from api.db.models import WorkflowRunModel
from api.services.workflow.dto import NodeType
from api.services.workflow.qa.llm_config import resolve_llm_config
from api.services.workflow.qa.tracing import create_node_summary_trace
NODE_SUMMARY_SYSTEM_PROMPT = (
"You are analyzing a voice AI agent script. This is only a part of a larger script. "
"Produce a concise summary (2-4 sentences) describing this script purpose, "
"what the agent should accomplish, and key behaviors. We will be using this "
"summary to do a QA on the conversation that the agent would do with someone "
"so try to capture the nuances of the script as much as possible."
)
CONVERSATION_SUMMARY_SYSTEM_PROMPT = (
"You are summarizing a portion of a voice AI conversation. "
"Produce a concise summary (3-5 sentences) covering key topics, "
"information exchanged, and current state. We would be using this "
"summary in doing a QA of the conversation that the voice AI agent "
"did with someone so try to capture the nuances of the conversation "
"as much as possible."
)
def get_node_summary_text(node_summaries: dict, node_id: str) -> str:
"""Extract the summary text from a node_summaries entry.
Handles both the current format (dict with "summary" key) and the
legacy format (plain string) for backward compatibility.
"""
entry = node_summaries.get(node_id)
if entry is None:
return ""
if isinstance(entry, str):
return entry
return entry.get("summary", "")
async def ensure_node_summaries(
workflow_definition: dict,
definition_id: int | None,
workflow_run: WorkflowRunModel,
qa_node_data: dict,
) -> dict[str, Any]:
"""Ensure every agentNode/startCall node has a summary in the definition.
Returns the node_summaries dict:
{node_id: {"summary": "...", "trace_url": "..."}, ...}
"""
existing_summaries: dict[str, Any] = workflow_definition.get("node_summaries", {})
nodes = workflow_definition.get("nodes", [])
summarizable_types = {NodeType.agentNode.value, NodeType.startNode.value}
nodes_needing_summary = [
n
for n in nodes
if n.get("type") in summarizable_types and n.get("id") not in existing_summaries
]
if not nodes_needing_summary:
return existing_summaries
model, api_key, base_url = await resolve_llm_config(qa_node_data, workflow_run)
if not api_key:
logger.warning("No API key for node summary generation, skipping")
return existing_summaries
client_kwargs: dict[str, Any] = {"api_key": api_key}
if base_url:
client_kwargs["base_url"] = base_url
client = AsyncOpenAI(**client_kwargs)
updated_summaries = dict(existing_summaries)
# Collect all tool UUIDs across nodes and fetch them in one query
all_tool_uuids: set[str] = set()
for node in nodes_needing_summary:
node_data = node.get("data", {})
for uuid in node_data.get("tool_uuids", []):
all_tool_uuids.add(uuid)
tool_map: dict[str, Any] = {}
if all_tool_uuids:
organization_id = (
workflow_run.workflow.organization_id if workflow_run.workflow else None
)
if organization_id:
try:
tools = await db_client.get_tools_by_uuids(
list(all_tool_uuids), organization_id
)
for t in tools:
tool_map[t.tool_uuid] = {
"name": t.name,
"description": t.description or "",
}
except Exception as e:
logger.warning(f"Failed to fetch tools for node summaries: {e}")
# Build a map of outgoing edges per node (edges are also tool calls)
edges = workflow_definition.get("edges", [])
outgoing_edges_by_node: dict[str, list[dict]] = {}
for edge in edges:
source = edge.get("source")
if source:
outgoing_edges_by_node.setdefault(source, []).append(edge)
for node in nodes_needing_summary:
node_id = node["id"]
node_data = node.get("data", {})
node_name = node_data.get("name", "Unnamed")
# Build a description of the node for the LLM
node_info_parts = [f"Node name: {node_name}"]
if node_data.get("prompt"):
node_info_parts.append(f"Agent prompt:\n{node_data['prompt']}")
# Collect all available tools: custom tools + outgoing edges
tool_descriptions = []
node_tool_uuids = node_data.get("tool_uuids", [])
for uuid in node_tool_uuids:
tool_info = tool_map.get(uuid)
if tool_info:
desc = f"- {tool_info['name']}"
if tool_info["description"]:
desc += f": {tool_info['description']}"
tool_descriptions.append(desc)
for edge in outgoing_edges_by_node.get(node_id, []):
edge_data = edge.get("data", {})
label = edge_data.get("label", "")
condition = edge_data.get("condition", "")
if label:
desc = f"- {label}"
if condition:
desc += f": {condition}"
tool_descriptions.append(desc)
if tool_descriptions:
node_info_parts.append("Available tools:\n" + "\n".join(tool_descriptions))
node_info = "\n".join(node_info_parts)
messages = [
{"role": "system", "content": NODE_SUMMARY_SYSTEM_PROMPT},
{"role": "user", "content": node_info},
]
try:
response = await client.chat.completions.create(
model=model,
messages=messages,
temperature=0,
)
summary_text = response.choices[0].message.content or ""
except Exception as e:
logger.warning(f"Failed to generate summary for node {node_id}: {e}")
updated_summaries[node_id] = {"summary": ""}
continue
# Create a Langfuse trace for this summary generation
trace_url = create_node_summary_trace(model, messages, summary_text, node_name)
entry: dict[str, Any] = {"summary": summary_text}
if trace_url:
entry["trace_url"] = trace_url
updated_summaries[node_id] = entry
# Persist to DB
if definition_id and updated_summaries != existing_summaries:
try:
await db_client.update_definition_node_summaries(
definition_id, updated_summaries
)
except Exception as e:
logger.warning(f"Failed to persist node summaries: {e}")
return updated_summaries

View file

@ -0,0 +1,154 @@
"""Langfuse / OpenTelemetry tracing helpers for QA analysis."""
import re
from loguru import logger
from api.db.models import WorkflowRunModel
def extract_trace_id(gathered_context: dict) -> str | None:
"""Extract Langfuse trace_id from gathered_context trace_url.
URL format: https://langfuse.dograh.com/project/<project_id>/traces/<trace_id>
"""
trace_url = gathered_context.get("trace_url")
if not trace_url:
return None
try:
match = re.search(r"/traces/([a-fA-F0-9]+)$", trace_url)
if match:
return match.group(1)
except Exception:
pass
return None
def setup_langfuse_parent_context(workflow_run: WorkflowRunModel):
"""Set up OTEL parent context from the workflow run's Langfuse trace.
Returns the parent context object, or None if tracing is unavailable.
"""
try:
from opentelemetry.trace import (
NonRecordingSpan,
SpanContext,
TraceFlags,
set_span_in_context,
)
from api.services.pipecat.tracing_config import (
is_tracing_enabled,
setup_tracing_exporter,
)
if not is_tracing_enabled():
return None
setup_tracing_exporter()
gathered_context = workflow_run.gathered_context or {}
trace_id = extract_trace_id(gathered_context)
if not trace_id:
logger.debug("No trace_id found, skipping Langfuse tracing")
return None
parent_span_ctx = SpanContext(
trace_id=int(trace_id, 16),
span_id=0x1,
is_remote=True,
trace_flags=TraceFlags(0x01),
)
return set_span_in_context(NonRecordingSpan(parent_span_ctx))
except Exception as e:
logger.warning(f"Failed to set up Langfuse parent context: {e}")
return None
def add_qa_span_to_trace(
parent_ctx,
model: str,
messages: list[dict],
output: str,
span_name: str,
) -> None:
"""Create a child span under the conversation trace."""
if parent_ctx is None:
return
try:
from opentelemetry import trace as otel_trace
from pipecat.utils.tracing.service_attributes import add_llm_span_attributes
tracer = otel_trace.get_tracer("pipecat")
with tracer.start_as_current_span(
span_name,
context=parent_ctx,
) as span:
add_llm_span_attributes(
span,
service_name="OpenAILLMService",
model=model,
operation_name=span_name,
messages=messages,
output=output,
stream=False,
parameters={"temperature": 0},
)
except Exception as e:
logger.warning(f"Failed to trace span '{span_name}' to Langfuse: {e}")
def create_node_summary_trace(
model: str,
messages: list[dict],
output: str,
node_name: str,
) -> str | None:
"""Create a standalone Langfuse trace for a node summary generation.
Returns the trace URL, or None if tracing is unavailable.
"""
try:
from opentelemetry import trace as otel_trace
from opentelemetry.context import Context
from api.services.pipecat.tracing_config import (
is_tracing_enabled,
setup_tracing_exporter,
)
from pipecat.utils.tracing.service_attributes import add_llm_span_attributes
if not is_tracing_enabled():
return None
setup_tracing_exporter()
tracer = otel_trace.get_tracer("pipecat")
# Create a root span (new trace) for this node summary generation
with tracer.start_as_current_span(
f"node-summary-{node_name}",
context=Context(),
) as span:
add_llm_span_attributes(
span,
service_name="OpenAILLMService",
model=model,
operation_name=f"node-summary-{node_name}",
messages=messages,
output=output,
stream=False,
parameters={"temperature": 0},
)
trace_id = format(span.get_span_context().trace_id, "032x")
from langfuse import get_client
langfuse = get_client()
return langfuse.get_trace_url(trace_id=trace_id)
except Exception as e:
logger.warning(f"Failed to create node summary trace for '{node_name}': {e}")
return None