dograh/api/services/qa_analysis.py
Abhishek a836825b83
feat: add qa node in workflow builder (#172)
* feat: add qa node in workflow builder

* feat: add qa analysis token usage in usage_info

* fix: mask the API key in QA node

* feat: add advanced configuration in QA node
2026-02-25 13:53:30 +05:30

360 lines
12 KiB
Python

"""QA analysis service for post-call quality assessment.
Runs LLM-based analysis on call transcripts, traces under the same
Langfuse trace as the conversation, and returns structured results.
"""
import json
import re
from datetime import datetime
from typing import Any
from loguru import logger
from openai import AsyncOpenAI
from api.db import db_client
from api.db.models import WorkflowRunModel
from api.services.gen_ai.json_parser import parse_llm_json
from pipecat.utils.enums import RealtimeFeedbackType
def build_conversation_structure(logs: list[dict]) -> list[dict]:
    """Convert raw call log events into an ordered list of utterances.

    Only bot-text events and user-transcription events whose payload is
    flagged ``final`` are kept; every other event is skipped. Offsets are
    measured from the top-level ``timestamp`` of the first log entry, while
    each utterance's own time is read from ``payload["timestamp"]``.

    Args:
        logs: Raw realtime feedback events, in the order they were recorded.

    Returns:
        One dict per kept utterance with the keys ``time_from_start_seconds``
        (rounded to 2 decimals), ``speaker`` (``"assistant"`` or ``"user"``),
        ``text``, ``node_name`` and ``turn``. Empty list when ``logs`` is empty.
    """
    if not logs:
        return []

    # Reference point for every "seconds since start" offset below.
    origin = datetime.fromisoformat(logs[0]["timestamp"])
    bot_text_type = RealtimeFeedbackType.BOT_TEXT.value
    user_transcription_type = RealtimeFeedbackType.USER_TRANSCRIPTION.value

    utterances: list[dict] = []
    for entry in logs:
        if entry["type"] == bot_text_type:
            role = "assistant"
        elif (
            entry["type"] == user_transcription_type
            and entry["payload"].get("final", False)
        ):
            # Interim (non-final) transcriptions are ignored.
            role = "user"
        else:
            continue

        payload = entry["payload"]
        spoken = payload["text"]
        moment = datetime.fromisoformat(payload["timestamp"])
        offset_seconds = (moment - origin).total_seconds()

        utterances.append(
            {
                "time_from_start_seconds": round(offset_seconds, 2),
                "speaker": role,
                "text": spoken,
                "node_name": entry.get("node_name", ""),
                "turn": entry.get("turn", 0),
            }
        )

    return utterances
def format_transcript(conversation: list[dict]) -> str:
    """Render a conversation structure as a plain-text transcript for the LLM.

    Each entry becomes one line of the form ``[<offset>s] <speaker>: <text>``,
    with the offset shown to one decimal place.

    Args:
        conversation: Utterance dicts carrying ``time_from_start_seconds``,
            ``speaker`` and ``text``.

    Returns:
        The lines joined with newlines; an empty string for an empty input.
    """
    return "\n".join(
        f"[{turn['time_from_start_seconds']:.1f}s] {turn['speaker']}: {turn['text']}"
        for turn in conversation
    )
def compute_call_metrics(
logs: list[dict], call_duration_seconds: float | None = None
) -> dict:
"""Pre-compute quantitative metrics from raw call logs."""
latencies = []
ttfb_values = []
for event in logs:
if event["type"] == RealtimeFeedbackType.LATENCY_MEASURED.value:
latencies.append(event["payload"]["latency_seconds"])
elif event["type"] == RealtimeFeedbackType.TTFB_METRIC.value:
ttfb_values.append(event["payload"]["ttfb_seconds"])
turns = set()
for event in logs:
if event["type"] in (
RealtimeFeedbackType.USER_TRANSCRIPTION.value,
RealtimeFeedbackType.BOT_TEXT.value,
):
turns.add(event.get("turn", 0))
return {
"call_duration_seconds": call_duration_seconds,
"num_turns": len(turns),
"avg_latency_seconds": (
round(sum(latencies) / len(latencies), 2) if latencies else None
),
"avg_ttfb_seconds": (
round(sum(ttfb_values) / len(ttfb_values), 2) if ttfb_values else None
),
"max_latency_seconds": round(max(latencies), 2) if latencies else None,
}
def _extract_trace_id(gathered_context: dict) -> str | None:
"""Extract Langfuse trace_id from gathered_context trace_url.
URL format: https://langfuse.dograh.com/project/<project_id>/traces/<trace_id>
"""
trace_url = gathered_context.get("trace_url")
if not trace_url:
return None
try:
match = re.search(r"/traces/([a-fA-F0-9]+)$", trace_url)
if match:
return match.group(1)
except Exception:
pass
return None
def _provider_base_url(provider: str | None, endpoint: str = "") -> str | None:
"""Return the base URL for a given LLM provider."""
if provider == "openrouter":
return "https://openrouter.ai/api/v1"
if provider == "groq":
return "https://api.groq.com/openai/v1"
if provider == "google":
return "https://generativelanguage.googleapis.com/v1beta/openai/"
if provider == "azure":
return endpoint or None
return None
async def _resolve_llm_config(
    qa_node_data: dict, workflow_run: WorkflowRunModel
) -> tuple[str, str, str | None]:
    """Work out which model, API key and base URL the QA analysis should use.

    When the QA node sets ``qa_use_workflow_llm`` to a falsy value, the node's
    own ``qa_model`` / ``qa_api_key`` / ``qa_provider`` / ``qa_endpoint`` are
    used as-is. Otherwise the LLM settings of the workflow owner's user
    configuration are loaded, with the node's ``qa_model`` taking precedence
    over the configured model unless it is empty or ``"default"``.

    Args:
        qa_node_data: The QA node's data dict from the workflow definition.
        workflow_run: Run whose ``workflow.user`` identifies the owner.

    Returns:
        ``(model, api_key, base_url)``. In the fallback path ``api_key`` is an
        empty string when no key is configured, and ``base_url`` is ``None``
        when the provider has no dedicated URL.
    """
    use_workflow_llm = qa_node_data.get("qa_use_workflow_llm", True)
    if not use_workflow_llm:
        # Node-level override: take the node's settings without consulting
        # the user's configuration at all.
        node_model = qa_node_data.get("qa_model")
        node_api_key = qa_node_data.get("qa_api_key")
        node_base_url = _provider_base_url(
            qa_node_data.get("qa_provider"),
            qa_node_data.get("qa_endpoint", ""),
        )
        return node_model, node_api_key, node_base_url

    # Fall back to the LLM configured by the workflow's owner.
    workflow = workflow_run.workflow
    owner = workflow.user if workflow else None
    owner_id = owner.id if owner else None

    user_llm: dict = {}
    if owner_id:
        configuration = await db_client.get_user_configurations(owner_id)
        user_llm = configuration.model_dump(exclude_none=True).get("llm", {})

    provider = user_llm.get("provider", "openai")
    api_key = user_llm.get("api_key", "")

    requested_model = qa_node_data.get("qa_model", "default")
    if not requested_model or requested_model == "default":
        model = user_llm.get("model", "gpt-4.1")
    else:
        model = requested_model

    base_url = _provider_base_url(provider, user_llm.get("endpoint", ""))
    if provider == "openrouter":
        # For openrouter, prefer the user-configured base_url if one is set.
        base_url = user_llm.get("base_url") or base_url

    return model, api_key, base_url
async def run_qa_analysis(
    qa_node_data: dict[str, Any],
    workflow_run: WorkflowRunModel,
    workflow_run_id: int,
) -> dict[str, Any]:
    """Run QA analysis on a completed workflow run.

    Builds a transcript and call metrics from the run's realtime feedback
    events, sends them with the QA node's system prompt to the resolved LLM,
    parses the JSON reply, and attaches the generation to the conversation's
    Langfuse trace.

    Args:
        qa_node_data: The QA node's data dict from workflow definition
        workflow_run: The workflow run model with logs and context
        workflow_run_id: The workflow run ID

    Returns:
        On success, a dict with ``raw_response``, ``model``, ``tags``,
        ``summary``, ``score`` and, when the reply parsed as a JSON object,
        ``overall_sentiment``; ``token_usage`` is included when the provider
        reported usage. When analysis cannot run (no transcript, no system
        prompt, no API key, or the LLM call failed), a dict with ``error``
        plus empty ``tags`` / ``summary`` and ``score`` set to ``None``.
    """
    # Extract transcript from logs
    logs = workflow_run.logs or {}
    rtf_events = logs.get("realtime_feedback_events", [])
    if not rtf_events:
        logger.warning(f"No realtime_feedback_events for run {workflow_run_id}")
        return _qa_error_result("no_transcript")

    conversation = build_conversation_structure(rtf_events)
    transcript = format_transcript(conversation)
    if not transcript:
        logger.warning(f"Empty transcript for run {workflow_run_id}")
        return _qa_error_result("empty_transcript")

    # Compute call metrics
    usage_info = workflow_run.usage_info or {}
    call_duration = usage_info.get("call_duration_seconds")
    metrics = compute_call_metrics(rtf_events, call_duration)

    # Resolve LLM config
    system_prompt = qa_node_data.get("qa_system_prompt", "")
    if not system_prompt:
        logger.warning("No system prompt defined for QA Node")
        return _qa_error_result("no_system_prompt")

    model, api_key, base_url = await _resolve_llm_config(qa_node_data, workflow_run)
    if not api_key:
        logger.warning(
            f"No LLM API key configured for QA analysis on run {workflow_run_id}"
        )
        return _qa_error_result("no_api_key")

    # Build messages; the "{metrics}" placeholder in the prompt is replaced
    # with the pre-computed metrics as pretty-printed JSON.
    system_content = system_prompt.replace("{metrics}", json.dumps(metrics, indent=2))
    messages = [
        {"role": "system", "content": system_content},
        {"role": "user", "content": f"## Transcript\n{transcript}"},
    ]

    # Call LLM
    client_kwargs: dict[str, Any] = {"api_key": api_key}
    if base_url:
        client_kwargs["base_url"] = base_url
    client = AsyncOpenAI(**client_kwargs)
    try:
        response = await client.chat.completions.create(
            model=model,
            messages=messages,
            temperature=0,
        )
        raw_response = response.choices[0].message.content
    except Exception as e:
        logger.error(f"QA LLM call failed for run {workflow_run_id}: {e}")
        return _qa_error_result(str(e))
    finally:
        # A fresh client is created per call, so release its HTTP connection
        # pool here instead of leaving it to garbage collection.
        await client.close()

    # Extract token usage from LLM response
    token_usage = None
    if response.usage:
        token_usage = {
            "prompt_tokens": response.usage.prompt_tokens or 0,
            "completion_tokens": response.usage.completion_tokens or 0,
            "total_tokens": response.usage.total_tokens or 0,
            "cache_read_input_tokens": getattr(
                response.usage, "cache_read_input_tokens", 0
            )
            or 0,
            "cache_creation_input_tokens": getattr(
                response.usage, "cache_creation_input_tokens", None
            ),
        }

    # Parse response
    result: dict[str, Any] = {"raw_response": raw_response, "model": model}
    if token_usage:
        result["token_usage"] = token_usage

    parsed: Any = None
    # The completion content may be None (e.g. an empty reply); there is
    # nothing to parse in that case.
    if raw_response is not None:
        try:
            parsed = parse_llm_json(raw_response)
        except (json.JSONDecodeError, ValueError):
            parsed = None

    if isinstance(parsed, dict):
        result["tags"] = parsed.get("tags", [])
        result["summary"] = parsed.get("summary", "")
        result["score"] = parsed.get("call_quality_score")
        result["overall_sentiment"] = parsed.get("overall_sentiment")
    else:
        # Unparseable output, or valid JSON that is not an object (a list or
        # scalar has no .get): fall back to an empty result.
        logger.warning(
            f"QA LLM response for run {workflow_run_id} is not a JSON object; "
            "returning empty QA result"
        )
        result["tags"] = []
        result["summary"] = ""
        result["score"] = None

    # Langfuse tracing — attach QA generation to the conversation trace
    _add_qa_span_to_conversation_trace(
        workflow_run, model, messages, raw_response, result
    )
    return result


def _qa_error_result(error: str) -> dict[str, Any]:
    """Build the result returned when QA analysis cannot produce an assessment."""
    return {"error": error, "tags": [], "summary": "", "score": None}
def _add_qa_span_to_conversation_trace(
    workflow_run: WorkflowRunModel,
    model: str,
    messages: list[dict],
    raw_response: str,
    result: dict,
):
    """Record the QA LLM call as a span inside the conversation's trace.

    The trace ID is recovered from the run's ``gathered_context`` trace URL
    and used to build a remote parent context, so the new ``qa-analysis``
    span is created under the existing trace. Span attributes are written
    with pipecat's ``add_llm_span_attributes`` helper.

    This is best-effort: it returns silently when tracing is disabled or no
    trace ID is available, and any exception is logged as a warning rather
    than raised. ``result`` is accepted but not used here.

    Args:
        workflow_run: Run whose ``gathered_context`` carries the trace URL.
        model: Model name recorded on the span.
        messages: Chat messages sent to the LLM.
        raw_response: Raw LLM output recorded as the span output.
        result: Parsed QA result (currently unused).
    """
    try:
        from opentelemetry import trace as otel_trace
        from opentelemetry.trace import (
            NonRecordingSpan,
            SpanContext,
            TraceFlags,
            set_span_in_context,
        )
        from api.services.pipecat.tracing_config import (
            is_tracing_enabled,
            setup_tracing_exporter,
        )
        from pipecat.utils.tracing.service_attributes import add_llm_span_attributes

        if not is_tracing_enabled():
            return

        # Ensure the OTEL exporter is initialized (idempotent — no-op if
        # already called in the pipeline process, required in the ARQ worker).
        setup_tracing_exporter()

        trace_id_hex = _extract_trace_id(workflow_run.gathered_context or {})
        if not trace_id_hex:
            logger.debug("No trace_id found, skipping Langfuse QA trace")
            return

        qa_tracer = otel_trace.get_tracer("pipecat")

        # Build a remote parent from the known trace ID. The span ID is a
        # dummy value; only the trace ID ties the new span to the trace.
        remote_parent = NonRecordingSpan(
            SpanContext(
                trace_id=int(trace_id_hex, 16),
                span_id=0x1,
                is_remote=True,
                trace_flags=TraceFlags(0x01),
            )
        )
        conversation_ctx = set_span_in_context(remote_parent)

        # Open the QA span under the existing trace and describe the LLM call.
        with qa_tracer.start_as_current_span(
            "qa-analysis", context=conversation_ctx
        ) as qa_span:
            add_llm_span_attributes(
                qa_span,
                service_name="OpenAILLMService",
                model=model,
                operation_name="qa-analysis",
                messages=messages,
                output=raw_response,
                stream=False,
                parameters={"temperature": 0},
            )
    except Exception as e:
        logger.warning(f"Failed to trace QA to Langfuse: {e}")