mirror of
https://github.com/dograh-hq/dograh.git
synced 2026-06-07 07:55:16 +02:00
* feat: add qa node in workflow builder * feat: add qa analysis token usage in usage_info * fix: mask the API key in QA node * feat: add advanced configuration in QA node
360 lines
12 KiB
Python
360 lines
12 KiB
Python
"""QA analysis service for post-call quality assessment.
|
|
|
|
Runs LLM-based analysis on call transcripts, traces under the same
|
|
Langfuse trace as the conversation, and returns structured results.
|
|
"""
|
|
|
|
import json
|
|
import re
|
|
from datetime import datetime
|
|
from typing import Any
|
|
|
|
from loguru import logger
|
|
from openai import AsyncOpenAI
|
|
|
|
from api.db import db_client
|
|
from api.db.models import WorkflowRunModel
|
|
from api.services.gen_ai.json_parser import parse_llm_json
|
|
from pipecat.utils.enums import RealtimeFeedbackType
|
|
|
|
|
|
def build_conversation_structure(logs: list[dict]) -> list[dict]:
    """Turn raw realtime-feedback events into an ordered list of utterances.

    Only bot text events and *final* user transcriptions are kept; every
    other event type (and interim user transcriptions) is skipped.

    Args:
        logs: Raw event dicts. Each event is read for ``type`` and
            ``payload`` (``text``, ``timestamp`` and, for user events,
            ``final``), plus the optional ``node_name`` and ``turn`` keys.
            The first event must also carry a top-level ISO ``timestamp``.

    Returns:
        One dict per utterance with ``time_from_start_seconds``, ``speaker``
        (``"assistant"`` or ``"user"``), ``text``, ``node_name`` and ``turn``.
        An empty list when ``logs`` is empty.
    """
    if not logs:
        return []

    # Offsets are measured from the first event's top-level timestamp, while
    # each utterance's own time comes from its payload timestamp.
    call_start = datetime.fromisoformat(logs[0]["timestamp"])
    bot_text_type = RealtimeFeedbackType.BOT_TEXT.value
    user_transcription_type = RealtimeFeedbackType.USER_TRANSCRIPTION.value

    utterances = []
    for entry in logs:
        entry_type = entry["type"]
        if entry_type == bot_text_type:
            role = "assistant"
        elif entry_type == user_transcription_type and entry["payload"].get(
            "final", False
        ):
            role = "user"
        else:
            continue

        payload = entry["payload"]
        spoken_text = payload["text"]
        spoken_at = datetime.fromisoformat(payload["timestamp"])
        offset_seconds = (spoken_at - call_start).total_seconds()

        utterances.append(
            {
                "time_from_start_seconds": round(offset_seconds, 2),
                "speaker": role,
                "text": spoken_text,
                "node_name": entry.get("node_name", ""),
                "turn": entry.get("turn", 0),
            }
        )

    return utterances
|
|
|
|
|
|
def format_transcript(conversation: list[dict]) -> str:
    """Render a conversation structure as a plain-text transcript for the LLM.

    Each utterance becomes one line of the form ``[<offset>s] <speaker>: <text>``
    with the offset shown to one decimal place. Lines are joined with
    newlines; an empty conversation yields an empty string.
    """
    return "\n".join(
        f"[{turn['time_from_start_seconds']:.1f}s] {turn['speaker']}: {turn['text']}"
        for turn in conversation
    )
|
|
|
|
|
|
def compute_call_metrics(
|
|
logs: list[dict], call_duration_seconds: float | None = None
|
|
) -> dict:
|
|
"""Pre-compute quantitative metrics from raw call logs."""
|
|
latencies = []
|
|
ttfb_values = []
|
|
|
|
for event in logs:
|
|
if event["type"] == RealtimeFeedbackType.LATENCY_MEASURED.value:
|
|
latencies.append(event["payload"]["latency_seconds"])
|
|
elif event["type"] == RealtimeFeedbackType.TTFB_METRIC.value:
|
|
ttfb_values.append(event["payload"]["ttfb_seconds"])
|
|
|
|
turns = set()
|
|
for event in logs:
|
|
if event["type"] in (
|
|
RealtimeFeedbackType.USER_TRANSCRIPTION.value,
|
|
RealtimeFeedbackType.BOT_TEXT.value,
|
|
):
|
|
turns.add(event.get("turn", 0))
|
|
|
|
return {
|
|
"call_duration_seconds": call_duration_seconds,
|
|
"num_turns": len(turns),
|
|
"avg_latency_seconds": (
|
|
round(sum(latencies) / len(latencies), 2) if latencies else None
|
|
),
|
|
"avg_ttfb_seconds": (
|
|
round(sum(ttfb_values) / len(ttfb_values), 2) if ttfb_values else None
|
|
),
|
|
"max_latency_seconds": round(max(latencies), 2) if latencies else None,
|
|
}
|
|
|
|
|
|
def _extract_trace_id(gathered_context: dict) -> str | None:
|
|
"""Extract Langfuse trace_id from gathered_context trace_url.
|
|
|
|
URL format: https://langfuse.dograh.com/project/<project_id>/traces/<trace_id>
|
|
"""
|
|
trace_url = gathered_context.get("trace_url")
|
|
if not trace_url:
|
|
return None
|
|
try:
|
|
match = re.search(r"/traces/([a-fA-F0-9]+)$", trace_url)
|
|
if match:
|
|
return match.group(1)
|
|
except Exception:
|
|
pass
|
|
return None
|
|
|
|
|
|
def _provider_base_url(provider: str | None, endpoint: str = "") -> str | None:
|
|
"""Return the base URL for a given LLM provider."""
|
|
if provider == "openrouter":
|
|
return "https://openrouter.ai/api/v1"
|
|
if provider == "groq":
|
|
return "https://api.groq.com/openai/v1"
|
|
if provider == "google":
|
|
return "https://generativelanguage.googleapis.com/v1beta/openai/"
|
|
if provider == "azure":
|
|
return endpoint or None
|
|
return None
|
|
|
|
|
|
async def _resolve_llm_config(
    qa_node_data: dict, workflow_run: WorkflowRunModel
) -> tuple[str, str, str | None]:
    """Work out which model, API key and base URL the QA analysis should use.

    When the QA node opts out of the workflow LLM (``qa_use_workflow_llm`` is
    falsy), the node's own ``qa_model`` / ``qa_api_key`` / ``qa_provider`` /
    ``qa_endpoint`` values are used as stored; a missing ``qa_model`` or
    ``qa_api_key`` comes back as ``None``. Otherwise the LLM settings of the
    workflow's owner are loaded from the database, with the node's
    ``qa_model`` overriding the configured model unless it is empty or
    ``"default"``.

    Args:
        qa_node_data: The QA node's data dict from the workflow definition.
        workflow_run: Run whose ``workflow.user`` identifies the owner.

    Returns:
        ``(model, api_key, base_url)``; ``base_url`` is ``None`` when the
        provider needs no custom endpoint.
    """
    use_workflow_llm = qa_node_data.get("qa_use_workflow_llm", True)
    if not use_workflow_llm:
        node_model = qa_node_data.get("qa_model")
        node_api_key = qa_node_data.get("qa_api_key")
        node_base_url = _provider_base_url(
            qa_node_data.get("qa_provider"),
            qa_node_data.get("qa_endpoint", ""),
        )
        return node_model, node_api_key, node_base_url

    # Fall back to the LLM configured by the workflow's owner.
    workflow = workflow_run.workflow
    owner = workflow.user if workflow else None
    user_id = owner.id if owner else None

    llm_config: dict = {}
    if user_id:
        user_configuration = await db_client.get_user_configurations(user_id)
        llm_config = user_configuration.model_dump(exclude_none=True).get("llm", {})

    provider = llm_config.get("provider", "openai")
    api_key = llm_config.get("api_key", "")

    requested_model = qa_node_data.get("qa_model", "default")
    model = (
        requested_model
        if requested_model and requested_model != "default"
        else llm_config.get("model", "gpt-4.1")
    )

    # For openrouter, a user-configured base_url wins over the built-in one.
    configured_base_url = llm_config.get("base_url")
    if provider == "openrouter" and configured_base_url:
        base_url = configured_base_url
    else:
        base_url = _provider_base_url(provider, llm_config.get("endpoint", ""))

    return model, api_key, base_url
|
|
|
|
|
|
def _qa_error_result(error: str) -> dict[str, Any]:
    """Build the result returned when QA analysis cannot produce an assessment."""
    return {"error": error, "tags": [], "summary": "", "score": None}


async def run_qa_analysis(
    qa_node_data: dict[str, Any],
    workflow_run: WorkflowRunModel,
    workflow_run_id: int,
) -> dict[str, Any]:
    """Run QA analysis on a completed workflow run.

    Builds a transcript and call metrics from the run's realtime feedback
    events, sends them to the configured LLM with the node's system prompt
    (``{metrics}`` is replaced by the metrics JSON), parses the JSON reply and
    attaches the generation to the conversation's Langfuse trace.

    Args:
        qa_node_data: The QA node's data dict from workflow definition
        workflow_run: The workflow run model with logs and context
        workflow_run_id: The workflow run ID

    Returns:
        On success, a dict with ``raw_response``, ``model``, ``tags``,
        ``summary``, ``score``, ``overall_sentiment`` and, when the provider
        reported usage, ``token_usage``. If the reply is empty or is not a
        JSON object, the parsed fields fall back to ``[]`` / ``""`` / ``None``.
        When the analysis cannot run (no transcript, no system prompt, no API
        key, or the LLM call fails), a dict with ``error`` plus empty
        ``tags`` / ``summary`` / ``score``.
    """
    # Extract transcript from logs
    logs = workflow_run.logs or {}
    rtf_events = logs.get("realtime_feedback_events", [])
    if not rtf_events:
        logger.warning(f"No realtime_feedback_events for run {workflow_run_id}")
        return _qa_error_result("no_transcript")

    conversation = build_conversation_structure(rtf_events)
    transcript = format_transcript(conversation)
    if not transcript:
        logger.warning(f"Empty transcript for run {workflow_run_id}")
        return _qa_error_result("empty_transcript")

    # Compute call metrics
    usage_info = workflow_run.usage_info or {}
    call_duration = usage_info.get("call_duration_seconds")
    metrics = compute_call_metrics(rtf_events, call_duration)

    # Resolve LLM config
    system_prompt = qa_node_data.get("qa_system_prompt", "")
    if not system_prompt:
        logger.warning("No system prompt defined for QA Node")
        return _qa_error_result("no_system_prompt")

    model, api_key, base_url = await _resolve_llm_config(qa_node_data, workflow_run)

    if not api_key:
        logger.warning(
            f"No LLM API key configured for QA analysis on run {workflow_run_id}"
        )
        return _qa_error_result("no_api_key")

    # Build messages
    system_content = system_prompt.replace("{metrics}", json.dumps(metrics, indent=2))
    messages = [
        {"role": "system", "content": system_content},
        {"role": "user", "content": f"## Transcript\n{transcript}"},
    ]

    # Call LLM
    client_kwargs: dict[str, Any] = {"api_key": api_key}
    if base_url:
        client_kwargs["base_url"] = base_url

    client = AsyncOpenAI(**client_kwargs)

    try:
        # A fresh client is created per analysis; close its HTTP connection
        # pool as soon as the call finishes instead of waiting for GC.
        async with client:
            response = await client.chat.completions.create(
                model=model,
                messages=messages,
                temperature=0,
            )
        raw_response = response.choices[0].message.content
    except Exception as e:
        logger.error(f"QA LLM call failed for run {workflow_run_id}: {e}")
        return _qa_error_result(str(e))

    # Extract token usage from LLM response
    token_usage = None
    if response.usage:
        token_usage = {
            "prompt_tokens": response.usage.prompt_tokens or 0,
            "completion_tokens": response.usage.completion_tokens or 0,
            "total_tokens": response.usage.total_tokens or 0,
            "cache_read_input_tokens": getattr(
                response.usage, "cache_read_input_tokens", 0
            )
            or 0,
            "cache_creation_input_tokens": getattr(
                response.usage, "cache_creation_input_tokens", None
            ),
        }

    # Parse response
    result: dict[str, Any] = {"raw_response": raw_response, "model": model}
    if token_usage:
        result["token_usage"] = token_usage

    parsed: Any = None
    # The message content can be None/empty; skip the parser in that case
    # rather than depend on how it treats non-string input.
    if raw_response:
        try:
            parsed = parse_llm_json(raw_response)
        except (json.JSONDecodeError, ValueError):
            logger.warning(
                f"Could not parse QA response as JSON for run {workflow_run_id}"
            )
    # Valid JSON that is not an object (e.g. a list) has no fields to read;
    # treat it like an unparseable reply instead of failing on .get().
    if not isinstance(parsed, dict):
        parsed = {}

    result["tags"] = parsed.get("tags", [])
    result["summary"] = parsed.get("summary", "")
    result["score"] = parsed.get("call_quality_score")
    result["overall_sentiment"] = parsed.get("overall_sentiment")

    # Langfuse tracing — attach QA generation to the conversation trace
    _add_qa_span_to_conversation_trace(
        workflow_run, model, messages, raw_response, result
    )

    return result
|
|
|
|
|
|
def _add_qa_span_to_conversation_trace(
    workflow_run: WorkflowRunModel,
    model: str,
    messages: list[dict],
    raw_response: str,
    result: dict,
):
    """Record the QA generation as a span on the conversation's Langfuse trace.

    The trace id is taken from ``workflow_run.gathered_context["trace_url"]``
    and an OpenTelemetry span named ``qa-analysis`` is opened under it, using
    the same ``gen_ai.*`` attribute helper as the pipecat pipeline. This is
    best-effort: nothing happens when tracing is disabled or no trace id is
    found, and any exception is logged as a warning rather than raised.

    Args:
        workflow_run: Run whose gathered context holds the trace URL.
        model: Model name recorded on the span.
        messages: Chat messages sent to the LLM.
        raw_response: Raw LLM output recorded as the span output.
        result: Parsed QA result; accepted but not read by this function.
    """
    try:
        from opentelemetry import trace as otel_trace
        from opentelemetry.trace import (
            NonRecordingSpan,
            SpanContext,
            TraceFlags,
            set_span_in_context,
        )

        from api.services.pipecat.tracing_config import (
            is_tracing_enabled,
            setup_tracing_exporter,
        )
        from pipecat.utils.tracing.service_attributes import add_llm_span_attributes

        if not is_tracing_enabled():
            return

        # Ensure the OTEL exporter is initialized (idempotent — no-op if
        # already called in the pipeline process, required in the ARQ worker).
        setup_tracing_exporter()

        trace_id = _extract_trace_id(workflow_run.gathered_context or {})
        if not trace_id:
            logger.debug("No trace_id found, skipping Langfuse QA trace")
            return

        qa_tracer = otel_trace.get_tracer("pipecat")

        # Stand-in remote parent: only the trace id is real, the span id is a
        # dummy value so the new span joins the existing trace.
        remote_parent = NonRecordingSpan(
            SpanContext(
                trace_id=int(trace_id, 16),
                span_id=0x1,  # dummy parent span id
                is_remote=True,
                trace_flags=TraceFlags(0x01),
            )
        )
        conversation_ctx = set_span_in_context(remote_parent)

        llm_attributes = {
            "service_name": "OpenAILLMService",
            "model": model,
            "operation_name": "qa-analysis",
            "messages": messages,
            "output": raw_response,
            "stream": False,
            "parameters": {"temperature": 0},
        }

        # Create a child span under the existing trace
        with qa_tracer.start_as_current_span(
            "qa-analysis",
            context=conversation_ctx,
        ) as qa_span:
            add_llm_span_attributes(qa_span, **llm_attributes)

    except Exception as exc:
        logger.warning(f"Failed to trace QA to Langfuse: {exc}")
|