mirror of
https://github.com/dograh-hq/dograh.git
synced 2026-06-19 08:28:10 +02:00
feat: add qa node in workflow builder (#172)
* feat: add qa node in workflow builder
* feat: add qa analysis token usage in usage_info
* fix: mask the API key in QA node
* feat: add advanced configuration in QA node
This commit is contained in:
parent
f1f4830012
commit
a836825b83
30 changed files with 1619 additions and 265 deletions
360
api/services/qa_analysis.py
Normal file
360
api/services/qa_analysis.py
Normal file
|
|
@ -0,0 +1,360 @@
|
|||
"""QA analysis service for post-call quality assessment.
|
||||
|
||||
Runs LLM-based analysis on call transcripts, traces under the same
|
||||
Langfuse trace as the conversation, and returns structured results.
|
||||
"""
|
||||
|
||||
import json
|
||||
import re
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from loguru import logger
|
||||
from openai import AsyncOpenAI
|
||||
|
||||
from api.db import db_client
|
||||
from api.db.models import WorkflowRunModel
|
||||
from api.services.gen_ai.json_parser import parse_llm_json
|
||||
from pipecat.utils.enums import RealtimeFeedbackType
|
||||
|
||||
|
||||
def build_conversation_structure(logs: list[dict]) -> list[dict]:
    """Turn raw realtime-feedback events into an ordered list of utterances.

    Only bot text events and user transcriptions flagged ``final`` are kept;
    every other event is skipped.

    Args:
        logs: Raw event dicts. Each is read via its ``type`` and ``payload``
            keys (``payload`` supplying ``text`` and ``timestamp``), plus the
            optional ``node_name`` and ``turn`` keys. The first event must
            also carry a top-level ``timestamp``.

    Returns:
        One dict per kept event with the keys ``time_from_start_seconds``,
        ``speaker`` (``"assistant"`` or ``"user"``), ``text``, ``node_name``
        and ``turn``. An empty list when ``logs`` is empty.
    """
    if not logs:
        return []

    # All offsets are measured from the top-level timestamp of the first event.
    call_start = datetime.fromisoformat(logs[0]["timestamp"])

    utterances: list[dict] = []
    for entry in logs:
        # Decide who is speaking first; anything else is not part of the transcript.
        if entry["type"] == RealtimeFeedbackType.BOT_TEXT.value:
            role = "assistant"
        elif entry["type"] == RealtimeFeedbackType.USER_TRANSCRIPTION.value and entry[
            "payload"
        ].get("final", False):
            role = "user"
        else:
            continue

        payload = entry["payload"]
        spoken = payload["text"]
        spoken_at = datetime.fromisoformat(payload["timestamp"])
        offset_seconds = (spoken_at - call_start).total_seconds()

        utterances.append(
            {
                "time_from_start_seconds": round(offset_seconds, 2),
                "speaker": role,
                "text": spoken,
                "node_name": entry.get("node_name", ""),
                "turn": entry.get("turn", 0),
            }
        )

    return utterances
|
||||
|
||||
|
||||
def format_transcript(conversation: list[dict]) -> str:
    """Render a conversation structure as a newline-separated transcript.

    Each entry becomes one line of the form ``[<offset>s] <speaker>: <text>``,
    with the offset printed to one decimal place.

    Args:
        conversation: Dicts providing ``time_from_start_seconds``,
            ``speaker`` and ``text``.

    Returns:
        The joined transcript, or an empty string for an empty conversation.
    """
    rendered = (
        f"[{turn['time_from_start_seconds']:.1f}s] "
        f"{turn['speaker']}: {turn['text']}"
        for turn in conversation
    )
    return "\n".join(rendered)
|
||||
|
||||
|
||||
def compute_call_metrics(
|
||||
logs: list[dict], call_duration_seconds: float | None = None
|
||||
) -> dict:
|
||||
"""Pre-compute quantitative metrics from raw call logs."""
|
||||
latencies = []
|
||||
ttfb_values = []
|
||||
|
||||
for event in logs:
|
||||
if event["type"] == RealtimeFeedbackType.LATENCY_MEASURED.value:
|
||||
latencies.append(event["payload"]["latency_seconds"])
|
||||
elif event["type"] == RealtimeFeedbackType.TTFB_METRIC.value:
|
||||
ttfb_values.append(event["payload"]["ttfb_seconds"])
|
||||
|
||||
turns = set()
|
||||
for event in logs:
|
||||
if event["type"] in (
|
||||
RealtimeFeedbackType.USER_TRANSCRIPTION.value,
|
||||
RealtimeFeedbackType.BOT_TEXT.value,
|
||||
):
|
||||
turns.add(event.get("turn", 0))
|
||||
|
||||
return {
|
||||
"call_duration_seconds": call_duration_seconds,
|
||||
"num_turns": len(turns),
|
||||
"avg_latency_seconds": (
|
||||
round(sum(latencies) / len(latencies), 2) if latencies else None
|
||||
),
|
||||
"avg_ttfb_seconds": (
|
||||
round(sum(ttfb_values) / len(ttfb_values), 2) if ttfb_values else None
|
||||
),
|
||||
"max_latency_seconds": round(max(latencies), 2) if latencies else None,
|
||||
}
|
||||
|
||||
|
||||
def _extract_trace_id(gathered_context: dict) -> str | None:
|
||||
"""Extract Langfuse trace_id from gathered_context trace_url.
|
||||
|
||||
URL format: https://langfuse.dograh.com/project/<project_id>/traces/<trace_id>
|
||||
"""
|
||||
trace_url = gathered_context.get("trace_url")
|
||||
if not trace_url:
|
||||
return None
|
||||
try:
|
||||
match = re.search(r"/traces/([a-fA-F0-9]+)$", trace_url)
|
||||
if match:
|
||||
return match.group(1)
|
||||
except Exception:
|
||||
pass
|
||||
return None
|
||||
|
||||
|
||||
def _provider_base_url(provider: str | None, endpoint: str = "") -> str | None:
|
||||
"""Return the base URL for a given LLM provider."""
|
||||
if provider == "openrouter":
|
||||
return "https://openrouter.ai/api/v1"
|
||||
if provider == "groq":
|
||||
return "https://api.groq.com/openai/v1"
|
||||
if provider == "google":
|
||||
return "https://generativelanguage.googleapis.com/v1beta/openai/"
|
||||
if provider == "azure":
|
||||
return endpoint or None
|
||||
return None
|
||||
|
||||
|
||||
async def _resolve_llm_config(
    qa_node_data: dict, workflow_run: WorkflowRunModel
) -> tuple[str, str, str | None]:
    """Pick the model, API key and base URL for the QA LLM call.

    When ``qa_use_workflow_llm`` is falsy in ``qa_node_data``, the node's own
    ``qa_model`` / ``qa_api_key`` / ``qa_provider`` / ``qa_endpoint`` values
    are used as-is. Otherwise the ``llm`` section of the workflow owner's
    stored configuration is used, with ``qa_model`` overriding the configured
    model unless it is empty or ``"default"``.

    Args:
        qa_node_data: The QA node's data dict from the workflow definition.
        workflow_run: Workflow run whose ``workflow.user`` identifies the
            owner whose configuration is loaded.

    Returns:
        ``(model, api_key, base_url)``. In the fallback path the defaults are
        provider ``"openai"``, model ``"gpt-4.1"`` and an empty API key.
    """
    use_workflow_llm = qa_node_data.get("qa_use_workflow_llm", True)
    if not use_workflow_llm:
        # Node-level override: take the node's settings verbatim.
        node_model = qa_node_data.get("qa_model")
        node_api_key = qa_node_data.get("qa_api_key")
        node_base_url = _provider_base_url(
            qa_node_data.get("qa_provider"),
            qa_node_data.get("qa_endpoint", ""),
        )
        return node_model, node_api_key, node_base_url

    # Fall back to the LLM configured by the workflow's owner, if any.
    workflow = workflow_run.workflow
    owner = workflow.user if workflow else None
    owner_id = owner.id if owner else None

    settings: dict = {}
    if owner_id:
        stored = await db_client.get_user_configurations(owner_id)
        settings = stored.model_dump(exclude_none=True).get("llm", {})

    provider_name = settings.get("provider", "openai")
    key = settings.get("api_key", "")

    requested_model = qa_node_data.get("qa_model", "default")
    wants_specific_model = bool(requested_model) and requested_model != "default"
    chosen_model = (
        requested_model if wants_specific_model else settings.get("model", "gpt-4.1")
    )

    resolved_url = _provider_base_url(provider_name, settings.get("endpoint", ""))
    # For openrouter, a user-configured base_url wins over the built-in one.
    if provider_name == "openrouter" and settings.get("base_url"):
        resolved_url = settings["base_url"]

    return chosen_model, key, resolved_url
|
||||
|
||||
|
||||
def _qa_error_result(error: str) -> dict[str, Any]:
    """Build the uniform payload returned when QA analysis yields no result."""
    return {"error": error, "tags": [], "summary": "", "score": None}


async def run_qa_analysis(
    qa_node_data: dict[str, Any],
    workflow_run: WorkflowRunModel,
    workflow_run_id: int,
) -> dict[str, Any]:
    """Run QA analysis on a completed workflow run.

    Builds a transcript from the run's ``realtime_feedback_events``, sends it
    together with the node's system prompt (with ``{metrics}`` replaced by the
    pre-computed call metrics as JSON) to the resolved LLM, and parses the
    JSON reply.

    Args:
        qa_node_data: The QA node's data dict from workflow definition
        workflow_run: The workflow run model with logs and context
        workflow_run_id: The workflow run ID (used in log messages)

    Returns:
        On success a dict with ``raw_response``, ``model``, ``tags``,
        ``summary``, ``score``, ``overall_sentiment`` and, when the provider
        reported usage, ``token_usage``. If the reply cannot be parsed as a
        JSON object the parsed fields fall back to ``[]`` / ``""`` / ``None``.
        When analysis cannot run at all (no events, empty transcript, missing
        system prompt or API key, or a failed LLM call) a dict with ``error``
        and empty ``tags`` / ``summary`` / ``score`` is returned instead; this
        function does not raise for those cases.
    """
    # Extract transcript from logs
    logs = workflow_run.logs or {}
    rtf_events = logs.get("realtime_feedback_events", [])
    if not rtf_events:
        logger.warning(f"No realtime_feedback_events for run {workflow_run_id}")
        return _qa_error_result("no_transcript")

    conversation = build_conversation_structure(rtf_events)
    transcript = format_transcript(conversation)
    if not transcript:
        logger.warning(f"Empty transcript for run {workflow_run_id}")
        return _qa_error_result("empty_transcript")

    # Compute call metrics
    usage_info = workflow_run.usage_info or {}
    call_duration = usage_info.get("call_duration_seconds")
    metrics = compute_call_metrics(rtf_events, call_duration)

    # Resolve LLM config
    system_prompt = qa_node_data.get("qa_system_prompt", "")
    if not system_prompt:
        logger.warning("No system prompt defined for QA Node")
        return _qa_error_result("no_system_prompt")

    model, api_key, base_url = await _resolve_llm_config(qa_node_data, workflow_run)

    if not api_key:
        logger.warning(
            f"No LLM API key configured for QA analysis on run {workflow_run_id}"
        )
        return _qa_error_result("no_api_key")

    # Build messages
    system_content = system_prompt.replace("{metrics}", json.dumps(metrics, indent=2))
    messages = [
        {"role": "system", "content": system_content},
        {"role": "user", "content": f"## Transcript\n{transcript}"},
    ]

    # Call LLM
    client_kwargs: dict[str, Any] = {"api_key": api_key}
    if base_url:
        client_kwargs["base_url"] = base_url

    client = AsyncOpenAI(**client_kwargs)

    try:
        # The client is created per call, so close its HTTP connections as
        # soon as the request finishes rather than waiting for GC.
        async with client:
            response = await client.chat.completions.create(
                model=model,
                messages=messages,
                temperature=0,
            )
        raw_response = response.choices[0].message.content
    except Exception as e:
        logger.error(f"QA LLM call failed for run {workflow_run_id}: {e}")
        return _qa_error_result(str(e))

    # Extract token usage from LLM response
    token_usage = None
    if response.usage:
        token_usage = {
            "prompt_tokens": response.usage.prompt_tokens or 0,
            "completion_tokens": response.usage.completion_tokens or 0,
            "total_tokens": response.usage.total_tokens or 0,
            "cache_read_input_tokens": getattr(
                response.usage, "cache_read_input_tokens", 0
            )
            or 0,
            "cache_creation_input_tokens": getattr(
                response.usage, "cache_creation_input_tokens", None
            ),
        }

    # Parse response
    result: dict[str, Any] = {"raw_response": raw_response, "model": model}
    if token_usage:
        result["token_usage"] = token_usage

    try:
        parsed = parse_llm_json(raw_response)
        if not isinstance(parsed, dict):
            # A JSON array/scalar has no fields to read; treat as unparseable.
            raise ValueError(f"expected a JSON object, got {type(parsed).__name__}")
    except (ValueError, TypeError) as e:
        # ValueError covers json.JSONDecodeError; TypeError covers a missing
        # (None) message content being handed to the parser.
        logger.warning(
            f"Could not parse QA response for run {workflow_run_id}: {e}"
        )
        parsed = {}

    # Always populate the same keys so callers see one result shape.
    result["tags"] = parsed.get("tags", [])
    result["summary"] = parsed.get("summary", "")
    result["score"] = parsed.get("call_quality_score")
    result["overall_sentiment"] = parsed.get("overall_sentiment")

    # Langfuse tracing — attach QA generation to the conversation trace
    _add_qa_span_to_conversation_trace(
        workflow_run, model, messages, raw_response, result
    )

    return result
|
||||
|
||||
|
||||
def _add_qa_span_to_conversation_trace(
    workflow_run: WorkflowRunModel,
    model: str,
    messages: list[dict],
    raw_response: str,
    result: dict,
) -> None:
    """Attach the QA generation to the existing Langfuse conversation trace.

    Uses OpenTelemetry directly to create a child span under the existing trace,
    matching the same attribute format used by the pipecat pipeline (gen_ai.*).

    This is best-effort: it returns without doing anything when tracing is
    disabled or when no trace id can be extracted from the run's
    ``gathered_context``, and any exception (including a failed import of the
    tracing modules) is logged as a warning instead of being raised.

    Args:
        workflow_run: Run whose ``gathered_context`` is searched for the trace id.
        model: Model name recorded on the span.
        messages: Chat messages sent to the LLM, recorded on the span.
        raw_response: Raw LLM output, recorded on the span.
        result: Parsed QA result. Not read by this function at present.
    """
    try:
        # Imported inside the try block so that an import failure is handled
        # by the same warning path as any other tracing error.
        from opentelemetry import trace as otel_trace
        from opentelemetry.trace import (
            NonRecordingSpan,
            SpanContext,
            TraceFlags,
            set_span_in_context,
        )

        from api.services.pipecat.tracing_config import (
            is_tracing_enabled,
            setup_tracing_exporter,
        )
        from pipecat.utils.tracing.service_attributes import add_llm_span_attributes

        if not is_tracing_enabled():
            return

        # Ensure the OTEL exporter is initialized (idempotent — no-op if
        # already called in the pipeline process, required in the ARQ worker).
        setup_tracing_exporter()

        gathered_context = workflow_run.gathered_context or {}
        trace_id = _extract_trace_id(gathered_context)
        if not trace_id:
            logger.debug("No trace_id found, skipping Langfuse QA trace")
            return

        tracer = otel_trace.get_tracer("pipecat")

        # Create a remote parent context from the existing trace ID
        parent_span_ctx = SpanContext(
            # The trace id comes from the URL as a hex string.
            trace_id=int(trace_id, 16),
            span_id=0x1,  # dummy parent span id
            is_remote=True,
            # 0x01 is the "sampled" trace flag in the OpenTelemetry API.
            trace_flags=TraceFlags(0x01),
        )
        parent_ctx = set_span_in_context(NonRecordingSpan(parent_span_ctx))

        # Create a child span under the existing trace
        with tracer.start_as_current_span(
            "qa-analysis",
            context=parent_ctx,
        ) as span:
            add_llm_span_attributes(
                span,
                service_name="OpenAILLMService",
                model=model,
                operation_name="qa-analysis",
                messages=messages,
                output=raw_response,
                stream=False,
                # Mirrors the temperature used for the QA completion request.
                parameters={"temperature": 0},
            )

    except Exception as e:
        # Tracing must never break QA analysis; log and carry on.
        logger.warning(f"Failed to trace QA to Langfuse: {e}")
|
||||
Loading…
Add table
Add a link
Reference in a new issue