feat: add qa node in workflow builder (#172)

* feat: add qa node in workflow builder * feat: add qa analysis token usage in usage_info * fix: mask the API key in QA node * feat: add advanced configuration in QA node
2026-06-19 08:28:10 +02:00 · 2026-02-25 13:53:30 +05:30 · 2026-02-25 13:53:30 +05:30 · a836825b83
commit a836825b83
parent f1f4830012
30 changed files with 1619 additions and 265 deletions
--- a/api/services/qa_analysis.py
+++ b/api/services/qa_analysis.py
@ -0,0 +1,360 @@
+"""QA analysis service for post-call quality assessment.
+
+Runs LLM-based analysis on call transcripts, traces under the same
+Langfuse trace as the conversation, and returns structured results.
+"""
+
+import json
+import re
+from datetime import datetime
+from typing import Any
+
+from loguru import logger
+from openai import AsyncOpenAI
+
+from api.db import db_client
+from api.db.models import WorkflowRunModel
+from api.services.gen_ai.json_parser import parse_llm_json
+from pipecat.utils.enums import RealtimeFeedbackType
+
+
+def build_conversation_structure(logs: list[dict]) -> list[dict]:
+    """Turn raw call log events into an ordered list of spoken utterances.
+
+    Only bot text events and user transcriptions whose payload is flagged
+    ``final`` are kept; every other event is skipped. Each utterance carries
+    its offset from the timestamp of the first log entry.
+
+    Args:
+        logs: Raw call log events. ``logs[0]["timestamp"]`` is the reference
+            time; kept events are read via ``event["payload"]["text"]`` and
+            ``event["payload"]["timestamp"]`` (ISO-format strings).
+
+    Returns:
+        One dict per utterance with the keys ``time_from_start_seconds``
+        (rounded to 2 decimals), ``speaker``, ``text``, ``node_name`` and
+        ``turn``. An empty list when ``logs`` is empty.
+    """
+    if not logs:
+        return []
+
+    # Offsets are measured from the first log entry's top-level timestamp.
+    call_start = datetime.fromisoformat(logs[0]["timestamp"])
+
+    utterances = []
+    for event in logs:
+        event_type = event["type"]
+
+        # Decide who is speaking; anything that is not an utterance is dropped.
+        if event_type == RealtimeFeedbackType.BOT_TEXT.value:
+            role = "assistant"
+        elif (
+            event_type == RealtimeFeedbackType.USER_TRANSCRIPTION.value
+            and event["payload"].get("final", False)
+        ):
+            role = "user"
+        else:
+            continue
+
+        payload = event["payload"]
+        spoken_text = payload["text"]
+        spoken_at = datetime.fromisoformat(payload["timestamp"])
+        offset_seconds = (spoken_at - call_start).total_seconds()
+
+        utterances.append(
+            {
+                "time_from_start_seconds": round(offset_seconds, 2),
+                "speaker": role,
+                "text": spoken_text,
+                "node_name": event.get("node_name", ""),
+                "turn": event.get("turn", 0),
+            }
+        )
+
+    return utterances
+
+
+def format_transcript(conversation: list[dict]) -> str:
+    """Render a conversation structure as a plain-text transcript for the LLM.
+
+    Each entry becomes one line of the form ``[<offset>s] <speaker>: <text>``,
+    with the offset shown to one decimal place. Lines are joined with newlines;
+    an empty conversation yields an empty string.
+    """
+    return "\n".join(
+        f"[{turn['time_from_start_seconds']:.1f}s] "
+        f"{turn['speaker']}: {turn['text']}"
+        for turn in conversation
+    )
+
+
+def compute_call_metrics(
+    logs: list[dict], call_duration_seconds: float | None = None
+) -> dict:
+    """Derive quantitative call metrics from raw call log events.
+
+    Args:
+        logs: Raw call log events; latency and TTFB samples are read from
+            ``event["payload"]`` of the matching event types.
+        call_duration_seconds: Call duration, passed through unchanged.
+
+    Returns:
+        A dict with ``call_duration_seconds``, ``num_turns`` (count of distinct
+        ``turn`` values on user-transcription / bot-text events),
+        ``avg_latency_seconds``, ``avg_ttfb_seconds`` and
+        ``max_latency_seconds``. The averages and the maximum are rounded to
+        two decimals and are ``None`` when no sample was recorded.
+    """
+
+    def _rounded_mean(samples: list) -> float | None:
+        # No samples means "unknown", not zero.
+        if not samples:
+            return None
+        return round(sum(samples) / len(samples), 2)
+
+    latency_samples = []
+    ttfb_samples = []
+    seen_turns = set()
+
+    for event in logs:
+        event_type = event["type"]
+
+        if event_type == RealtimeFeedbackType.LATENCY_MEASURED.value:
+            latency_samples.append(event["payload"]["latency_seconds"])
+        elif event_type == RealtimeFeedbackType.TTFB_METRIC.value:
+            ttfb_samples.append(event["payload"]["ttfb_seconds"])
+
+        # Turn counting is independent of the sample collection above.
+        if event_type in (
+            RealtimeFeedbackType.USER_TRANSCRIPTION.value,
+            RealtimeFeedbackType.BOT_TEXT.value,
+        ):
+            seen_turns.add(event.get("turn", 0))
+
+    peak_latency = round(max(latency_samples), 2) if latency_samples else None
+
+    return {
+        "call_duration_seconds": call_duration_seconds,
+        "num_turns": len(seen_turns),
+        "avg_latency_seconds": _rounded_mean(latency_samples),
+        "avg_ttfb_seconds": _rounded_mean(ttfb_samples),
+        "max_latency_seconds": peak_latency,
+    }
+
+
+def _extract_trace_id(gathered_context: dict) -> str | None:
+    """Extract the Langfuse trace ID from ``gathered_context["trace_url"]``.
+
+    URL format: https://langfuse.dograh.com/project/<project_id>/traces/<trace_id>
+
+    The trace ID must be the last path segment and consist of hex digits. A
+    trailing slash, query string or fragment after the ID is tolerated so that
+    a decorated URL still resolves to its trace.
+
+    Args:
+        gathered_context: Context dict of the workflow run; only the
+            ``trace_url`` key is read.
+
+    Returns:
+        The hex trace ID, or ``None`` when the URL is missing, is not a
+        string, or does not end in a ``/traces/<hex>`` segment.
+    """
+    trace_url = gathered_context.get("trace_url")
+    # Guard explicitly instead of swallowing the TypeError that re.search
+    # raises for non-string input.
+    if not trace_url or not isinstance(trace_url, str):
+        return None
+
+    match = re.search(r"/traces/([a-fA-F0-9]+)/?(?:[?#].*)?$", trace_url)
+    return match.group(1) if match else None
+
+
+def _provider_base_url(provider: str | None, endpoint: str = "") -> str | None:
+    """Map an LLM provider name to its OpenAI-compatible base URL.
+
+    Args:
+        provider: Provider identifier (``"openrouter"``, ``"groq"``,
+            ``"google"``, ``"azure"``); anything else has no override.
+        endpoint: Endpoint to use for ``"azure"``; ignored for other providers.
+
+    Returns:
+        The base URL, or ``None`` when the provider is not listed here or the
+        Azure endpoint is empty.
+    """
+    match provider:
+        case "openrouter":
+            return "https://openrouter.ai/api/v1"
+        case "groq":
+            return "https://api.groq.com/openai/v1"
+        case "google":
+            return "https://generativelanguage.googleapis.com/v1beta/openai/"
+        case "azure":
+            # Azure has no fixed URL; an empty endpoint means "no override".
+            return endpoint or None
+        case _:
+            return None
+
+
+async def _resolve_llm_config(
+    qa_node_data: dict, workflow_run: WorkflowRunModel
+) -> tuple[str, str, str | None]:
+    """Work out which model, API key and base URL the QA analysis should use.
+
+    When the QA node opts out of the workflow LLM (``qa_use_workflow_llm`` is
+    falsy), the node's own ``qa_model`` / ``qa_api_key`` / ``qa_provider`` /
+    ``qa_endpoint`` values are returned as-is. Otherwise the LLM settings of
+    the workflow's user are loaded, with ``qa_model`` overriding the
+    configured model unless it is empty or ``"default"``.
+
+    Returns:
+        (model, api_key, base_url) tuple
+    """
+    use_workflow_llm = qa_node_data.get("qa_use_workflow_llm", True)
+    if not use_workflow_llm:
+        # Node-level configuration: take the values exactly as stored.
+        node_model = qa_node_data.get("qa_model")
+        node_api_key = qa_node_data.get("qa_api_key")
+        node_base_url = _provider_base_url(
+            qa_node_data.get("qa_provider"),
+            qa_node_data.get("qa_endpoint", ""),
+        )
+        return node_model, node_api_key, node_base_url
+
+    # Otherwise use the LLM configured by the user who owns the workflow.
+    owner_id = None
+    if workflow_run.workflow and workflow_run.workflow.user:
+        owner_id = workflow_run.workflow.user.id
+
+    user_llm: dict = {}
+    if owner_id:
+        stored_config = await db_client.get_user_configurations(owner_id)
+        user_llm = stored_config.model_dump(exclude_none=True).get("llm", {})
+
+    provider = user_llm.get("provider", "openai")
+    api_key = user_llm.get("api_key", "")
+
+    # A concrete qa_model wins; "default" (or empty) defers to the user's model.
+    requested_model = qa_node_data.get("qa_model", "default")
+    use_requested = bool(requested_model) and requested_model != "default"
+    model = requested_model if use_requested else user_llm.get("model", "gpt-4.1")
+
+    base_url = _provider_base_url(provider, user_llm.get("endpoint", ""))
+    # For openrouter, a user-configured base_url takes precedence when set.
+    if provider == "openrouter" and user_llm.get("base_url"):
+        base_url = user_llm["base_url"]
+
+    return model, api_key, base_url
+
+
+def _qa_error_result(error: str) -> dict[str, Any]:
+    """Build the result dict returned when QA analysis produces no output."""
+    return {"error": error, "tags": [], "summary": "", "score": None}
+
+
+async def run_qa_analysis(
+    qa_node_data: dict[str, Any],
+    workflow_run: WorkflowRunModel,
+    workflow_run_id: int,
+) -> dict[str, Any]:
+    """Run QA analysis on a completed workflow run.
+
+    Builds a transcript and call metrics from the run's realtime feedback
+    events, sends them to the resolved LLM together with the QA node's system
+    prompt, and parses the JSON reply. The generation is also attached to the
+    conversation's trace on a best-effort basis.
+
+    Args:
+        qa_node_data: The QA node's data dict from workflow definition
+        workflow_run: The workflow run model with logs and context
+        workflow_run_id: The workflow run ID (used for log messages only)
+
+    Returns:
+        On success: a dict with ``raw_response``, ``model``, ``tags``,
+        ``summary``, ``score``, ``overall_sentiment`` and, when the provider
+        reported usage, ``token_usage``. If the reply cannot be parsed as a
+        JSON object the same keys are present with empty/``None`` values.
+        On an early failure (no transcript, no system prompt, no API key, LLM
+        call error): a dict with ``error``, ``tags``, ``summary``, ``score``.
+    """
+    # Extract transcript from logs
+    logs = workflow_run.logs or {}
+    rtf_events = logs.get("realtime_feedback_events", [])
+    if not rtf_events:
+        logger.warning(f"No realtime_feedback_events for run {workflow_run_id}")
+        return _qa_error_result("no_transcript")
+
+    conversation = build_conversation_structure(rtf_events)
+    transcript = format_transcript(conversation)
+    if not transcript:
+        logger.warning(f"Empty transcript for run {workflow_run_id}")
+        return _qa_error_result("empty_transcript")
+
+    # Compute call metrics
+    usage_info = workflow_run.usage_info or {}
+    call_duration = usage_info.get("call_duration_seconds")
+    metrics = compute_call_metrics(rtf_events, call_duration)
+
+    # Resolve LLM config
+    system_prompt = qa_node_data.get("qa_system_prompt", "")
+    if not system_prompt:
+        logger.warning("No system prompt defined for QA Node")
+        return _qa_error_result("no_system_prompt")
+
+    model, api_key, base_url = await _resolve_llm_config(qa_node_data, workflow_run)
+
+    if not api_key:
+        logger.warning(
+            f"No LLM API key configured for QA analysis on run {workflow_run_id}"
+        )
+        return _qa_error_result("no_api_key")
+
+    # Build messages; the prompt may embed the metrics via a {metrics} marker.
+    system_content = system_prompt.replace("{metrics}", json.dumps(metrics, indent=2))
+    messages = [
+        {"role": "system", "content": system_content},
+        {"role": "user", "content": f"## Transcript\n{transcript}"},
+    ]
+
+    # Call LLM
+    client_kwargs: dict[str, Any] = {"api_key": api_key}
+    if base_url:
+        client_kwargs["base_url"] = base_url
+
+    client = AsyncOpenAI(**client_kwargs)
+
+    try:
+        # The client is created per call, so close its HTTP connections as
+        # soon as the request finishes instead of waiting for garbage collection.
+        async with client:
+            response = await client.chat.completions.create(
+                model=model,
+                messages=messages,
+                temperature=0,
+            )
+        raw_response = response.choices[0].message.content
+    except Exception as e:
+        # Best-effort boundary: any provider/network failure becomes an error
+        # result rather than failing the caller.
+        logger.error(f"QA LLM call failed for run {workflow_run_id}: {e}")
+        return _qa_error_result(str(e))
+
+    # Extract token usage from LLM response
+    token_usage = None
+    if response.usage:
+        token_usage = {
+            "prompt_tokens": response.usage.prompt_tokens or 0,
+            "completion_tokens": response.usage.completion_tokens or 0,
+            "total_tokens": response.usage.total_tokens or 0,
+            "cache_read_input_tokens": getattr(
+                response.usage, "cache_read_input_tokens", 0
+            )
+            or 0,
+            # NOTE(review): unlike cache_read_input_tokens this stays None when
+            # the provider does not report it — confirm that is intended.
+            "cache_creation_input_tokens": getattr(
+                response.usage, "cache_creation_input_tokens", None
+            ),
+        }
+
+    # Parse response
+    result: dict[str, Any] = {"raw_response": raw_response, "model": model}
+    if token_usage:
+        result["token_usage"] = token_usage
+    try:
+        parsed = parse_llm_json(raw_response)
+        if not isinstance(parsed, dict):
+            # A JSON array/scalar has no fields to read; treat as unparseable.
+            raise ValueError("QA response is not a JSON object")
+        result["tags"] = parsed.get("tags", [])
+        result["summary"] = parsed.get("summary", "")
+        result["score"] = parsed.get("call_quality_score")
+        result["overall_sentiment"] = parsed.get("overall_sentiment")
+    except (json.JSONDecodeError, ValueError, TypeError) as e:
+        # TypeError covers a missing (None) message content.
+        logger.warning(f"Could not parse QA response for run {workflow_run_id}: {e}")
+        result["tags"] = []
+        result["summary"] = ""
+        result["score"] = None
+        result["overall_sentiment"] = None
+
+    # Langfuse tracing — attach QA generation to the conversation trace
+    _add_qa_span_to_conversation_trace(
+        workflow_run, model, messages, raw_response, result
+    )
+
+    return result
+
+
+def _add_qa_span_to_conversation_trace(
+    workflow_run: WorkflowRunModel,
+    model: str,
+    messages: list[dict],
+    raw_response: str,
+    result: dict,
+):
+    """Record the QA generation as a span inside the conversation's trace.
+
+    The trace ID is taken from the run's ``gathered_context`` trace URL and a
+    ``qa-analysis`` span is opened beneath it with OpenTelemetry, annotated via
+    ``add_llm_span_attributes``. This is best-effort: tracing being disabled
+    or a missing trace ID returns silently, and any exception is logged as a
+    warning instead of propagating. ``result`` is accepted but not read here.
+    """
+    try:
+        # Imported lazily so a missing tracing dependency is caught below.
+        from opentelemetry import trace as otel_trace
+        from opentelemetry.trace import (
+            NonRecordingSpan,
+            SpanContext,
+            TraceFlags,
+            set_span_in_context,
+        )
+
+        from api.services.pipecat.tracing_config import (
+            is_tracing_enabled,
+            setup_tracing_exporter,
+        )
+        from pipecat.utils.tracing.service_attributes import add_llm_span_attributes
+
+        if not is_tracing_enabled():
+            return
+
+        # Make sure the OTEL exporter is initialized (idempotent — no-op if
+        # already called in the pipeline process, required in the ARQ worker).
+        setup_tracing_exporter()
+
+        conversation_trace_id = _extract_trace_id(workflow_run.gathered_context or {})
+        if not conversation_trace_id:
+            logger.debug("No trace_id found, skipping Langfuse QA trace")
+            return
+
+        qa_tracer = otel_trace.get_tracer("pipecat")
+
+        # Synthesize a remote parent that carries only the existing trace ID;
+        # the span id is a dummy value.
+        remote_parent = NonRecordingSpan(
+            SpanContext(
+                trace_id=int(conversation_trace_id, 16),
+                span_id=0x1,
+                is_remote=True,
+                trace_flags=TraceFlags(0x01),
+            )
+        )
+
+        # Open the QA span as a child of that parent context.
+        with qa_tracer.start_as_current_span(
+            "qa-analysis",
+            context=set_span_in_context(remote_parent),
+        ) as qa_span:
+            add_llm_span_attributes(
+                qa_span,
+                service_name="OpenAILLMService",
+                model=model,
+                operation_name="qa-analysis",
+                messages=messages,
+                output=raw_response,
+                stream=False,
+                parameters={"temperature": 0},
+            )
+
+    except Exception as exc:
+        logger.warning(f"Failed to trace QA to Langfuse: {exc}")