From 6e194f4b5931b98749d209327d6de64d5878eb87 Mon Sep 17 00:00:00 2001
From: Mubashir R <112580905+Mubashirrrr@users.noreply.github.com>
Date: Fri, 19 Jun 2026 13:40:53 +0500
Subject: [PATCH] fix(qa): tolerate non-dict JSON from QA LLM instead of
 crashing (#408)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix(qa): tolerate non-dict JSON from QA LLM instead of crashing

parse_llm_json is explicitly designed to return a list when the model emits a
top-level JSON array (it has a dedicated test for that). The QA analyzers then
call parsed.get("tags", ...) directly on the result. When parsed is a list,
that raises AttributeError, which is NOT caught by the surrounding
except (json.JSONDecodeError, ValueError) — so a single stray array response
from the QA model crashed the entire QA analysis run instead of degrading to
empty results.

The live variable-extraction path already guards this exact case with an
isinstance(..., dict) check; mirror it in both QA analysis call sites
(run_per_node_qa_analysis and the _run_whole_call_qa_analysis fallback) so a
non-dict parse result coerces to {} and the run produces empty defaults.

Adds a regression test that drives the whole-call analyzer with an array
response and asserts empty results rather than a crash.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>

* fix(qa): log non-object QA JSON responses

---------

Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
Co-authored-by: Abhishek Kumar <abhishek@a6k.me>
---
 api/services/workflow/qa/analysis.py          | 20 ++++++
 .../test_qa_analysis_non_dict_response.py     | 72 +++++++++++++++++++
 2 files changed, 92 insertions(+)
 create mode 100644 api/tests/test_qa_analysis_non_dict_response.py

diff --git a/api/services/workflow/qa/analysis.py b/api/services/workflow/qa/analysis.py
index 0afb2e19..ec3ecd4e 100644
--- a/api/services/workflow/qa/analysis.py
+++ b/api/services/workflow/qa/analysis.py
@@ -206,6 +206,16 @@ async def run_per_node_qa_analysis(
         }
         try:
             parsed = parse_llm_json(raw_response)
+            # parse_llm_json can return a list (e.g. when the model emits a
+            # top-level JSON array); coerce non-dict results so the .get()
+            # lookups below don't raise AttributeError.
+            if not isinstance(parsed, dict):
+                logger.warning(
+                    f"QA LLM returned non-object JSON for node '{node_name}' "
+                    f"on run {workflow_run_id}; got {type(parsed).__name__}, "
+                    "using empty QA result"
+                )
+                parsed = {}
             node_result["tags"] = parsed.get("tags", [])
             node_result["summary"] = parsed.get("summary", "")
             node_result["score"] = parsed.get("call_quality_score")
@@ -296,6 +306,16 @@ async def _run_whole_call_qa_analysis(
     }
     try:
         parsed = parse_llm_json(raw_response)
+        # parse_llm_json can return a list (e.g. when the model emits a
+        # top-level JSON array); coerce non-dict results so the .get()
+        # lookups below don't raise AttributeError.
+        if not isinstance(parsed, dict):
+            logger.warning(
+                f"QA LLM returned non-object JSON for whole-call QA on run "
+                f"{workflow_run_id}; got {type(parsed).__name__}, using empty "
+                "QA result"
+            )
+            parsed = {}
         node_result["tags"] = parsed.get("tags", [])
         node_result["summary"] = parsed.get("summary", "")
         node_result["score"] = parsed.get("call_quality_score")
diff --git a/api/tests/test_qa_analysis_non_dict_response.py b/api/tests/test_qa_analysis_non_dict_response.py
new file mode 100644
index 00000000..d47df090
--- /dev/null
+++ b/api/tests/test_qa_analysis_non_dict_response.py
@@ -0,0 +1,72 @@
+"""Regression test for QA analysis when the LLM returns a non-dict JSON value.
+
+``parse_llm_json`` is explicitly designed to return a list when the model emits
+a top-level JSON array (see ``test_json_parser.py``). The QA analyzers then call
+``parsed.get(...)`` on the result. For a list that raises ``AttributeError``,
+which is NOT caught by the surrounding ``except (json.JSONDecodeError, ValueError)``
+— so a stray array response crashed the whole QA run instead of degrading to
+empty results.
+"""
+
+from types import SimpleNamespace
+from unittest.mock import AsyncMock, Mock, patch
+
+import pytest
+
+from api.services.workflow.qa import analysis as qa_analysis
+
+
+@pytest.mark.asyncio
+async def test_whole_call_qa_tolerates_array_llm_response():
+    """A top-level JSON array from the QA LLM degrades to empty results."""
+    qa_data = SimpleNamespace(qa_system_prompt="Summarize: {transcript}")
+    workflow_run = SimpleNamespace(
+        logs={
+            "realtime_feedback_events": [
+                {"role": "user", "content": "hello"},
+                {"role": "assistant", "content": "hi there"},
+            ]
+        },
+        usage_info={"call_duration_seconds": 12},
+    )
+    warning_mock = Mock()  # replaces logger.warning so the call can be inspected
+
+    with (
+        patch.object(
+            qa_analysis, "build_conversation_structure", return_value=[{"x": 1}]
+        ),
+        patch.object(qa_analysis, "format_transcript", return_value="user: hello"),
+        patch.object(qa_analysis, "compute_call_metrics", return_value={}),
+        patch.object(
+            qa_analysis,
+            "resolve_llm_config",
+            new=AsyncMock(return_value=("openai", "gpt-4o", "sk-test", {})),
+        ),
+        patch.object(qa_analysis, "create_llm_service_from_provider", return_value=object()),
+        patch.object(
+            qa_analysis,
+            "_run_llm_inference",
+            new=AsyncMock(return_value='["tag1", "tag2"]'),  # JSON array, not an object
+        ),
+        patch.object(
+            qa_analysis, "setup_langfuse_parent_context", return_value=None
+        ),
+        patch.object(qa_analysis, "add_qa_span_to_trace", return_value=None),
+        patch.object(qa_analysis.logger, "warning", warning_mock),
+    ):
+        # Before the fix this raised AttributeError: 'list' object has no
+        # attribute 'get'.
+        result = await qa_analysis._run_whole_call_qa_analysis(
+            qa_data, workflow_run, workflow_run_id=99
+        )
+
+    node_result = result["node_results"]["whole_call"]
+    assert node_result["tags"] == []
+    assert node_result["summary"] == ""
+    assert node_result["score"] is None  # no "call_quality_score" key in the coerced {}
+    warning_mock.assert_called_once()
+    warning_message = warning_mock.call_args.args[0]  # first positional arg: the message
+    assert "non-object JSON" in warning_message
+    assert "run 99" in warning_message
+    assert "list" in warning_message  # type name of the parsed value
+    assert "tag1" not in warning_message  # raw response content is not echoed