From 6e194f4b5931b98749d209327d6de64d5878eb87 Mon Sep 17 00:00:00 2001 From: Mubashir R <112580905+Mubashirrrr@users.noreply.github.com> Date: Fri, 19 Jun 2026 13:40:53 +0500 Subject: [PATCH] fix(qa): tolerate non-dict JSON from QA LLM instead of crashing (#408) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(qa): tolerate non-dict JSON from QA LLM instead of crashing parse_llm_json is explicitly designed to return a list when the model emits a top-level JSON array (it has a dedicated test for that). The QA analyzers then call parsed.get("tags", ...) directly on the result. When parsed is a list, that raises AttributeError, which is NOT caught by the surrounding except (json.JSONDecodeError, ValueError) — so a single stray array response from the QA model crashed the entire QA analysis run instead of degrading to empty results. The live variable-extraction path already guards this exact case with an isinstance(..., dict) check; mirror it in both QA analysis call sites (run_per_node_qa_analysis per-node and _run_whole_call_qa_analysis fallback) so a non-dict parse result coerces to {} and the run produces empty defaults. Adds a regression test that drives the whole-call analyzer with an array response and asserts empty results rather than a crash. 
Co-Authored-By: Claude Opus 4.8 * fix(qa): log non-object QA JSON responses --------- Co-authored-by: Claude Opus 4.8 Co-authored-by: Abhishek Kumar --- api/services/workflow/qa/analysis.py | 20 ++++++ .../test_qa_analysis_non_dict_response.py | 72 +++++++++++++++++++ 2 files changed, 92 insertions(+) create mode 100644 api/tests/test_qa_analysis_non_dict_response.py diff --git a/api/services/workflow/qa/analysis.py b/api/services/workflow/qa/analysis.py index 0afb2e19..ec3ecd4e 100644 --- a/api/services/workflow/qa/analysis.py +++ b/api/services/workflow/qa/analysis.py @@ -206,6 +206,16 @@ async def run_per_node_qa_analysis( } try: parsed = parse_llm_json(raw_response) + # parse_llm_json can return a list (e.g. when the model emits a + # top-level JSON array); coerce non-dict results so the .get() + # lookups below don't raise AttributeError. + if not isinstance(parsed, dict): + logger.warning( + f"QA LLM returned non-object JSON for node '{node_name}' " + f"on run {workflow_run_id}; got {type(parsed).__name__}, " + "using empty QA result" + ) + parsed = {} node_result["tags"] = parsed.get("tags", []) node_result["summary"] = parsed.get("summary", "") node_result["score"] = parsed.get("call_quality_score") @@ -296,6 +306,16 @@ async def _run_whole_call_qa_analysis( } try: parsed = parse_llm_json(raw_response) + # parse_llm_json can return a list (e.g. when the model emits a + # top-level JSON array); coerce non-dict results so the .get() + # lookups below don't raise AttributeError. 
+ if not isinstance(parsed, dict): + logger.warning( + f"QA LLM returned non-object JSON for whole-call QA on run " + f"{workflow_run_id}; got {type(parsed).__name__}, using empty " + "QA result" + ) + parsed = {} node_result["tags"] = parsed.get("tags", []) node_result["summary"] = parsed.get("summary", "") node_result["score"] = parsed.get("call_quality_score") diff --git a/api/tests/test_qa_analysis_non_dict_response.py b/api/tests/test_qa_analysis_non_dict_response.py new file mode 100644 index 00000000..d47df090 --- /dev/null +++ b/api/tests/test_qa_analysis_non_dict_response.py @@ -0,0 +1,72 @@ +"""Regression test for QA analysis when the LLM returns a non-dict JSON value. + +``parse_llm_json`` is explicitly designed to return a list when the model emits +a top-level JSON array (see ``test_json_parser.py``). The QA analyzers then call +``parsed.get(...)`` on the result. For a list that raises ``AttributeError``, +which is NOT caught by the surrounding ``except (json.JSONDecodeError, ValueError)`` +— so a stray array response crashed the whole QA run instead of degrading to +empty results. 
+""" + +from types import SimpleNamespace +from unittest.mock import AsyncMock, Mock, patch + +import pytest + +from api.services.workflow.qa import analysis as qa_analysis + + +@pytest.mark.asyncio +async def test_whole_call_qa_tolerates_array_llm_response(): + """A top-level JSON array from the QA LLM degrades to empty results.""" + qa_data = SimpleNamespace(qa_system_prompt="Summarize: {transcript}") + workflow_run = SimpleNamespace( + logs={ + "realtime_feedback_events": [ + {"role": "user", "content": "hello"}, + {"role": "assistant", "content": "hi there"}, + ] + }, + usage_info={"call_duration_seconds": 12}, + ) + warning_mock = Mock() + + with ( + patch.object( + qa_analysis, "build_conversation_structure", return_value=[{"x": 1}] + ), + patch.object(qa_analysis, "format_transcript", return_value="user: hello"), + patch.object(qa_analysis, "compute_call_metrics", return_value={}), + patch.object( + qa_analysis, + "resolve_llm_config", + new=AsyncMock(return_value=("openai", "gpt-4o", "sk-test", {})), + ), + patch.object(qa_analysis, "create_llm_service_from_provider", return_value=object()), + patch.object( + qa_analysis, + "_run_llm_inference", + new=AsyncMock(return_value='["tag1", "tag2"]'), + ), + patch.object( + qa_analysis, "setup_langfuse_parent_context", return_value=None + ), + patch.object(qa_analysis, "add_qa_span_to_trace", return_value=None), + patch.object(qa_analysis.logger, "warning", warning_mock), + ): + # Before the fix this raised AttributeError: 'list' object has no + # attribute 'get'. 
+ result = await qa_analysis._run_whole_call_qa_analysis( + qa_data, workflow_run, workflow_run_id=99 + ) + + node_result = result["node_results"]["whole_call"] + assert node_result["tags"] == [] + assert node_result["summary"] == "" + assert node_result["score"] is None + warning_mock.assert_called_once() + warning_message = warning_mock.call_args.args[0] + assert "non-object JSON" in warning_message + assert "run 99" in warning_message + assert "list" in warning_message + assert "tag1" not in warning_message