mirror of
https://github.com/dograh-hq/dograh.git
synced 2026-06-13 08:15:21 +02:00
fix(qa): tolerate non-dict JSON from QA LLM instead of crashing
parse_llm_json is explicitly designed to return a list when the model emits a
top-level JSON array (it has a dedicated test for that). The QA analyzers then
call parsed.get("tags", ...) directly on the result. When parsed is a list,
that raises AttributeError, which is NOT caught by the surrounding
except (json.JSONDecodeError, ValueError) — so a single stray array response
from the QA model crashed the entire QA analysis run instead of degrading to
empty results.
The live variable-extraction path already guards this exact case with an
isinstance(..., dict) check; mirror it in both QA analysis call sites
(_run_qa_analysis per-node and _run_whole_call_qa_analysis fallback) so a
non-dict parse result coerces to {} and the run produces empty defaults.
Adds a regression test that drives the whole-call analyzer with an array
response and asserts empty results rather than a crash.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
acc2ef9e96
commit
6ae9ba5d9e
2 changed files with 74 additions and 0 deletions
|
|
@ -206,6 +206,11 @@ async def run_per_node_qa_analysis(
|
|||
}
|
||||
try:
|
||||
parsed = parse_llm_json(raw_response)
|
||||
# parse_llm_json can return a list (e.g. when the model emits a
|
||||
# top-level JSON array); coerce non-dict results so the .get()
|
||||
# lookups below don't raise AttributeError.
|
||||
if not isinstance(parsed, dict):
|
||||
parsed = {}
|
||||
node_result["tags"] = parsed.get("tags", [])
|
||||
node_result["summary"] = parsed.get("summary", "")
|
||||
node_result["score"] = parsed.get("call_quality_score")
|
||||
|
|
@ -296,6 +301,11 @@ async def _run_whole_call_qa_analysis(
|
|||
}
|
||||
try:
|
||||
parsed = parse_llm_json(raw_response)
|
||||
# parse_llm_json can return a list (e.g. when the model emits a
|
||||
# top-level JSON array); coerce non-dict results so the .get()
|
||||
# lookups below don't raise AttributeError.
|
||||
if not isinstance(parsed, dict):
|
||||
parsed = {}
|
||||
node_result["tags"] = parsed.get("tags", [])
|
||||
node_result["summary"] = parsed.get("summary", "")
|
||||
node_result["score"] = parsed.get("call_quality_score")
|
||||
|
|
|
|||
64
api/tests/test_qa_analysis_non_dict_response.py
Normal file
64
api/tests/test_qa_analysis_non_dict_response.py
Normal file
|
|
@ -0,0 +1,64 @@
|
|||
"""Regression test for QA analysis when the LLM returns a non-dict JSON value.
|
||||
|
||||
``parse_llm_json`` is explicitly designed to return a list when the model emits
|
||||
a top-level JSON array (see ``test_json_parser.py``). The QA analyzers then call
|
||||
``parsed.get(...)`` on the result. For a list that raises ``AttributeError``,
|
||||
which is NOT caught by the surrounding ``except (json.JSONDecodeError, ValueError)``
|
||||
— so a stray array response crashed the whole QA run instead of degrading to
|
||||
empty results.
|
||||
"""
|
||||
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import AsyncMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from api.services.workflow.qa import analysis as qa_analysis
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_whole_call_qa_tolerates_array_llm_response():
|
||||
"""A top-level JSON array from the QA LLM degrades to empty results."""
|
||||
qa_data = SimpleNamespace(qa_system_prompt="Summarize: {transcript}")
|
||||
workflow_run = SimpleNamespace(
|
||||
logs={
|
||||
"realtime_feedback_events": [
|
||||
{"role": "user", "content": "hello"},
|
||||
{"role": "assistant", "content": "hi there"},
|
||||
]
|
||||
},
|
||||
usage_info={"call_duration_seconds": 12},
|
||||
)
|
||||
|
||||
with (
|
||||
patch.object(
|
||||
qa_analysis, "build_conversation_structure", return_value=[{"x": 1}]
|
||||
),
|
||||
patch.object(qa_analysis, "format_transcript", return_value="user: hello"),
|
||||
patch.object(qa_analysis, "compute_call_metrics", return_value={}),
|
||||
patch.object(
|
||||
qa_analysis,
|
||||
"resolve_llm_config",
|
||||
new=AsyncMock(return_value=("openai", "gpt-4o", "sk-test", {})),
|
||||
),
|
||||
patch.object(qa_analysis, "create_llm_service_from_provider", return_value=object()),
|
||||
patch.object(
|
||||
qa_analysis,
|
||||
"_run_llm_inference",
|
||||
new=AsyncMock(return_value='["tag1", "tag2"]'),
|
||||
),
|
||||
patch.object(
|
||||
qa_analysis, "setup_langfuse_parent_context", return_value=None
|
||||
),
|
||||
patch.object(qa_analysis, "add_qa_span_to_trace", return_value=None),
|
||||
):
|
||||
# Before the fix this raised AttributeError: 'list' object has no
|
||||
# attribute 'get'.
|
||||
result = await qa_analysis._run_whole_call_qa_analysis(
|
||||
qa_data, workflow_run, workflow_run_id=99
|
||||
)
|
||||
|
||||
node_result = result["node_results"]["whole_call"]
|
||||
assert node_result["tags"] == []
|
||||
assert node_result["summary"] == ""
|
||||
assert node_result["score"] is None
|
||||
Loading…
Add table
Add a link
Reference in a new issue