# dograh/api/tests/test_qa_analysis_non_dict_response.py

"""Regression test for QA analysis when the LLM returns a non-dict JSON value.

``parse_llm_json`` is explicitly designed to return a list when the model emits
a top-level JSON array (see ``test_json_parser.py``). The QA analyzers then call
``parsed.get(...)`` on the result. For a list, that call raises
``AttributeError``, which is NOT caught by the surrounding
``except (json.JSONDecodeError, ValueError)`` — so, before the fix, a stray
array response crashed the whole QA run instead of degrading to empty results.
"""

from types import SimpleNamespace
from unittest.mock import AsyncMock, Mock, patch

import pytest

from api.services.workflow.qa import analysis as qa_analysis


@pytest.mark.asyncio
async def test_whole_call_qa_tolerates_array_llm_response():
    """Whole-call QA yields empty defaults when the LLM replies with a JSON array.

    The inference helper is stubbed to return the text ``["tag1", "tag2"]``.
    The analyzer is expected to return empty tags/summary and no score, and to
    log exactly one warning that mentions the run id and the offending type
    without echoing the payload itself.
    """
    # --- Arrange: stand-ins for the async collaborators and the logger. ---
    llm_config_stub = AsyncMock(return_value=("openai", "gpt-4o", "sk-test", {}))
    inference_stub = AsyncMock(return_value='["tag1", "tag2"]')
    captured_warning = Mock()

    feedback_events = [
        {"role": "user", "content": "hello"},
        {"role": "assistant", "content": "hi there"},
    ]
    run = SimpleNamespace(
        logs={"realtime_feedback_events": feedback_events},
        usage_info={"call_duration_seconds": 12},
    )
    qa_config = SimpleNamespace(qa_system_prompt="Summarize: {transcript}")

    # --- Act: run the analyzer with every external dependency patched out. ---
    with (
        patch.object(
            qa_analysis, "build_conversation_structure", return_value=[{"x": 1}]
        ),
        patch.object(qa_analysis, "format_transcript", return_value="user: hello"),
        patch.object(qa_analysis, "compute_call_metrics", return_value={}),
        patch.object(qa_analysis, "resolve_llm_config", new=llm_config_stub),
        patch.object(
            qa_analysis, "create_llm_service_from_provider", return_value=object()
        ),
        patch.object(qa_analysis, "_run_llm_inference", new=inference_stub),
        patch.object(qa_analysis, "setup_langfuse_parent_context", return_value=None),
        patch.object(qa_analysis, "add_qa_span_to_trace", return_value=None),
        patch.object(qa_analysis.logger, "warning", new=captured_warning),
    ):
        # Prior to the fix this call blew up with
        # AttributeError: 'list' object has no attribute 'get'.
        outcome = await qa_analysis._run_whole_call_qa_analysis(
            qa_config, run, workflow_run_id=99
        )

    # --- Assert: empty defaults came back ... ---
    whole_call = outcome["node_results"]["whole_call"]
    assert whole_call["tags"] == []
    assert whole_call["summary"] == ""
    assert whole_call["score"] is None

    # ... and a single warning was logged that identifies the problem
    # without leaking the raw LLM payload.
    captured_warning.assert_called_once()
    logged_text = captured_warning.call_args.args[0]
    for expected_fragment in ("non-object JSON", "run 99", "list"):
        assert expected_fragment in logged_text
    assert "tag1" not in logged_text
fix(qa): tolerate non-dict JSON from QA LLM instead of crashing (#408) * fix(qa): tolerate non-dict JSON from QA LLM instead of crashing parse_llm_json is explicitly designed to return a list when the model emits a top-level JSON array (it has a dedicated test for that). The QA analyzers then call parsed.get("tags", ...) directly on the result. When parsed is a list, that raises AttributeError, which is NOT caught by the surrounding except (json.JSONDecodeError, ValueError) — so a single stray array response from the QA model crashed the entire QA analysis run instead of degrading to empty results. The live variable-extraction path already guards this exact case with an isinstance(..., dict) check; mirror it in both QA analysis call sites (_run_qa_analysis per-node and _run_whole_call_qa_analysis fallback) so a non-dict parse result coerces to {} and the run produces empty defaults. Adds a regression test that drives the whole-call analyzer with an array response and asserts empty results rather than a crash. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> * fix(qa): log non-object QA JSON responses --------- Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com> Co-authored-by: Abhishek Kumar <abhishek@a6k.me> 2026-06-19 13:40:53 +05:00			`"""Regression test for QA analysis when the LLM returns a non-dict JSON value.`

			``parse_llm_json`` is explicitly designed to return a list when the model emits
			a top-level JSON array (see ``test_json_parser.py``). The QA analyzers then call
			``parsed.get(...)`` on the result. For a list that raises ``AttributeError``,
			which is NOT caught by the surrounding ``except (json.JSONDecodeError, ValueError)``
			`— so a stray array response crashed the whole QA run instead of degrading to`
			`empty results.`
			`"""`

			`from types import SimpleNamespace`
			`from unittest.mock import AsyncMock, Mock, patch`

			`import pytest`

			`from api.services.workflow.qa import analysis as qa_analysis`


			`@pytest.mark.asyncio`
			`async def test_whole_call_qa_tolerates_array_llm_response():`
			`"""A top-level JSON array from the QA LLM degrades to empty results."""`
			`qa_data = SimpleNamespace(qa_system_prompt="Summarize: {transcript}")`
			`workflow_run = SimpleNamespace(`
			`logs={`
			`"realtime_feedback_events": [`
			`{"role": "user", "content": "hello"},`
			`{"role": "assistant", "content": "hi there"},`
			`]`
			`},`
			`usage_info={"call_duration_seconds": 12},`
			`)`
			`warning_mock = Mock()`

			`with (`
			`patch.object(`
			`qa_analysis, "build_conversation_structure", return_value=[{"x": 1}]`
			`),`
			`patch.object(qa_analysis, "format_transcript", return_value="user: hello"),`
			`patch.object(qa_analysis, "compute_call_metrics", return_value={}),`
			`patch.object(`
			`qa_analysis,`
			`"resolve_llm_config",`
			`new=AsyncMock(return_value=("openai", "gpt-4o", "sk-test", {})),`
			`),`
fix: disable duplicate trigger nodes in workflow builder (#402) * fix: disable duplicate trigger nodes in workflow builder AddNodePanel: disable trigger buttons and show tooltip when a trigger already exists on the canvas, using bySpecName to identify trigger- category specs from the live node list. useWorkflowState: preflight in saveWorkflow rejects saves with multiple trigger nodes via a sonner toast before the network request is made. text_chat_session_service: include the original exception message in TextChatSessionExecutionError so the HTTP 500 detail surfaces the root cause without DB inspection. Closes #378 * style: format test_text_chat_session_service.py with ruff * chore: retrigger CI checks * fix(workflow): enforce node instance constraints --------- Co-authored-by: Abhishek Kumar <abhishek@a6k.me> 2026-06-19 03:29:30 -07:00			`patch.object(`
			`qa_analysis, "create_llm_service_from_provider", return_value=object()`
			`),`
fix(qa): tolerate non-dict JSON from QA LLM instead of crashing (#408) * fix(qa): tolerate non-dict JSON from QA LLM instead of crashing parse_llm_json is explicitly designed to return a list when the model emits a top-level JSON array (it has a dedicated test for that). The QA analyzers then call parsed.get("tags", ...) directly on the result. When parsed is a list, that raises AttributeError, which is NOT caught by the surrounding except (json.JSONDecodeError, ValueError) — so a single stray array response from the QA model crashed the entire QA analysis run instead of degrading to empty results. The live variable-extraction path already guards this exact case with an isinstance(..., dict) check; mirror it in both QA analysis call sites (_run_qa_analysis per-node and _run_whole_call_qa_analysis fallback) so a non-dict parse result coerces to {} and the run produces empty defaults. Adds a regression test that drives the whole-call analyzer with an array response and asserts empty results rather than a crash. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> * fix(qa): log non-object QA JSON responses --------- Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com> Co-authored-by: Abhishek Kumar <abhishek@a6k.me> 2026-06-19 13:40:53 +05:00			`patch.object(`
			`qa_analysis,`
			`"_run_llm_inference",`
			`new=AsyncMock(return_value='["tag1", "tag2"]'),`
			`),`
fix: disable duplicate trigger nodes in workflow builder (#402) * fix: disable duplicate trigger nodes in workflow builder AddNodePanel: disable trigger buttons and show tooltip when a trigger already exists on the canvas, using bySpecName to identify trigger- category specs from the live node list. useWorkflowState: preflight in saveWorkflow rejects saves with multiple trigger nodes via a sonner toast before the network request is made. text_chat_session_service: include the original exception message in TextChatSessionExecutionError so the HTTP 500 detail surfaces the root cause without DB inspection. Closes #378 * style: format test_text_chat_session_service.py with ruff * chore: retrigger CI checks * fix(workflow): enforce node instance constraints --------- Co-authored-by: Abhishek Kumar <abhishek@a6k.me> 2026-06-19 03:29:30 -07:00			`patch.object(qa_analysis, "setup_langfuse_parent_context", return_value=None),`
fix(qa): tolerate non-dict JSON from QA LLM instead of crashing (#408) * fix(qa): tolerate non-dict JSON from QA LLM instead of crashing parse_llm_json is explicitly designed to return a list when the model emits a top-level JSON array (it has a dedicated test for that). The QA analyzers then call parsed.get("tags", ...) directly on the result. When parsed is a list, that raises AttributeError, which is NOT caught by the surrounding except (json.JSONDecodeError, ValueError) — so a single stray array response from the QA model crashed the entire QA analysis run instead of degrading to empty results. The live variable-extraction path already guards this exact case with an isinstance(..., dict) check; mirror it in both QA analysis call sites (_run_qa_analysis per-node and _run_whole_call_qa_analysis fallback) so a non-dict parse result coerces to {} and the run produces empty defaults. Adds a regression test that drives the whole-call analyzer with an array response and asserts empty results rather than a crash. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> * fix(qa): log non-object QA JSON responses --------- Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com> Co-authored-by: Abhishek Kumar <abhishek@a6k.me> 2026-06-19 13:40:53 +05:00			`patch.object(qa_analysis, "add_qa_span_to_trace", return_value=None),`
			`patch.object(qa_analysis.logger, "warning", warning_mock),`
			`):`
			`# Before the fix this raised AttributeError: 'list' object has no`
			`# attribute 'get'.`
			`result = await qa_analysis._run_whole_call_qa_analysis(`
			`qa_data, workflow_run, workflow_run_id=99`
			`)`

			`node_result = result["node_results"]["whole_call"]`
			`assert node_result["tags"] == []`
			`assert node_result["summary"] == ""`
			`assert node_result["score"] is None`
			`warning_mock.assert_called_once()`
			`warning_message = warning_mock.call_args.args[0]`
			`assert "non-object JSON" in warning_message`
			`assert "run 99" in warning_message`
			`assert "list" in warning_message`
			`assert "tag1" not in warning_message`