fix(chat): normalize provider-safe message history

This commit is contained in:
Anish Sarkar 2026-06-12 02:17:37 +05:30
parent 5d5d574550
commit 3dd54230e7
9 changed files with 382 additions and 6 deletions

View file

@@ -0,0 +1,40 @@
"""Regression tests for model-boundary message sanitization."""
from __future__ import annotations
import pytest
from langchain_core.messages import AIMessage
from app.agents.chat.runtime.llm_config import _sanitize_messages
# Module-level pytest marker: applies the ``unit`` mark to every test in this module.
pytestmark = pytest.mark.unit
def test_sanitize_messages_strips_provider_specific_thinking_blocks() -> None:
    """Sanitizing keeps only the visible text and leaves the input message intact.

    The input AI message carries a ``thinking`` block followed by a ``text``
    block. The sanitized message's content must be the plain text string, and
    the message that was passed in must still hold both original blocks.
    """
    content_blocks = [
        {"type": "thinking", "thinking": "private reasoning"},
        {"type": "text", "text": "visible answer"},
    ]
    source_message = AIMessage(content=content_blocks)

    cleaned_messages = _sanitize_messages([source_message])
    first_cleaned = cleaned_messages[0]

    assert first_cleaned.content == "visible answer"
    # Compare against a fresh literal so an in-place mutation of the input
    # blocks cannot make this check pass by accident.
    assert source_message.content == [
        {"type": "thinking", "thinking": "private reasoning"},
        {"type": "text", "text": "visible answer"},
    ]
def test_sanitize_messages_sets_tool_only_ai_content_to_none() -> None:
    """A tool-call-only AI message is sanitized to ``content is None``.

    The input message has an empty-string content plus one tool call. After
    sanitizing, the returned message's content is ``None`` while the input
    message still has its empty string.
    """
    search_call = {"name": "search", "args": {"q": "x"}, "id": "call_1"}
    tool_only_message = AIMessage(content="", tool_calls=[search_call])

    cleaned_messages = _sanitize_messages([tool_only_message])

    assert cleaned_messages[0].content is None
    assert tool_only_message.content == ""

View file

@@ -0,0 +1,62 @@
"""Unit tests for provider-safe LLM history normalization."""
from __future__ import annotations
import pytest
from app.tasks.chat.llm_history_normalizer import (
assistant_content_to_llm_text,
user_content_to_llm_content,
)
# Module-level pytest marker: applies the ``unit`` mark to every test in this module.
pytestmark = pytest.mark.unit
def test_assistant_ui_parts_drop_thinking_steps_for_llm_history() -> None:
    """A ``data-thinking-steps`` UI part is omitted; only the text is returned."""
    thinking_steps_part = {
        "type": "data-thinking-steps",
        "data": [{"id": "thinking-1"}],
    }
    text_part = {"type": "text", "text": "visible answer"}

    llm_text = assistant_content_to_llm_text([thinking_steps_part, text_part])

    assert llm_text == "visible answer"
def test_provider_thinking_blocks_are_not_replayed_to_llm() -> None:
    """A ``thinking`` block is omitted from the text produced for LLM history."""
    reasoning_block = {"type": "thinking", "thinking": "private reasoning"}
    answer_block = {"type": "text", "text": "final answer"}

    llm_text = assistant_content_to_llm_text([reasoning_block, answer_block])

    assert llm_text == "final answer"
def test_unknown_assistant_blocks_are_dropped() -> None:
    """``redacted_thinking`` and ``tool_use`` blocks are dropped; text survives."""
    assistant_blocks = []
    assistant_blocks.append({"type": "redacted_thinking", "data": "hidden"})
    assistant_blocks.append({"type": "tool_use", "name": "search"})
    assistant_blocks.append({"type": "text", "text": "kept"})

    llm_text = assistant_content_to_llm_text(assistant_blocks)

    assert llm_text == "kept"
def test_user_images_convert_to_openai_compatible_image_url_blocks() -> None:
    """With ``allow_images=True`` an ``image`` part becomes an ``image_url`` block.

    The text part is passed through unchanged and the image's data URL is
    nested under ``image_url.url`` in the converted block.
    """
    data_url = "data:image/png;base64,abc"
    user_parts = [
        {"type": "text", "text": "look"},
        {"type": "image", "image": data_url},
    ]
    expected_blocks = [
        {"type": "text", "text": "look"},
        {"type": "image_url", "image_url": {"url": data_url}},
    ]

    converted = user_content_to_llm_content(user_parts, allow_images=True)

    assert converted == expected_blocks
def test_user_images_can_be_dropped_for_text_only_history() -> None:
    """With ``allow_images=False`` the image part is dropped and a string returned."""
    text_part = {"type": "text", "text": "look"}
    image_part = {"type": "image", "image": "data:image/png;base64,abc"}

    converted = user_content_to_llm_content(
        [text_part, image_part],
        allow_images=False,
    )

    assert converted == "look"

View file

@@ -0,0 +1,68 @@
"""Unit tests for final assistant message part normalization."""
from __future__ import annotations
import pytest
from langchain_core.messages import AIMessage, HumanMessage, ToolMessage
from app.tasks.chat.message_parts_normalizer import (
final_assistant_parts_from_messages,
merge_streamed_and_final_parts,
normalize_ai_message_to_parts,
)
# Module-level pytest marker: applies the ``unit`` mark to every test in this module.
pytestmark = pytest.mark.unit
def test_string_ai_message_content_becomes_text_part() -> None:
    """Plain string AI content is normalized into a single ``text`` part."""
    plain_message = AIMessage(content="hello")

    parts = normalize_ai_message_to_parts(plain_message)

    assert parts == [{"type": "text", "text": "hello"}]
def test_deepseek_thinking_plus_text_blocks_backfill_only_text() -> None:
    """Only the text block is kept when a message also carries reasoning.

    The message has a ``thinking`` block, a ``text`` block, and the same
    reasoning repeated under ``additional_kwargs["reasoning_content"]``; the
    normalized parts must contain the text part alone.
    """
    hidden_reasoning = "hidden reasoning"
    visible_text = "Yo bro! What's up?"
    reasoning_message = AIMessage(
        content=[
            {"type": "thinking", "thinking": hidden_reasoning},
            {"type": "text", "text": visible_text},
        ],
        additional_kwargs={"reasoning_content": hidden_reasoning},
    )

    parts = normalize_ai_message_to_parts(reasoning_message)

    assert parts == [{"type": "text", "text": visible_text}]
def test_final_parts_use_last_ai_message_and_skip_trailing_tool_messages() -> None:
    """Final parts come from the last AI message, not the tool message after it.

    The history holds an earlier AI draft, a tool result, the final AI answer,
    and one more tool message at the very end; the expected parts are the
    final AI answer's text part.
    """
    question = HumanMessage(content="ask")
    draft_answer = AIMessage(content="draft")
    tool_result = ToolMessage(content="tool output", tool_call_id="tc-1")
    final_answer = AIMessage(content=[{"type": "text", "text": "final answer"}])
    trailing_tool = ToolMessage(
        content="trailing tool noise",
        tool_call_id="tc-2",
    )
    history = [question, draft_answer, tool_result, final_answer, trailing_tool]

    final_parts = final_assistant_parts_from_messages(history)

    assert final_parts == [{"type": "text", "text": "final answer"}]
def test_merge_adds_final_text_when_stream_only_has_thinking_steps() -> None:
    """Final text is appended when the streamed parts hold only thinking steps."""
    thinking_steps_part = {
        "type": "data-thinking-steps",
        "data": [{"id": "thinking-1", "status": "completed"}],
    }
    streamed_parts = [thinking_steps_part]
    final_parts = [{"type": "text", "text": "visible answer"}]

    merged = merge_streamed_and_final_parts(streamed_parts, final_parts)

    # The expected list is built after the call, from the same list objects
    # that were passed in: streamed parts first, then the final parts.
    assert merged == streamed_parts + final_parts
def test_merge_does_not_duplicate_when_stream_already_has_text() -> None:
    """Streamed parts are returned as-is when they already contain a text part."""
    streamed_parts = [{"type": "text", "text": "streamed answer"}]
    final_parts = [{"type": "text", "text": "final answer"}]

    merged = merge_streamed_and_final_parts(streamed_parts, final_parts)

    assert merged == streamed_parts