"""Tests for strip_markdown_fences() and extract_text_content() in app/utils/content_utils.py. Out of scope: bootstrap_history_from_db() — async + DB, belongs in integration tests. Run: uv run pytest -m unit tests/unit/utils/test_content_utils.py """ import pytest pytestmark = pytest.mark.unit # =========================================================================== # strip_markdown_fences() # =========================================================================== class TestStripMarkdownFences: """Tests for strip_markdown_fences(text: str) -> str. Regex: r"^```(?:\\w+)?\\s*\\n(.*?)```\\s*$" (re.DOTALL) Called on text.strip() — so surrounding whitespace is handled before the regex runs. The captured group is also .strip()-ped before return. """ # ------------------------------------------------------------------ # Fenced with a language tag # ------------------------------------------------------------------ def test_json_fence_returns_inner_content(self): from app.utils.content_utils import strip_markdown_fences text = '```json\n{"key": "value"}\n```' assert strip_markdown_fences(text) == '{"key": "value"}' def test_python_fence_returns_inner_content(self): from app.utils.content_utils import strip_markdown_fences text = "```python\ndef hello():\n return 'hi'\n```" assert strip_markdown_fences(text) == "def hello():\n return 'hi'" def test_yaml_fence_returns_inner_content(self): from app.utils.content_utils import strip_markdown_fences text = "```yaml\nkey: value\n```" assert strip_markdown_fences(text) == "key: value" def test_sql_multiline_fence_returns_inner_content(self): from app.utils.content_utils import strip_markdown_fences text = "```sql\nSELECT *\nFROM users\nWHERE id = 1;\n```" assert strip_markdown_fences(text) == "SELECT *\nFROM users\nWHERE id = 1;" # ------------------------------------------------------------------ # Fenced without a language tag # ------------------------------------------------------------------ def test_no_lang_tag_single_line_returns_inner_content(self): from app.utils.content_utils import strip_markdown_fences text = "```\nhello world\n```" assert strip_markdown_fences(text) == "hello world" def test_no_lang_tag_multiline_returns_inner_content(self): from app.utils.content_utils import strip_markdown_fences text = "```\nline one\nline two\n```" assert strip_markdown_fences(text) == "line one\nline two" # ------------------------------------------------------------------ # Plain text — no fences → returned unchanged # ------------------------------------------------------------------ def test_plain_text_returned_unchanged(self): from app.utils.content_utils import strip_markdown_fences text = "just plain text with no fences" assert strip_markdown_fences(text) == text def test_plain_text_with_newlines_returned_unchanged(self): from app.utils.content_utils import strip_markdown_fences text = "line one\nline two\nline three" assert strip_markdown_fences(text) == text def test_empty_string_returned_unchanged(self): from app.utils.content_utils import strip_markdown_fences assert strip_markdown_fences("") == "" # ------------------------------------------------------------------ # Surrounding whitespace handling # The function calls text.strip() before matching, so leading/trailing # whitespace outside the fence is consumed. The captured group is also # .strip()-ped, so whitespace between the fence markers and content is # removed too. # ------------------------------------------------------------------ def test_leading_whitespace_around_fence_stripped(self): from app.utils.content_utils import strip_markdown_fences text = " ```json\n{}\n```" assert strip_markdown_fences(text) == "{}" def test_trailing_whitespace_around_fence_stripped(self): from app.utils.content_utils import strip_markdown_fences text = "```json\n{}\n``` " assert strip_markdown_fences(text) == "{}" def test_surrounding_newlines_stripped(self): from app.utils.content_utils import strip_markdown_fences text = '\n\n```json\n{"a": 1}\n```\n\n' assert strip_markdown_fences(text) == '{"a": 1}' def test_inner_indentation_preserved(self): """The captured group is .strip()-ped, so leading whitespace on the *first* line is removed, but indentation on subsequent lines is kept.""" from app.utils.content_utils import strip_markdown_fences text = "```\n indented line\n deeper indent\n```" result = strip_markdown_fences(text) # .strip() removes the leading spaces from the first captured line assert "indented line" in result # indentation on the second line is preserved assert " deeper indent" in result # =========================================================================== # extract_text_content() # =========================================================================== class TestExtractTextContent: """Tests for extract_text_content(content: str | dict | list) -> str.""" # ------------------------------------------------------------------ # str input → returned as-is # ------------------------------------------------------------------ def test_str_input_returned_as_is(self): from app.utils.content_utils import extract_text_content assert extract_text_content("hello world") == "hello world" def test_str_empty_returned_as_is(self): from app.utils.content_utils import extract_text_content assert extract_text_content("") == "" def test_str_with_internal_whitespace_returned_as_is(self): from app.utils.content_utils import extract_text_content assert extract_text_content(" spaced ") == " spaced " # ------------------------------------------------------------------ # dict with "text" key → return content["text"] # ------------------------------------------------------------------ def test_dict_with_text_key_returns_its_value(self): from app.utils.content_utils import extract_text_content assert extract_text_content({"text": "from dict"}) == "from dict" def test_dict_with_text_key_empty_value(self): from app.utils.content_utils import extract_text_content assert extract_text_content({"text": ""}) == "" def test_dict_with_text_key_ignores_other_keys(self): from app.utils.content_utils import extract_text_content d = {"text": "important", "role": "assistant", "extra": 99} assert extract_text_content(d) == "important" # ------------------------------------------------------------------ # dict without "text" key → str(dict) # ------------------------------------------------------------------ def test_dict_without_text_key_returns_str_repr(self): from app.utils.content_utils import extract_text_content d = {"role": "assistant", "value": 42} assert extract_text_content(d) == str(d) def test_empty_dict_returns_str_repr(self): from app.utils.content_utils import extract_text_content assert extract_text_content({}) == str({}) # ------------------------------------------------------------------ # list of parts — text dicts and plain strings # Parts are joined with "\n" (per implementation: "\n".join(texts)) # ------------------------------------------------------------------ def test_list_text_type_parts_joined_with_newline(self): from app.utils.content_utils import extract_text_content parts = [ {"type": "text", "text": "Hello"}, {"type": "text", "text": "world"}, ] assert extract_text_content(parts) == "Hello\nworld" def test_list_plain_strings_joined_with_newline(self): from app.utils.content_utils import extract_text_content parts = ["foo", "bar"] assert extract_text_content(parts) == "foo\nbar" def test_list_mixed_text_dicts_and_plain_strings(self): from app.utils.content_utils import extract_text_content parts = [ {"type": "text", "text": "Hello"}, "plain", {"type": "text", "text": "world"}, ] result = extract_text_content(parts) assert "Hello" in result assert "plain" in result assert "world" in result def test_list_non_text_type_parts_ignored(self): """tool_use, image, and other non-text blocks must not leak into output.""" from app.utils.content_utils import extract_text_content parts = [ {"type": "tool_use", "id": "abc", "name": "search_kb"}, {"type": "text", "text": "visible text"}, {"type": "image", "source": {"url": "https://example.com/img.png"}}, ] result = extract_text_content(parts) assert result == "visible text" assert "tool_use" not in result assert "search_kb" not in result assert "image" not in result def test_list_only_non_text_parts_returns_empty_string(self): from app.utils.content_utils import extract_text_content parts = [ {"type": "tool_use", "id": "x"}, {"type": "image", "source": {}}, ] assert extract_text_content(parts) == "" def test_list_single_text_part(self): from app.utils.content_utils import extract_text_content parts = [{"type": "text", "text": "only me"}] assert extract_text_content(parts) == "only me" def test_list_text_part_missing_text_key_contributes_empty_string(self): """part.get("text", "") — a text-typed dict with no "text" key gives "".""" from app.utils.content_utils import extract_text_content parts = [{"type": "text"}, {"type": "text", "text": "after"}] result = extract_text_content(parts) # both parts collected; joined → "\nafter" or "after" depending on strip assert "after" in result # ------------------------------------------------------------------ # Empty list → empty string # ------------------------------------------------------------------ def test_empty_list_returns_empty_string(self): from app.utils.content_utils import extract_text_content assert extract_text_content([]) == "" # ------------------------------------------------------------------ # Unsupported types → empty string (the final bare `return ""`) # ------------------------------------------------------------------ def test_none_returns_empty_string(self): from app.utils.content_utils import extract_text_content assert extract_text_content(None) == "" def test_integer_returns_empty_string(self): from app.utils.content_utils import extract_text_content assert extract_text_content(42) == "" def test_boolean_returns_empty_string(self): from app.utils.content_utils import extract_text_content assert extract_text_content(True) == ""