SurfSense/surfsense_evals/tests/suites/test_frames_dataset.py
DESKTOP-RTLN3BA\$punk 3737118050 chore: evals
2026-05-13 14:02:26 -07:00

154 lines
5.3 KiB
Python

"""Tests for the FRAMES dataset parser.
Network-free: we round-trip a tiny fixture TSV through pandas and
``load_questions`` to confirm:
* row indices become zero-padded ``Q###`` ids,
* ``wiki_links`` (Python list literal) is materialised correctly,
* ``reasoning_types`` is split on the pipe separator,
* missing Prompt/Answer rows are dropped, and
* the legacy ``wikipedia_link_*`` per-cell fallback works when
``wiki_links`` is missing/empty.
"""
from __future__ import annotations
import textwrap
from pathlib import Path
import pytest
from surfsense_evals.suites.research.frames.dataset import (
FramesQuestion,
_parse_reasoning_types,
_parse_wiki_links,
load_questions,
)
# ---------------------------------------------------------------------------
# Pure-function tests
# ---------------------------------------------------------------------------
class TestParseWikiLinks:
    """Checks ``_parse_wiki_links`` against each input shape exercised below."""

    def test_python_list_literal(self) -> None:
        """A stringified Python list of URLs comes back as a list of those URLs."""
        expected = [
            "https://en.wikipedia.org/wiki/A",
            "https://en.wikipedia.org/wiki/B",
        ]
        literal = "['https://en.wikipedia.org/wiki/A', 'https://en.wikipedia.org/wiki/B']"
        assert _parse_wiki_links(literal) == expected

    def test_none_or_empty(self) -> None:
        """``None``, the empty string and an empty list literal all yield ``[]``."""
        for blank in (None, "", "[]"):
            assert _parse_wiki_links(blank) == []

    def test_unquoted_csv_fallback(self) -> None:
        """A plain comma-separated string is split into stripped items."""
        # Defensive: non-Python-list strings still split on commas.
        raw = "https://a, https://b"
        parsed = _parse_wiki_links(raw)
        assert parsed == ["https://a", "https://b"]

    def test_already_a_list(self) -> None:
        """An input that is already a list compares equal to itself on return."""
        items = ["x", "y"]
        assert _parse_wiki_links(items) == ["x", "y"]
class TestParseReasoningTypes:
    """Checks ``_parse_reasoning_types`` on multi-tag, single-tag and blank input."""

    def test_pipe_separated(self) -> None:
        """Tags joined by `` | `` are split into separate, trimmed entries."""
        raw = "Numerical reasoning | Multiple constraints"
        expected = ["Numerical reasoning", "Multiple constraints"]
        assert _parse_reasoning_types(raw) == expected

    def test_single_tag(self) -> None:
        """A string with no separator becomes a one-element list."""
        tags = _parse_reasoning_types("Tabular reasoning")
        assert tags == ["Tabular reasoning"]

    def test_empty(self) -> None:
        """Both ``None`` and the empty string produce an empty list."""
        for blank in (None, ""):
            assert _parse_reasoning_types(blank) == []
# ---------------------------------------------------------------------------
# Round-trip via pandas
# ---------------------------------------------------------------------------
def _write_tsv(path: Path, body: str) -> None:
    """Write a fixture to ``path`` as UTF-8 after removing its common indent.

    ``body`` is passed through ``textwrap.dedent`` first, so whitespace that
    is common to the start of every line is stripped before writing.
    """
    dedented = textwrap.dedent(body)
    path.write_text(dedented, encoding="utf-8")
def test_load_questions_basic(tmp_path: Path) -> None:
    """Load a three-row fixture and check ids, answers, links and tags.

    The last data row has an empty Prompt cell, so exactly two questions
    are expected back from ``load_questions``.
    """
    # Header (first column is unnamed → pandas treats as index)
    header = "\tPrompt\tAnswer\twikipedia_link_1\twikipedia_link_2\treasoning_types\twiki_links"
    # Row 0
    president_row = (
        "0\tWho was the 15th president?\tJames Buchanan\t"
        "https://en.wikipedia.org/wiki/James_Buchanan\t\t"
        "Multiple constraints\t"
        "['https://en.wikipedia.org/wiki/James_Buchanan']"
    )
    # Row 1
    years_row = (
        "1\tHow many years between A and B?\t87\t"
        "https://en.wikipedia.org/wiki/A\thttps://en.wikipedia.org/wiki/B\t"
        "Numerical reasoning | Temporal reasoning\t"
        "['https://en.wikipedia.org/wiki/A', 'https://en.wikipedia.org/wiki/B']"
    )
    # Row 2 (intentionally missing Prompt — should be dropped)
    promptless_row = "2\t\tunused\t\t\t\t"

    fixture = tmp_path / "test.tsv"
    lines = (header, president_row, years_row, promptless_row)
    # Every line, including the last, is newline-terminated.
    fixture.write_text("".join(line + "\n" for line in lines), encoding="utf-8")

    loaded = load_questions(fixture)
    assert len(loaded) == 2
    first, second = loaded

    assert isinstance(first, FramesQuestion)
    assert first.qid == "Q000"
    assert first.raw_index == 0
    assert first.gold_answer == "James Buchanan"
    assert first.wiki_urls == ["https://en.wikipedia.org/wiki/James_Buchanan"]
    assert first.reasoning_types == ["Multiple constraints"]

    assert second.qid == "Q001"
    assert second.gold_answer == "87"
    expected_urls = [
        "https://en.wikipedia.org/wiki/A",
        "https://en.wikipedia.org/wiki/B",
    ]
    assert second.wiki_urls == expected_urls
    assert second.reasoning_types == ["Numerical reasoning", "Temporal reasoning"]
def test_load_questions_falls_back_to_per_cell_links(tmp_path: Path) -> None:
    """With an empty ``wiki_links`` cell, the URLs should come from the
    ``wikipedia_link_*`` columns instead, in column order."""
    header = "\tPrompt\tAnswer\twikipedia_link_1\twikipedia_link_2\treasoning_types\twiki_links"
    # The trailing tab leaves the final ``wiki_links`` cell empty.
    data_row = (
        "0\tQ?\tA\t"
        "https://en.wikipedia.org/wiki/Cell1\thttps://en.wikipedia.org/wiki/Cell2\t"
        "Numerical reasoning\t"
    )

    fixture = tmp_path / "test.tsv"
    fixture.write_text("".join(line + "\n" for line in (header, data_row)), encoding="utf-8")

    loaded = load_questions(fixture)
    assert len(loaded) == 1
    expected_urls = [
        "https://en.wikipedia.org/wiki/Cell1",
        "https://en.wikipedia.org/wiki/Cell2",
    ]
    assert loaded[0].wiki_urls == expected_urls
def test_load_questions_to_dict_round_trip(tmp_path: Path) -> None:
    """A loaded question's ``to_dict()`` exposes its id, URLs and reasoning tags."""
    header = "\tPrompt\tAnswer\treasoning_types\twiki_links"
    data_row = "0\tQ?\tParis\tTemporal reasoning\t['https://en.wikipedia.org/wiki/Paris']"

    fixture = tmp_path / "test.tsv"
    fixture.write_text("".join(line + "\n" for line in (header, data_row)), encoding="utf-8")

    # Unpacking fails unless exactly one question was loaded.
    (question,) = load_questions(fixture)
    as_dict = question.to_dict()

    assert as_dict["qid"] == "Q000"
    assert as_dict["wiki_urls"] == ["https://en.wikipedia.org/wiki/Paris"]
    assert as_dict["reasoning_types"] == ["Temporal reasoning"]