mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-17 18:35:19 +02:00
154 lines
5.3 KiB
Python
154 lines
5.3 KiB
Python
"""Tests for the FRAMES dataset parser.
|
|
|
|
Network-free: we round-trip a tiny fixture TSV through pandas and
|
|
``load_questions`` to confirm:
|
|
|
|
* row indices become zero-padded ``Q###`` ids,
|
|
* ``wiki_links`` (Python list literal) is materialised correctly,
|
|
* ``reasoning_types`` is split on the pipe separator,
|
|
* missing Prompt/Answer rows are dropped, and
|
|
* the legacy ``wikipedia_link_*`` per-cell fallback works when
|
|
``wiki_links`` is missing/empty.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import textwrap
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from surfsense_evals.suites.research.frames.dataset import (
|
|
FramesQuestion,
|
|
_parse_reasoning_types,
|
|
_parse_wiki_links,
|
|
load_questions,
|
|
)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Pure-function tests
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestParseWikiLinks:
    """Checks ``_parse_wiki_links`` against each input shape used in this file."""

    def test_python_list_literal(self) -> None:
        """A stringified Python list of URLs is turned into a real list."""
        expected = [
            "https://en.wikipedia.org/wiki/A",
            "https://en.wikipedia.org/wiki/B",
        ]
        raw = "['https://en.wikipedia.org/wiki/A', 'https://en.wikipedia.org/wiki/B']"
        assert _parse_wiki_links(raw) == expected

    def test_none_or_empty(self) -> None:
        """``None``, the empty string and the literal ``[]`` all give no links."""
        for blank in (None, "", "[]"):
            assert _parse_wiki_links(blank) == []

    def test_unquoted_csv_fallback(self) -> None:
        """Defensive path: a string that is not a Python list is split on commas."""
        raw = "https://a, https://b"
        parsed = _parse_wiki_links(raw)
        # The space after the comma must not leak into the second URL.
        assert parsed == ["https://a", "https://b"]

    def test_already_a_list(self) -> None:
        """A value that is already a list comes back with the same items."""
        items = ["x", "y"]
        assert _parse_wiki_links(items) == ["x", "y"]
|
|
|
|
|
|
class TestParseReasoningTypes:
    """Checks ``_parse_reasoning_types`` on multi-tag, single-tag and blank input."""

    def test_pipe_separated(self) -> None:
        """Tags joined by `` | `` are split into separate, trimmed entries."""
        raw = "Numerical reasoning | Multiple constraints"
        expected = ["Numerical reasoning", "Multiple constraints"]
        assert _parse_reasoning_types(raw) == expected

    def test_single_tag(self) -> None:
        """A string without a pipe yields a one-element list."""
        tags = _parse_reasoning_types("Tabular reasoning")
        assert tags == ["Tabular reasoning"]

    def test_empty(self) -> None:
        """``None`` and the empty string both yield an empty list."""
        for blank in (None, ""):
            assert _parse_reasoning_types(blank) == []
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Round-trip via pandas
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def _write_tsv(path: Path, body: str) -> None:
    """Write a tab-separated fixture to *path*, encoded as UTF-8.

    ``body`` is run through ``textwrap.dedent`` before writing, so an
    indented triple-quoted literal inside a test loses the leading
    whitespace common to all of its lines.
    """
    dedented = textwrap.dedent(body)
    path.write_text(dedented, encoding="utf-8")
|
|
|
|
|
|
def test_load_questions_basic(tmp_path: Path) -> None:
    """Load a three-row fixture and check the two rows that survive.

    The third row has no Prompt, so only rows 0 and 1 are expected back;
    their ids, gold answers, wiki URLs and reasoning types are verified.
    """
    # Header (first column is unnamed → pandas treats as index)
    header = "\tPrompt\tAnswer\twikipedia_link_1\twikipedia_link_2\treasoning_types\twiki_links"
    # Row 0
    first_row = (
        "0\tWho was the 15th president?\tJames Buchanan\t"
        "https://en.wikipedia.org/wiki/James_Buchanan\t\t"
        "Multiple constraints\t"
        "['https://en.wikipedia.org/wiki/James_Buchanan']"
    )
    # Row 1
    second_row = (
        "1\tHow many years between A and B?\t87\t"
        "https://en.wikipedia.org/wiki/A\thttps://en.wikipedia.org/wiki/B\t"
        "Numerical reasoning | Temporal reasoning\t"
        "['https://en.wikipedia.org/wiki/A', 'https://en.wikipedia.org/wiki/B']"
    )
    # Row 2 (intentionally missing Prompt — should be dropped)
    promptless_row = "2\t\tunused\t\t\t\t"

    fixture = tmp_path / "test.tsv"
    lines = [header, first_row, second_row, promptless_row]
    fixture.write_text("\n".join(lines) + "\n", encoding="utf-8")

    questions = load_questions(fixture)
    assert len(questions) == 2

    first, second = questions

    assert isinstance(first, FramesQuestion)
    assert first.qid == "Q000"
    assert first.raw_index == 0
    assert first.gold_answer == "James Buchanan"
    assert first.wiki_urls == ["https://en.wikipedia.org/wiki/James_Buchanan"]
    assert first.reasoning_types == ["Multiple constraints"]

    expected_second_urls = [
        "https://en.wikipedia.org/wiki/A",
        "https://en.wikipedia.org/wiki/B",
    ]
    assert second.qid == "Q001"
    assert second.gold_answer == "87"
    assert second.wiki_urls == expected_second_urls
    assert second.reasoning_types == ["Numerical reasoning", "Temporal reasoning"]
|
|
|
|
|
|
def test_load_questions_falls_back_to_per_cell_links(tmp_path: Path) -> None:
    """With an empty ``wiki_links`` cell, URLs come from ``wikipedia_link_*``.

    The single data row leaves ``wiki_links`` blank, so the loader is
    expected to return the two per-cell links in column order.
    """
    header = "\tPrompt\tAnswer\twikipedia_link_1\twikipedia_link_2\treasoning_types\twiki_links"
    only_row = (
        "0\tQ?\tA\t"
        "https://en.wikipedia.org/wiki/Cell1\thttps://en.wikipedia.org/wiki/Cell2\t"
        "Numerical reasoning\t"
    )

    fixture = tmp_path / "test.tsv"
    fixture.write_text("\n".join([header, only_row]) + "\n", encoding="utf-8")

    loaded = load_questions(fixture)
    assert len(loaded) == 1

    expected_urls = [
        "https://en.wikipedia.org/wiki/Cell1",
        "https://en.wikipedia.org/wiki/Cell2",
    ]
    assert loaded[0].wiki_urls == expected_urls
|
|
|
|
|
|
def test_load_questions_to_dict_round_trip(tmp_path: Path) -> None:
    """``to_dict()`` on a loaded question exposes its id, URLs and reasoning types."""
    header = "\tPrompt\tAnswer\treasoning_types\twiki_links"
    only_row = "0\tQ?\tParis\tTemporal reasoning\t['https://en.wikipedia.org/wiki/Paris']"

    fixture = tmp_path / "test.tsv"
    fixture.write_text("\n".join([header, only_row]) + "\n", encoding="utf-8")

    # Unpacking fails unless the loader returns exactly one question.
    (question,) = load_questions(fixture)
    as_dict = question.to_dict()

    assert as_dict["qid"] == "Q000"
    assert as_dict["wiki_urls"] == ["https://en.wikipedia.org/wiki/Paris"]
    assert as_dict["reasoning_types"] == ["Temporal reasoning"]
|