Co-Authored-By: Claude <noreply@anthropic.com> Co-Authored-By: XNLLLLH <XNLLLLH@users.noreply.github.com>
105 lines
3.6 KiB
Python
105 lines
3.6 KiB
Python
"""Plan 03-03 Task 1 RED + Task 2 GREEN — surface-feature formality scorer.
|
|
|
|
Validates the formality scorer against a RU+EN fixture of ~50 formal/informal pairs.
|
|
Constitutional guard: the scorer observes ONLY the user's surface text. There is no
|
|
user-internal-state signal anywhere in this test or in the module it tests.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import warnings
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
|
|
FIXTURE_PATH = Path(__file__).parent / "fixtures" / "formality_ru_en_50pairs.json"
|
|
|
|
|
|
def _load_fixture():
    """Load and parse the formality fixture at ``FIXTURE_PATH``.

    The file is decoded explicitly as UTF-8. The fixture carries Russian
    (Cyrillic) text alongside English, and the platform default encoding
    (for example cp1252 on Windows) is not guaranteed to decode it.

    Returns:
        The parsed JSON document. The tests in this module iterate over it
        as a sequence of dicts and read the "id", "lang", "formal" and
        "informal" keys.
    """
    with FIXTURE_PATH.open(encoding="utf-8") as fh:
        return json.load(fh)
|
|
|
|
|
|
# ------------------------------------------------------------- fixture integrity
|
|
def test_fixture_loads_and_has_enough_pairs():
    """The fixture parses, holds at least 45 pairs, and covers both EN and RU."""
    pairs = _load_fixture()

    # The target is ~50 pairs; 45 is the hard floor.
    assert len(pairs) >= 45, f"expected ~50 pairs, got {len(pairs)}"

    seen_langs = set()
    for entry in pairs:
        seen_langs.add(entry["lang"])
    assert seen_langs.issuperset(("en", "ru"))
|
|
|
|
|
|
def test_fixture_shape():
    """Each pair has the required keys and non-blank formal/informal strings."""
    required_keys = {"id", "lang", "formal", "informal"}
    for entry in _load_fixture():
        # Extra keys are allowed; only the required ones must be present.
        assert required_keys.issubset(entry.keys())
        for field in ("formal", "informal"):
            text = entry[field]
            assert isinstance(text, str) and text.strip()
|
|
|
|
|
|
# ------------------------------------------------------------- scorer contract
|
|
def test_formality_score_fixture_accuracy_at_least_85_percent():
    """Formal text must score > informal text on >= 85% of pairs."""
    from iai_mcp.formality import formality_score

    pairs = _load_fixture()

    # A pair counts as a win only on a strict inequality; ties are losses.
    wins = 0
    for pair in pairs:
        formal_score = formality_score(pair["formal"], pair["lang"])
        informal_score = formality_score(pair["informal"], pair["lang"])
        if formal_score > informal_score:
            wins += 1

    accuracy = wins / len(pairs)
    assert accuracy >= 0.85, f"accuracy {accuracy:.2%} ({wins}/{len(pairs)}) below 85% floor"
|
|
|
|
|
|
def test_formality_score_en_formal_anchor():
    """A clearly formal English sentence must score at or above 0.6."""
    from iai_mcp.formality import formality_score

    formal_sentence = "The proposal is, therefore, accepted."
    score = formality_score(formal_sentence, "en")
    assert score >= 0.6, f"expected highly formal sentence >= 0.6, got {score:.3f}"
|
|
|
|
|
|
def test_formality_score_en_informal_anchor():
    """A clearly informal English message must score at or below 0.3."""
    from iai_mcp.formality import formality_score

    casual_message = "yo, works for me lol"
    score = formality_score(casual_message, "en")
    assert score <= 0.3, f"expected clearly informal <= 0.3, got {score:.3f}"
|
|
|
|
|
|
def test_formality_score_unknown_lang_returns_neutral_with_warning():
    """MEMORY.md global-product mandate: unknown lang degrades gracefully.

    For the language code "zz" the scorer must return the neutral value 0.5
    and emit a warning whose message mentions either the function name or
    the offending code.
    """
    from iai_mcp.formality import formality_score

    with warnings.catch_warnings(record=True) as captured:
        # Record every warning, even ones already shown earlier in the run.
        warnings.simplefilter("always")
        score = formality_score("some test text", "zz")

    assert score == 0.5

    def _mentions_scorer_or_lang(caught):
        message = str(caught.message)
        return "formality_score" in message.lower() or "zz" in message

    # A warning must have been issued.
    assert any(map(_mentions_scorer_or_lang, captured))
|
|
|
|
|
|
def test_formality_score_unknown_lang_never_raises():
    """The scorer must not raise for any of the probed language codes.

    The probes include the empty string; warnings are silenced because only
    the absence of an exception is being checked here.
    """
    from iai_mcp.formality import formality_score

    # Must never raise, regardless of the lang string.
    probe_langs = ("", "zz", "xx", "de", "fr")
    for code in probe_langs:
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            formality_score("test", code)
|
|
|
|
|
|
def test_formality_score_empty_text_returns_zero():
    """Empty and whitespace-only English input both score exactly 0.0."""
    from iai_mcp.formality import formality_score

    for blank_text in ("", " "):
        assert formality_score(blank_text, "en") == 0.0
|
|
|
|
|
|
def test_formality_score_range_bounded_in_0_1():
    """Every fixture text, formal or informal, must score within [0, 1]."""
    from iai_mcp.formality import formality_score

    for pair in _load_fixture():
        texts = (pair["formal"], pair["informal"])
        for txt in texts:
            s = formality_score(txt, pair["lang"])
            assert 0.0 <= s <= 1.0, f"score {s} out of [0, 1] for {txt!r}"
|