test: add test-integrity delta fixtures (#79)

Co-authored-by: Sam Valladares <143034159+samvallad33@users.noreply.github.com>
This commit is contained in:
caioribeiroclw-pixel 2026-06-20 00:41:31 +00:00 committed by GitHub
parent d23870d906
commit 5c2db045f6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 252 additions and 1 deletions

View file

@ -91,7 +91,8 @@ integrity decision.
## Minimal Fixture Suite
These cases are small enough to live as fixtures without turning Sanhedrin into
a correctness judge.
a correctness judge. Machine-readable examples live in
[`docs/fixtures/sanhedrin-test-integrity-deltas/`](fixtures/sanhedrin-test-integrity-deltas/).
| Case | Input pattern | Expected decision | Why |
| --- | --- | --- | --- |

View file

@ -0,0 +1,39 @@
{
"case": "justified-snapshot",
"description": "A snapshot changed alongside an intentional source/UI change, so the mechanical delta should remain explicit for policy or human review.",
"expectedDecision": "needs_human_review",
"receipt": {
"schema": "vestige.sanhedrin.test_integrity_delta.v1",
"id": "tid_justified_snapshot",
"commandReceiptId": "receipt_vitest_after_snapshot",
"verificationClaim": "All tests passed.",
"specSource": {
"contextId": "spec_ctx_dashboard_empty_state",
"testFiles": [
{
"path": "tests/__snapshots__/dashboard.test.ts.snap",
"hashBeforeImplementation": "sha256:6666666666666666666666666666666666666666666666666666666666666666",
"hashAfterVerification": "sha256:7777777777777777777777777777777777777777777777777777777777777777"
}
]
},
"implementationContext": "impl_ctx_dashboard_empty_state_copy",
"verifierContext": "verify_ctx_vitest",
"delta": {
"testFilesChangedAfterImplementation": true,
"removedOrDisabledTests": [],
"removedAssertions": 0,
"weakenedExpectations": [],
"snapshotChurnWithoutSourceChange": false,
"coverageDelta": 0,
"mocksReplacingRealBoundary": []
},
"freshVerifier": {
"commandReceiptId": "receipt_vitest_after_snapshot",
"exitCode": 0,
"checkedAfterLastRelevantEdit": true
},
"decision": "needs_human_review",
"reason": "snapshot changed with the implementation; policy or human review must decide whether the churn is justified"
}
}

View file

@ -0,0 +1,45 @@
{
"case": "skipped-test",
"description": "A verifier command passed after a test was disabled with a skip/ignore marker.",
"expectedDecision": "downgraded",
"receipt": {
"schema": "vestige.sanhedrin.test_integrity_delta.v1",
"id": "tid_skipped_test",
"commandReceiptId": "receipt_pytest_after_skip",
"verificationClaim": "All tests passed.",
"specSource": {
"contextId": "spec_ctx_coupon_validation",
"testFiles": [
{
"path": "tests/test_coupon.py",
"hashBeforeImplementation": "sha256:2222222222222222222222222222222222222222222222222222222222222222",
"hashAfterVerification": "sha256:3333333333333333333333333333333333333333333333333333333333333333"
}
]
},
"implementationContext": "impl_ctx_coupon_fix",
"verifierContext": "verify_ctx_pytest",
"delta": {
"testFilesChangedAfterImplementation": true,
"removedOrDisabledTests": [
{
"kind": "skip_or_only",
"path": "tests/test_coupon.py",
"line": 42
}
],
"removedAssertions": 0,
"weakenedExpectations": [],
"snapshotChurnWithoutSourceChange": false,
"coverageDelta": -1.2,
"mocksReplacingRealBoundary": []
},
"freshVerifier": {
"commandReceiptId": "receipt_pytest_after_skip",
"exitCode": 0,
"checkedAfterLastRelevantEdit": true
},
"decision": "downgraded",
"reason": "tests passed, but a test was disabled after implementation"
}
}

View file

@ -0,0 +1,39 @@
{
"case": "unchanged-good",
"description": "Implementation changes source, tests are unchanged, and a fresh verifier command ran after the last relevant edit.",
"expectedDecision": "accepted",
"receipt": {
"schema": "vestige.sanhedrin.test_integrity_delta.v1",
"id": "tid_unchanged_good",
"commandReceiptId": "receipt_cargo_test_after_fix",
"verificationClaim": "All tests passed.",
"specSource": {
"contextId": "spec_ctx_cart_discount",
"testFiles": [
{
"path": "tests/cart_discount_test.rs",
"hashBeforeImplementation": "sha256:1111111111111111111111111111111111111111111111111111111111111111",
"hashAfterVerification": "sha256:1111111111111111111111111111111111111111111111111111111111111111"
}
]
},
"implementationContext": "impl_ctx_cart_discount_fix",
"verifierContext": "verify_ctx_cargo_test",
"delta": {
"testFilesChangedAfterImplementation": false,
"removedOrDisabledTests": [],
"removedAssertions": 0,
"weakenedExpectations": [],
"snapshotChurnWithoutSourceChange": false,
"coverageDelta": 0,
"mocksReplacingRealBoundary": []
},
"freshVerifier": {
"commandReceiptId": "receipt_cargo_test_after_fix",
"exitCode": 0,
"checkedAfterLastRelevantEdit": true
},
"decision": "accepted",
"reason": "tests passed after the implementation and the test artifact did not change"
}
}

View file

@ -0,0 +1,45 @@
{
"case": "weakened-assertion",
"description": "The test still ran, but an expectation was relaxed after implementation.",
"expectedDecision": "downgraded",
"receipt": {
"schema": "vestige.sanhedrin.test_integrity_delta.v1",
"id": "tid_weakened_assertion",
"commandReceiptId": "receipt_npm_test_after_weaken",
"verificationClaim": "All tests passed.",
"specSource": {
"contextId": "spec_ctx_login_errors",
"testFiles": [
{
"path": "tests/login.test.ts",
"hashBeforeImplementation": "sha256:4444444444444444444444444444444444444444444444444444444444444444",
"hashAfterVerification": "sha256:5555555555555555555555555555555555555555555555555555555555555555"
}
]
},
"implementationContext": "impl_ctx_login_errors",
"verifierContext": "verify_ctx_npm_test",
"delta": {
"testFilesChangedAfterImplementation": true,
"removedOrDisabledTests": [],
"removedAssertions": 0,
"weakenedExpectations": [
{
"path": "tests/login.test.ts",
"from": "rejects.toThrow(InvalidCredentialsError)",
"to": "resolves.not.toThrow()"
}
],
"snapshotChurnWithoutSourceChange": false,
"coverageDelta": 0,
"mocksReplacingRealBoundary": []
},
"freshVerifier": {
"commandReceiptId": "receipt_npm_test_after_weaken",
"exitCode": 0,
"checkedAfterLastRelevantEdit": true
},
"decision": "downgraded",
"reason": "tests passed, but the asserted behavior was relaxed after implementation"
}
}

View file

@ -0,0 +1,82 @@
import json
from pathlib import Path
import unittest
# Directory containing the machine-readable test-integrity delta fixtures.
# `parents[2]` is the third ancestor directory of this file (presumably the
# repository root -- confirm against where this test module lives).
FIXTURE_DIR = Path(__file__).resolve().parents[2].joinpath(
    "docs", "fixtures", "sanhedrin-test-integrity-deltas"
)
class TestSanhedrinTestIntegrityDeltaFixtures(unittest.TestCase):
    """Contract checks for the Sanhedrin test-integrity delta fixtures.

    Each ``*.json`` file found in ``FIXTURE_DIR`` is parsed and its
    ``receipt`` object is compared against the expectations hard-coded in
    the tests below.
    """

    # Required shape of both per-file hashes: the literal prefix "sha256:"
    # followed by exactly 64 lowercase hexadecimal digits.
    _SHA256_PATTERN = r"^sha256:[0-9a-f]{64}$"

    @staticmethod
    def _load(fixture):
        """Return the parsed JSON content of one fixture file (read as UTF-8)."""
        return json.loads(fixture.read_text(encoding="utf-8"))

    def test_fixture_receipts_are_executable_contract_examples(self):
        """Check the fixture inventory and the invariants shared by every receipt."""
        fixtures = sorted(FIXTURE_DIR.glob("*.json"))
        # Pin the exact file set so an added or removed fixture must be
        # reflected here as well.
        self.assertEqual(
            [fixture.name for fixture in fixtures],
            [
                "justified-snapshot.json",
                "skipped-test.json",
                "unchanged-good.json",
                "weakened-assertion.json",
            ],
        )
        expected_decisions = {
            "justified-snapshot": "needs_human_review",
            "skipped-test": "downgraded",
            "unchanged-good": "accepted",
            "weakened-assertion": "downgraded",
        }
        for fixture in fixtures:
            with self.subTest(fixture=fixture.name):
                data = self._load(fixture)
                receipt = data["receipt"]
                self.assertEqual(
                    receipt["schema"],
                    "vestige.sanhedrin.test_integrity_delta.v1",
                )
                # The top-level expectation, the receipt's own decision and
                # the table above must all agree.
                self.assertEqual(data["expectedDecision"], receipt["decision"])
                # Report an unknown case as a test failure instead of a
                # bare KeyError from the lookup on the next line.
                self.assertIn(data["case"], expected_decisions)
                self.assertEqual(
                    expected_decisions[data["case"]],
                    receipt["decision"],
                )
                fresh_verifier = receipt["freshVerifier"]
                self.assertTrue(fresh_verifier["checkedAfterLastRelevantEdit"])
                self.assertEqual(fresh_verifier["exitCode"], 0)
                test_files = receipt["specSource"]["testFiles"]
                self.assertGreaterEqual(len(test_files), 1)
                for test_file in test_files:
                    self.assertTrue(test_file["path"])
                    self.assertRegex(
                        test_file["hashBeforeImplementation"],
                        self._SHA256_PATTERN,
                    )
                    self.assertRegex(
                        test_file["hashAfterVerification"],
                        self._SHA256_PATTERN,
                    )

    def test_downgrade_fixtures_have_mechanical_downgrade_evidence(self):
        """Require at least one delta signal behind every ``downgraded`` fixture."""
        downgraded_seen = 0
        for fixture in sorted(FIXTURE_DIR.glob("*.json")):
            data = self._load(fixture)
            if data["expectedDecision"] != "downgraded":
                continue
            downgraded_seen += 1
            # One sub-test per fixture so a failure in one file does not
            # hide failures in the others.
            with self.subTest(fixture=fixture.name):
                delta = data["receipt"]["delta"]
                # Built eagerly on purpose: a missing delta key raises
                # instead of being skipped by short-circuit evaluation.
                evidence = (
                    delta["removedOrDisabledTests"],
                    delta["removedAssertions"] > 0,
                    delta["weakenedExpectations"],
                    delta["snapshotChurnWithoutSourceChange"],
                    delta["coverageDelta"] < 0,
                    delta["mocksReplacingRealBoundary"],
                )
                self.assertTrue(any(evidence), data["case"])
        # Guard against a vacuous pass: if the glob matched no downgraded
        # fixture, the loop above asserted nothing.
        self.assertGreater(
            downgraded_seen,
            0,
            "no downgraded fixtures were checked",
        )
if __name__ == "__main__":
    # Lets the suite run via direct execution of this file, without a
    # separate test-runner invocation.
    unittest.main()