diff --git a/docs/SANHEDRIN_TEST_INTEGRITY_DELTAS.md b/docs/SANHEDRIN_TEST_INTEGRITY_DELTAS.md
index c9d12dc..c249819 100644
--- a/docs/SANHEDRIN_TEST_INTEGRITY_DELTAS.md
+++ b/docs/SANHEDRIN_TEST_INTEGRITY_DELTAS.md
@@ -91,7 +91,8 @@ integrity decision.
 ## Minimal Fixture Suite
 
 These cases are small enough to live as fixtures without turning Sanhedrin into
-a correctness judge.
+a correctness judge. Machine-readable examples live in
+[`docs/fixtures/sanhedrin-test-integrity-deltas/`](fixtures/sanhedrin-test-integrity-deltas/).
 
 | Case | Input pattern | Expected decision | Why |
 | --- | --- | --- | --- |
diff --git a/docs/fixtures/sanhedrin-test-integrity-deltas/justified-snapshot.json b/docs/fixtures/sanhedrin-test-integrity-deltas/justified-snapshot.json
new file mode 100644
index 0000000..c76b069
--- /dev/null
+++ b/docs/fixtures/sanhedrin-test-integrity-deltas/justified-snapshot.json
@@ -0,0 +1,39 @@
+{
+  "case": "justified-snapshot",
+  "description": "A snapshot changed alongside an intentional source/UI change, so the mechanical delta should remain explicit for policy or human review.",
+  "expectedDecision": "needs_human_review",
+  "receipt": {
+    "schema": "vestige.sanhedrin.test_integrity_delta.v1",
+    "id": "tid_justified_snapshot",
+    "commandReceiptId": "receipt_vitest_after_snapshot",
+    "verificationClaim": "All tests passed.",
+    "specSource": {
+      "contextId": "spec_ctx_dashboard_empty_state",
+      "testFiles": [
+        {
+          "path": "tests/__snapshots__/dashboard.test.ts.snap",
+          "hashBeforeImplementation": "sha256:6666666666666666666666666666666666666666666666666666666666666666",
+          "hashAfterVerification": "sha256:7777777777777777777777777777777777777777777777777777777777777777"
+        }
+      ]
+    },
+    "implementationContext": "impl_ctx_dashboard_empty_state_copy",
+    "verifierContext": "verify_ctx_vitest",
+    "delta": {
+      "testFilesChangedAfterImplementation": true,
+      "removedOrDisabledTests": [],
+      "removedAssertions": 0,
+      "weakenedExpectations": [],
+      "snapshotChurnWithoutSourceChange": false,
+      "coverageDelta": 0,
+      "mocksReplacingRealBoundary": []
+    },
+    "freshVerifier": {
+      "commandReceiptId": "receipt_vitest_after_snapshot",
+      "exitCode": 0,
+      "checkedAfterLastRelevantEdit": true
+    },
+    "decision": "needs_human_review",
+    "reason": "snapshot changed with the implementation; policy or human review must decide whether the churn is justified"
+  }
+}
diff --git a/docs/fixtures/sanhedrin-test-integrity-deltas/skipped-test.json b/docs/fixtures/sanhedrin-test-integrity-deltas/skipped-test.json
new file mode 100644
index 0000000..d68c2e6
--- /dev/null
+++ b/docs/fixtures/sanhedrin-test-integrity-deltas/skipped-test.json
@@ -0,0 +1,45 @@
+{
+  "case": "skipped-test",
+  "description": "A verifier command passed after a test was disabled with a skip/ignore marker.",
+  "expectedDecision": "downgraded",
+  "receipt": {
+    "schema": "vestige.sanhedrin.test_integrity_delta.v1",
+    "id": "tid_skipped_test",
+    "commandReceiptId": "receipt_pytest_after_skip",
+    "verificationClaim": "All tests passed.",
+    "specSource": {
+      "contextId": "spec_ctx_coupon_validation",
+      "testFiles": [
+        {
+          "path": "tests/test_coupon.py",
+          "hashBeforeImplementation": "sha256:2222222222222222222222222222222222222222222222222222222222222222",
+          "hashAfterVerification": "sha256:3333333333333333333333333333333333333333333333333333333333333333"
+        }
+      ]
+    },
+    "implementationContext": "impl_ctx_coupon_fix",
+    "verifierContext": "verify_ctx_pytest",
+    "delta": {
+      "testFilesChangedAfterImplementation": true,
+      "removedOrDisabledTests": [
+        {
+          "kind": "skip_or_only",
+          "path": "tests/test_coupon.py",
+          "line": 42
+        }
+      ],
+      "removedAssertions": 0,
+      "weakenedExpectations": [],
+      "snapshotChurnWithoutSourceChange": false,
+      "coverageDelta": -1.2,
+      "mocksReplacingRealBoundary": []
+    },
+    "freshVerifier": {
+      "commandReceiptId": "receipt_pytest_after_skip",
+      "exitCode": 0,
+      "checkedAfterLastRelevantEdit": true
+    },
+    "decision": "downgraded",
+    "reason": "tests passed, but a test was disabled after implementation"
+  }
+}
diff --git a/docs/fixtures/sanhedrin-test-integrity-deltas/unchanged-good.json b/docs/fixtures/sanhedrin-test-integrity-deltas/unchanged-good.json
new file mode 100644
index 0000000..582bfcf
--- /dev/null
+++ b/docs/fixtures/sanhedrin-test-integrity-deltas/unchanged-good.json
@@ -0,0 +1,39 @@
+{
+  "case": "unchanged-good",
+  "description": "Implementation changes source, tests are unchanged, and a fresh verifier command ran after the last relevant edit.",
+  "expectedDecision": "accepted",
+  "receipt": {
+    "schema": "vestige.sanhedrin.test_integrity_delta.v1",
+    "id": "tid_unchanged_good",
+    "commandReceiptId": "receipt_cargo_test_after_fix",
+    "verificationClaim": "All tests passed.",
+    "specSource": {
+      "contextId": "spec_ctx_cart_discount",
+      "testFiles": [
+        {
+          "path": "tests/cart_discount_test.rs",
+          "hashBeforeImplementation": "sha256:1111111111111111111111111111111111111111111111111111111111111111",
+          "hashAfterVerification": "sha256:1111111111111111111111111111111111111111111111111111111111111111"
+        }
+      ]
+    },
+    "implementationContext": "impl_ctx_cart_discount_fix",
+    "verifierContext": "verify_ctx_cargo_test",
+    "delta": {
+      "testFilesChangedAfterImplementation": false,
+      "removedOrDisabledTests": [],
+      "removedAssertions": 0,
+      "weakenedExpectations": [],
+      "snapshotChurnWithoutSourceChange": false,
+      "coverageDelta": 0,
+      "mocksReplacingRealBoundary": []
+    },
+    "freshVerifier": {
+      "commandReceiptId": "receipt_cargo_test_after_fix",
+      "exitCode": 0,
+      "checkedAfterLastRelevantEdit": true
+    },
+    "decision": "accepted",
+    "reason": "tests passed after the implementation and the test artifact did not change"
+  }
+}
diff --git a/docs/fixtures/sanhedrin-test-integrity-deltas/weakened-assertion.json b/docs/fixtures/sanhedrin-test-integrity-deltas/weakened-assertion.json
new file mode 100644
index 0000000..5061509
--- /dev/null
+++ b/docs/fixtures/sanhedrin-test-integrity-deltas/weakened-assertion.json
@@ -0,0 +1,45 @@
+{
+  "case": "weakened-assertion",
+  "description": "The test still ran, but an expectation was relaxed after implementation.",
+  "expectedDecision": "downgraded",
+  "receipt": {
+    "schema": "vestige.sanhedrin.test_integrity_delta.v1",
+    "id": "tid_weakened_assertion",
+    "commandReceiptId": "receipt_npm_test_after_weaken",
+    "verificationClaim": "All tests passed.",
+    "specSource": {
+      "contextId": "spec_ctx_login_errors",
+      "testFiles": [
+        {
+          "path": "tests/login.test.ts",
+          "hashBeforeImplementation": "sha256:4444444444444444444444444444444444444444444444444444444444444444",
+          "hashAfterVerification": "sha256:5555555555555555555555555555555555555555555555555555555555555555"
+        }
+      ]
+    },
+    "implementationContext": "impl_ctx_login_errors",
+    "verifierContext": "verify_ctx_npm_test",
+    "delta": {
+      "testFilesChangedAfterImplementation": true,
+      "removedOrDisabledTests": [],
+      "removedAssertions": 0,
+      "weakenedExpectations": [
+        {
+          "path": "tests/login.test.ts",
+          "from": "rejects.toThrow(InvalidCredentialsError)",
+          "to": "resolves.not.toThrow()"
+        }
+      ],
+      "snapshotChurnWithoutSourceChange": false,
+      "coverageDelta": 0,
+      "mocksReplacingRealBoundary": []
+    },
+    "freshVerifier": {
+      "commandReceiptId": "receipt_npm_test_after_weaken",
+      "exitCode": 0,
+      "checkedAfterLastRelevantEdit": true
+    },
+    "decision": "downgraded",
+    "reason": "tests passed, but the asserted behavior was relaxed after implementation"
+  }
+}
diff --git a/tests/hooks/test_sanhedrin_test_integrity_delta_fixtures.py b/tests/hooks/test_sanhedrin_test_integrity_delta_fixtures.py
new file mode 100644
index 0000000..b635485
--- /dev/null
+++ b/tests/hooks/test_sanhedrin_test_integrity_delta_fixtures.py
@@ -0,0 +1,82 @@
+import json
+from pathlib import Path
+import unittest
+
+
+FIXTURE_DIR = (
+    Path(__file__).resolve().parents[2]
+    / "docs"
+    / "fixtures"
+    / "sanhedrin-test-integrity-deltas"
+)
+
+
+class TestSanhedrinTestIntegrityDeltaFixtures(unittest.TestCase):
+    def test_fixture_receipts_are_executable_contract_examples(self):
+        fixtures = sorted(FIXTURE_DIR.glob("*.json"))
+        self.assertEqual(
+            [fixture.name for fixture in fixtures],
+            [
+                "justified-snapshot.json",
+                "skipped-test.json",
+                "unchanged-good.json",
+                "weakened-assertion.json",
+            ],
+        )
+
+        expected_decisions = {
+            "justified-snapshot": "needs_human_review",
+            "skipped-test": "downgraded",
+            "unchanged-good": "accepted",
+            "weakened-assertion": "downgraded",
+        }
+
+        for fixture in fixtures:
+            with self.subTest(fixture=fixture.name):
+                data = json.loads(fixture.read_text(encoding="utf-8"))
+                receipt = data["receipt"]
+
+                self.assertEqual(
+                    receipt["schema"],
+                    "vestige.sanhedrin.test_integrity_delta.v1",
+                )
+                self.assertEqual(data["expectedDecision"], receipt["decision"])
+                self.assertEqual(expected_decisions[data["case"]], receipt["decision"])
+                self.assertTrue(receipt["freshVerifier"]["checkedAfterLastRelevantEdit"])
+                self.assertEqual(receipt["freshVerifier"]["exitCode"], 0)
+
+                test_files = receipt["specSource"]["testFiles"]
+                self.assertGreaterEqual(len(test_files), 1)
+                for test_file in test_files:
+                    self.assertTrue(test_file["path"])
+                    self.assertRegex(
+                        test_file["hashBeforeImplementation"],
+                        r"^sha256:[0-9a-f]{64}$",
+                    )
+                    self.assertRegex(
+                        test_file["hashAfterVerification"],
+                        r"^sha256:[0-9a-f]{64}$",
+                    )
+
+    def test_downgrade_fixtures_have_mechanical_downgrade_evidence(self):
+        for fixture in sorted(FIXTURE_DIR.glob("*.json")):
+            data = json.loads(fixture.read_text(encoding="utf-8"))
+            if data["expectedDecision"] != "downgraded":
+                continue
+
+            delta = data["receipt"]["delta"]
+            has_downgrade_evidence = any(
+                [
+                    delta["removedOrDisabledTests"],
+                    delta["removedAssertions"] > 0,
+                    delta["weakenedExpectations"],
+                    delta["snapshotChurnWithoutSourceChange"],
+                    delta["coverageDelta"] < 0,
+                    delta["mocksReplacingRealBoundary"],
+                ]
+            )
+            self.assertTrue(has_downgrade_evidence, data["case"])
+
+
+if __name__ == "__main__":
+    unittest.main()