test: add test-integrity delta fixtures (#79)

Co-authored-by: Sam Valladares <143034159+samvallad33@users.noreply.github.com>
This commit is contained in:
caioribeiroclw-pixel 2026-06-20 00:41:31 +00:00 committed by GitHub
parent d23870d906
commit 5c2db045f6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 252 additions and 1 deletions

View file

@ -91,7 +91,8 @@ integrity decision.
## Minimal Fixture Suite
These cases are small enough to live as fixtures without turning Sanhedrin into
a correctness judge.
a correctness judge. Machine-readable examples live in
[`docs/fixtures/sanhedrin-test-integrity-deltas/`](fixtures/sanhedrin-test-integrity-deltas/).
| Case | Input pattern | Expected decision | Why |
| --- | --- | --- | --- |

View file

@ -0,0 +1,39 @@
{
"case": "justified-snapshot",
"description": "A snapshot changed alongside an intentional source/UI change, so the mechanical delta should remain explicit for policy or human review.",
"expectedDecision": "needs_human_review",
"receipt": {
"schema": "vestige.sanhedrin.test_integrity_delta.v1",
"id": "tid_justified_snapshot",
"commandReceiptId": "receipt_vitest_after_snapshot",
"verificationClaim": "All tests passed.",
"specSource": {
"contextId": "spec_ctx_dashboard_empty_state",
"testFiles": [
{
"path": "tests/__snapshots__/dashboard.test.ts.snap",
"hashBeforeImplementation": "sha256:6666666666666666666666666666666666666666666666666666666666666666",
"hashAfterVerification": "sha256:7777777777777777777777777777777777777777777777777777777777777777"
}
]
},
"implementationContext": "impl_ctx_dashboard_empty_state_copy",
"verifierContext": "verify_ctx_vitest",
"delta": {
"testFilesChangedAfterImplementation": true,
"removedOrDisabledTests": [],
"removedAssertions": 0,
"weakenedExpectations": [],
"snapshotChurnWithoutSourceChange": false,
"coverageDelta": 0,
"mocksReplacingRealBoundary": []
},
"freshVerifier": {
"commandReceiptId": "receipt_vitest_after_snapshot",
"exitCode": 0,
"checkedAfterLastRelevantEdit": true
},
"decision": "needs_human_review",
"reason": "snapshot changed with the implementation; policy or human review must decide whether the churn is justified"
}
}

View file

@ -0,0 +1,45 @@
{
"case": "skipped-test",
"description": "A verifier command passed after a test was disabled with a skip/ignore marker.",
"expectedDecision": "downgraded",
"receipt": {
"schema": "vestige.sanhedrin.test_integrity_delta.v1",
"id": "tid_skipped_test",
"commandReceiptId": "receipt_pytest_after_skip",
"verificationClaim": "All tests passed.",
"specSource": {
"contextId": "spec_ctx_coupon_validation",
"testFiles": [
{
"path": "tests/test_coupon.py",
"hashBeforeImplementation": "sha256:2222222222222222222222222222222222222222222222222222222222222222",
"hashAfterVerification": "sha256:3333333333333333333333333333333333333333333333333333333333333333"
}
]
},
"implementationContext": "impl_ctx_coupon_fix",
"verifierContext": "verify_ctx_pytest",
"delta": {
"testFilesChangedAfterImplementation": true,
"removedOrDisabledTests": [
{
"kind": "skip_or_only",
"path": "tests/test_coupon.py",
"line": 42
}
],
"removedAssertions": 0,
"weakenedExpectations": [],
"snapshotChurnWithoutSourceChange": false,
"coverageDelta": -1.2,
"mocksReplacingRealBoundary": []
},
"freshVerifier": {
"commandReceiptId": "receipt_pytest_after_skip",
"exitCode": 0,
"checkedAfterLastRelevantEdit": true
},
"decision": "downgraded",
"reason": "tests passed, but a test was disabled after implementation"
}
}

View file

@ -0,0 +1,39 @@
{
"case": "unchanged-good",
"description": "The implementation changed source, tests were unchanged, and a fresh verifier command ran after the last relevant edit.",
"expectedDecision": "accepted",
"receipt": {
"schema": "vestige.sanhedrin.test_integrity_delta.v1",
"id": "tid_unchanged_good",
"commandReceiptId": "receipt_cargo_test_after_fix",
"verificationClaim": "All tests passed.",
"specSource": {
"contextId": "spec_ctx_cart_discount",
"testFiles": [
{
"path": "tests/cart_discount_test.rs",
"hashBeforeImplementation": "sha256:1111111111111111111111111111111111111111111111111111111111111111",
"hashAfterVerification": "sha256:1111111111111111111111111111111111111111111111111111111111111111"
}
]
},
"implementationContext": "impl_ctx_cart_discount_fix",
"verifierContext": "verify_ctx_cargo_test",
"delta": {
"testFilesChangedAfterImplementation": false,
"removedOrDisabledTests": [],
"removedAssertions": 0,
"weakenedExpectations": [],
"snapshotChurnWithoutSourceChange": false,
"coverageDelta": 0,
"mocksReplacingRealBoundary": []
},
"freshVerifier": {
"commandReceiptId": "receipt_cargo_test_after_fix",
"exitCode": 0,
"checkedAfterLastRelevantEdit": true
},
"decision": "accepted",
"reason": "tests passed after the implementation and the test artifact did not change"
}
}

View file

@ -0,0 +1,45 @@
{
"case": "weakened-assertion",
"description": "The test still ran, but an expectation was relaxed after implementation.",
"expectedDecision": "downgraded",
"receipt": {
"schema": "vestige.sanhedrin.test_integrity_delta.v1",
"id": "tid_weakened_assertion",
"commandReceiptId": "receipt_npm_test_after_weaken",
"verificationClaim": "All tests passed.",
"specSource": {
"contextId": "spec_ctx_login_errors",
"testFiles": [
{
"path": "tests/login.test.ts",
"hashBeforeImplementation": "sha256:4444444444444444444444444444444444444444444444444444444444444444",
"hashAfterVerification": "sha256:5555555555555555555555555555555555555555555555555555555555555555"
}
]
},
"implementationContext": "impl_ctx_login_errors",
"verifierContext": "verify_ctx_npm_test",
"delta": {
"testFilesChangedAfterImplementation": true,
"removedOrDisabledTests": [],
"removedAssertions": 0,
"weakenedExpectations": [
{
"path": "tests/login.test.ts",
"from": "rejects.toThrow(InvalidCredentialsError)",
"to": "resolves.not.toThrow()"
}
],
"snapshotChurnWithoutSourceChange": false,
"coverageDelta": 0,
"mocksReplacingRealBoundary": []
},
"freshVerifier": {
"commandReceiptId": "receipt_npm_test_after_weaken",
"exitCode": 0,
"checkedAfterLastRelevantEdit": true
},
"decision": "downgraded",
"reason": "tests passed, but the asserted behavior was relaxed after implementation"
}
}