2026-04-27 13:20:51 -05:00
#!/usr/bin/env python3
2026-05-01 05:24:03 -05:00
# sanhedrin-local.py — OpenAI-compatible Sanhedrin Executioner bridge.
2026-04-27 13:20:51 -05:00
# Drop-in replacement for the Haiku 4.5 subagent that sanhedrin.sh used to spawn.
#
# Reads draft from stdin, prints single-line verdict to stdout:
# yes
# no - [Sanhedrin Veto] [CLASS]: <reason under 120 chars>
#
# Architecture:
# stdin (draft) -> Vestige /api/deep_reference (single semantic query)
2026-05-01 05:24:03 -05:00
# -> OpenAI-compatible chat endpoint (one-shot judgment)
2026-04-27 13:20:51 -05:00
# -> stdout (single-line verdict)
#
2026-05-01 05:24:03 -05:00
# Fail-open: if the endpoint is unreachable, print "yes" and exit 0 (don't break
2026-04-27 13:20:51 -05:00
# the Cognitive Sandwich on infra errors). The wrapping sanhedrin.sh maps
# "yes" to exit 0, so this preserves existing fail-open semantics.
2026-05-24 16:09:44 -05:00
from __future__ import annotations
2026-04-27 13:20:51 -05:00
import json
import os
import re
import sys
2026-05-24 16:09:44 -05:00
import unicodedata
2026-04-27 13:20:51 -05:00
import urllib . error
2026-05-27 19:03:16 -05:00
import urllib . parse
2026-04-27 13:20:51 -05:00
import urllib . request
2026-05-24 16:09:44 -05:00
from dataclasses import asdict , dataclass , field , replace
2026-05-25 01:44:52 -05:00
from pathlib import Path
2026-05-24 16:09:44 -05:00
from typing import Any
2026-04-27 13:20:51 -05:00
2026-05-25 01:44:52 -05:00
sys . path . insert ( 0 , str ( Path ( __file__ ) . resolve ( ) . parent ) )
try :
import sanhedrin_core
except Exception :
sanhedrin_core = None
2026-04-27 13:20:51 -05:00
def env_int ( name : str , default : int ) - > int :
try :
return int ( os . environ . get ( name , " " ) or default )
except ValueError :
return default
DASHBOARD_PORT = os . environ . get ( " VESTIGE_DASHBOARD_PORT " ) or " 3927 "
VESTIGE_BASE_URL = (
os . environ . get ( " VESTIGE_BASE_URL " ) or f " http://127.0.0.1: { DASHBOARD_PORT } "
) . rstrip ( " / " )
2026-05-01 05:24:03 -05:00
SANHEDRIN_ENDPOINT = (
os . environ . get ( " VESTIGE_SANHEDRIN_ENDPOINT " )
or os . environ . get ( " MLX_ENDPOINT " )
2026-05-27 19:03:16 -05:00
or " "
2026-05-01 05:24:03 -05:00
)
2026-04-27 13:20:51 -05:00
VESTIGE_ENDPOINT = (
os . environ . get ( " VESTIGE_DEEP_REFERENCE_ENDPOINT " )
or f " { VESTIGE_BASE_URL } /api/deep_reference "
)
VESTIGE_HEALTH = (
os . environ . get ( " VESTIGE_HEALTH_ENDPOINT " ) or f " { VESTIGE_BASE_URL } /api/health "
)
2026-05-01 05:24:03 -05:00
MODEL = (
os . environ . get ( " VESTIGE_SANHEDRIN_MODEL " )
or os . environ . get ( " VESTIGE_SANDWICH_MODEL " )
2026-05-27 19:03:16 -05:00
or " "
2026-05-01 05:24:03 -05:00
)
SANHEDRIN_TIMEOUT = env_int ( " VESTIGE_SANHEDRIN_TIMEOUT " , env_int ( " MLX_TIMEOUT " , 45 ) )
2026-04-27 13:20:51 -05:00
VESTIGE_TIMEOUT = env_int ( " VESTIGE_TIMEOUT " , 5 )
2026-05-27 19:03:16 -05:00
SANHEDRIN_BACKEND = ( os . environ . get ( " VESTIGE_SANHEDRIN_BACKEND " ) or " " ) . strip ( ) . lower ( )
THINK_RE = re . compile (
r " <(?:think|thinking|reasoning)>.*?</(?:think|thinking|reasoning)> " ,
re . DOTALL | re . IGNORECASE ,
)
2026-04-27 13:20:51 -05:00
def post_json ( url : str , body : dict , timeout : int ) :
2026-05-27 19:03:16 -05:00
if not url :
return None
2026-04-27 13:20:51 -05:00
data = json . dumps ( body ) . encode ( " utf-8 " )
2026-05-27 19:03:16 -05:00
headers = { " Content-Type " : " application/json " }
api_key = os . environ . get ( " VESTIGE_SANHEDRIN_API_KEY " )
if api_key and same_endpoint_origin ( url , SANHEDRIN_ENDPOINT ) :
headers [ " Authorization " ] = f " Bearer { api_key } "
2026-04-27 13:20:51 -05:00
req = urllib . request . Request (
2026-05-27 19:03:16 -05:00
url , data = data , headers = headers
2026-04-27 13:20:51 -05:00
)
try :
with urllib . request . urlopen ( req , timeout = timeout ) as r :
return json . loads ( r . read ( ) )
except ( urllib . error . URLError , urllib . error . HTTPError , TimeoutError , OSError ) :
return None
2026-05-27 19:03:16 -05:00
def same_endpoint_origin ( url : str , endpoint : str ) - > bool :
try :
target = urllib . parse . urlsplit ( url )
expected = urllib . parse . urlsplit ( endpoint )
except ValueError :
return False
return (
target . scheme == expected . scheme
and target . netloc == expected . netloc
and target . path == expected . path
)
def use_backend_extensions ( ) - > bool :
if SANHEDRIN_BACKEND in { " mlx " , " vllm " } :
return True
if SANHEDRIN_BACKEND in { " openai " , " ollama " , " llama.cpp " , " llamacpp " , " litellm " } :
return False
return MODEL . startswith ( " mlx-community/ " )
def sanhedrin_model_configured ( ) - > bool :
return bool ( SANHEDRIN_ENDPOINT and MODEL )
def sanhedrin_body (
messages : list [ dict [ str , str ] ] ,
max_tokens : int ,
stop : list [ str ] | None = None ,
) - > dict [ str , Any ] :
body : dict [ str , Any ] = {
" model " : MODEL ,
" messages " : messages ,
" max_tokens " : max_tokens ,
" temperature " : 0.0 ,
" top_p " : 1.0 ,
" stream " : False ,
}
if stop :
body [ " stop " ] = stop
if use_backend_extensions ( ) :
body [ " top_k " ] = 1
body [ " seed " ] = 42
body [ " chat_template_kwargs " ] = { " enable_thinking " : False }
return body
2026-04-27 13:20:51 -05:00
TRUST_FLOOR = 0.55 # filter out low-trust memories that drive false-positive vetoes
2026-05-24 16:09:44 -05:00
CLAIM_MODE_ENV = " VESTIGE_SANHEDRIN_CLAIM_MODE "
OUTPUT_ENV = " VESTIGE_SANHEDRIN_OUTPUT "
STAGE_FILE_ENV = " VESTIGE_SANHEDRIN_STAGE_FILE "
MAX_CLAIMS = env_int ( " VESTIGE_SANHEDRIN_MAX_CLAIMS " , 8 )
MAX_CLAIM_CHARS = env_int ( " VESTIGE_SANHEDRIN_MAX_CLAIM_CHARS " , 500 )
MAX_EVIDENCE_CHARS = env_int ( " VESTIGE_SANHEDRIN_MAX_EVIDENCE_CHARS " , 420 )
CLAIM_CLASSES = {
" TECHNICAL " ,
" BIOGRAPHICAL " ,
" FINANCIAL " ,
" ACHIEVEMENT " ,
" TIMELINE " ,
" QUANTITATIVE " ,
" ATTRIBUTION " ,
" CAUSAL " ,
" COMPARATIVE " ,
" EXISTENTIAL " ,
" VAGUE-QUANTIFIER " ,
" UNVERIFIED-POSITIVE " ,
}
CRITICAL_ABSENCE_CLASSES = {
" BIOGRAPHICAL " ,
" FINANCIAL " ,
" ACHIEVEMENT " ,
" TIMELINE " ,
" QUANTITATIVE " ,
" ATTRIBUTION " ,
" VAGUE-QUANTIFIER " ,
}
STRUCTURED_VERDICTS = { " SUPPORTED " , " REFUTED " , " REFUTED_BY_ABSENCE " , " NEI " }
SEVERITY_ORDER = {
" BIOGRAPHICAL " : 0 ,
" FINANCIAL " : 1 ,
" ACHIEVEMENT " : 2 ,
" ATTRIBUTION " : 3 ,
" TIMELINE " : 4 ,
" QUANTITATIVE " : 5 ,
" VAGUE-QUANTIFIER " : 6 ,
" UNVERIFIED-POSITIVE " : 7 ,
" TECHNICAL " : 8 ,
" EXISTENTIAL " : 9 ,
" CAUSAL " : 10 ,
" COMPARATIVE " : 11 ,
}
USER_TERMS_RE = re . compile (
r " \ b(sam|sam ' s|the user|user ' s|you|your|yours|yourself) \ b " , re . IGNORECASE
)
HYPOTHETICAL_PREFIX_RE = re . compile (
r " ^ \ s*(if|suppose|imagine|hypothetically|assume|what if) \ b " ,
re . IGNORECASE ,
)
SUBJECT_MODAL_PREFIX_RE = re . compile (
r " ^ \ s*(sam|sam ' s|the user|user ' s|you|your) \ b \ s+(would|could) \ b " ,
re . IGNORECASE ,
)
TRAILING_MODAL_COMMENT_RE = re . compile (
r " \ s*,? \ s+(which|that) \ s+(would|could) \ b.*$ " ,
re . IGNORECASE ,
)
CURRENT_TURN_PREFIXES = [
re . compile ( r " ^ \ s*(per your request|as requested) \ s*,? \ s* " , re . IGNORECASE ) ,
re . compile (
r " ^ \ s*(you|sam|the user) \ s+(asked for|requested) \ s+maximum subagents \ b[^,.;]*(?:,? \ s*(and|so) \ s*)? " ,
re . IGNORECASE ,
) ,
re . compile (
r " ^ \ s*(you|sam|the user) \ s+(asked|told|requested|wanted) \ s+ "
r " (?:(me|us|codex|claude) \ s+)?(to|for) \ s+ " ,
re . IGNORECASE ,
) ,
re . compile (
r " ^ \ s*(your|sam ' s|the user ' s) \ s+request \ s+(was|is) \ s+(to|for) \ s+ " ,
re . IGNORECASE ,
) ,
]
FIRST_PERSON_DISCOURSE_RE = re . compile (
r " ^ \ s*(i|we) \ s+(reviewed|audited|checked|inspected|looked at|verified| "
r " confirmed|found|updated|changed|implemented|fixed|patched|added|removed| "
r " wired|ran|left) \ b " ,
re . IGNORECASE ,
)
DISCOURSE_ACTION_PREFIX_RE = re . compile (
r " ^ \ s*(audit|review|check|inspect|look at|verify|confirm|implement|fix| "
r " patch|add|remove|wire|run|use|go all in) \ b " ,
re . IGNORECASE ,
)
EMBEDDED_USER_CLAIM_RE = re . compile ( r " \ b(sam|sam ' s|the user|user ' s) \ b " , re . IGNORECASE )
TECHNICAL_RE = re . compile (
r " (/ \ w|[ \ w.-]+ \ .(py|rs|ts|tsx|js|jsx|json|md|toml|yaml|yml|sh) \ b| "
r " \ b(api|endpoint|env|flag|model|server|hook|script|function|class|repo| "
r " crate|mcp|http|json|sqlite|rust|python|typescript|command|config) \ b| "
r " \ b[A-Z][A-Z0-9_] { 2,} \ b) " ,
re . IGNORECASE ,
)
BIOGRAPHICAL_RE = re . compile (
r " \ b(born|lives?|located|based in|works? at|employed|employer|school| "
r " university|college|graduated|degree|founder|ceo|cto|student|job|role) \ b " ,
re . IGNORECASE ,
)
FINANCIAL_RE = re . compile (
r " ( \ $[ \ d,.]+| \ b(revenue|funding|raised|earned|paid|payout|prize money| "
r " salary|net worth|valuation|stock|shares?|portfolio|profit|loss) \ b) " ,
re . IGNORECASE ,
)
ACHIEVEMENT_RE = re . compile (
r " \ b(won|winner|ranked|placed|scored|score|completed|finished|launched| "
r " released|shipped|milestone|award|prize|accepted|published|graduated) \ b " ,
re . IGNORECASE ,
)
TIMELINE_RE = re . compile (
r " \ b( \ d {4} - \ d {2} - \ d {2} | \ d { 1,2}/ \ d { 1,2}/ \ d { 2,4}| "
r " jan(?:uary)?|feb(?:ruary)?|mar(?:ch)?|apr(?:il)?|may|jun(?:e)?| "
r " jul(?:y)?|aug(?:ust)?|sep(?:tember)?|oct(?:ober)?|nov(?:ember)?| "
r " dec(?:ember)?|today|yesterday|tomorrow|last week|next week| "
r " \ d+ \ s+(days?|weeks?|months?|years?) \ b) " ,
re . IGNORECASE ,
)
QUANTITATIVE_RE = re . compile (
r " ( \ b \ d+(?: \ . \ d+)? \ s*( % |percent|x|times|stars?|users?|customers?| "
r " submissions?|points?|gb|mb|ms|s|seconds?|minutes?|hours?)? \ b| "
r " \ b(one|two|three|four|five|six|seven|eight|nine|ten|dozens?|hundreds?| "
r " thousands?|many|several|few|most) \ b) " ,
re . IGNORECASE ,
)
TOKEN_RE = re . compile ( r " \ $? \ b[a-z0-9][a-z0-9.-]* \ b " , re . IGNORECASE )
STOP_CLAIM_TOKENS = {
" about " ,
" after " ,
" also " ,
" because " ,
" been " ,
" before " ,
" claim " ,
" from " ,
" have " ,
" into " ,
" more " ,
" sam " ,
" that " ,
" their " ,
" there " ,
" this " ,
" user " ,
" with " ,
" your " ,
}
ATTRIBUTION_RE = re . compile (
r " \ b(said|told|asked|agreed|decided|approved|rejected|committed|authored| "
r " wrote|built|implemented|requested|wanted|prefers?) \ b " ,
re . IGNORECASE ,
)
VAGUE_QUANTIFIER_RE = re . compile (
r " \ b(a few|some|several|many|most|multiple) \ b.* \ b(wins?|won|prizes?| "
r " money|customers?|deals?|submissions?|placements?) \ b " ,
re . IGNORECASE ,
)
@dataclass ( frozen = True )
class Claim :
text : str
claim_class : str
source_index : int
sam_critical : bool
@dataclass ( frozen = True )
class EvidenceItem :
id : str
preview : str
trust : float
role : str = " evidence "
date : str = " "
durable : bool = True
source : str = " vestige "
@dataclass
class ClaimVerdict :
claim : Claim
status : str
reason : str = " "
evidence_ids : list [ str ] = field ( default_factory = list )
durable_evidence_count : int = 0
high_trust_evidence_count : int = 0
def env_flag ( name : str ) - > bool :
return ( os . environ . get ( name ) or " " ) . strip ( ) . lower ( ) in { " 1 " , " true " , " yes " , " on " }
def truncate_chars ( text : str , max_chars : int , suffix : str = " ... " ) - > str :
""" Truncate by Python characters, never UTF-8 bytes, and avoid dangling marks. """
if max_chars < = 0 :
return " "
if len ( text ) < = max_chars :
return text
if max_chars < = len ( suffix ) :
return text [ : max_chars ]
cut = text [ : max_chars - len ( suffix ) ] . rstrip ( )
while cut and unicodedata . combining ( cut [ - 1 ] ) :
cut = cut [ : - 1 ]
return f " { cut } { suffix } "
def safe_float ( value : Any , default : float = 0.0 ) - > float :
try :
return float ( value )
except ( TypeError , ValueError ) :
return default
2026-04-27 13:20:51 -05:00
def fetch_evidence ( draft : str ) - > tuple [ str , int ] :
""" Single deep_reference call — returns (formatted evidence, count of high-trust memories).
Only memories with trust > = TRUST_FLOOR are surfaced . If none qualify , returns ( " " , 0 )
and the caller should auto - pass without invoking the model .
"""
try :
with urllib . request . urlopen ( VESTIGE_HEALTH , timeout = VESTIGE_TIMEOUT ) as r :
r . read ( )
except Exception :
2026-05-27 19:03:16 -05:00
if sanhedrin_core is not None :
sanhedrin_core . record_fail_open ( " vestige_health_unavailable " , VESTIGE_HEALTH )
2026-04-27 13:20:51 -05:00
return " " , 0
query = draft [ : 1500 ]
resp = post_json ( VESTIGE_ENDPOINT , { " query " : query , " depth " : 12 } , VESTIGE_TIMEOUT )
if not isinstance ( resp , dict ) :
2026-05-27 19:03:16 -05:00
if sanhedrin_core is not None :
sanhedrin_core . record_fail_open ( " deep_reference_unavailable " , VESTIGE_ENDPOINT )
2026-04-27 13:20:51 -05:00
return " " , 0
parts = [ ]
high_trust_count = 0
confidence = resp . get ( " confidence " , 0 )
rec = resp . get ( " recommended " ) or { }
rec_trust = float ( rec . get ( " trust_score " , 0 ) or 0 )
if rec and rec_trust > = TRUST_FLOOR :
rid = ( rec . get ( " memory_id " ) or rec . get ( " id " ) or " " ) [ : 8 ]
date = ( rec . get ( " date " ) or " " ) [ : 10 ]
prev = ( rec . get ( " answer_preview " ) or rec . get ( " preview " ) or " " ) [ : 500 ]
parts . append ( f " RECOMMENDED [ { rid } ] trust= { rec_trust : .2f } date= { date } : \n { prev } " )
high_trust_count + = 1
contradictions = resp . get ( " contradictions " ) or [ ]
if contradictions :
parts . append ( f " \n CONTRADICTIONS DETECTED: { len ( contradictions ) } pair(s) " )
for c in contradictions [ : 3 ] :
parts . append ( f " - { json . dumps ( c ) [ : 200 ] } " )
superseded = resp . get ( " superseded " ) or [ ]
if superseded :
ht_super = [ s for s in superseded if float ( s . get ( " trust " , 0 ) or 0 ) > = TRUST_FLOOR ]
if ht_super :
parts . append ( f " \n SUPERSEDED MEMORIES (trust>= { TRUST_FLOOR } ): { len ( ht_super ) } " )
for s in ht_super [ : 3 ] :
sid = ( s . get ( " id " ) or " " ) [ : 8 ]
parts . append ( f " - [ { sid } ] { ( s . get ( ' preview ' ) or ' ' ) [ : 200 ] } " )
evidence = resp . get ( " evidence " ) or [ ]
high_trust_evidence = [ ev for ev in evidence if float ( ev . get ( " trust " , 0 ) or 0 ) > = TRUST_FLOOR ]
if high_trust_evidence :
parts . append ( f " \n HIGH-TRUST EVIDENCE (trust>= { TRUST_FLOOR } , { min ( len ( high_trust_evidence ) , 5 ) } of { len ( evidence ) } total): " )
for ev in high_trust_evidence [ : 5 ] :
eid = ( ev . get ( " id " ) or " " ) [ : 8 ]
role = ev . get ( " role " , " ? " )
trust = float ( ev . get ( " trust " , 0 ) or 0 )
prev = ( ev . get ( " preview " ) or " " ) . strip ( ) [ : 300 ]
parts . append ( f " [ { eid } ] role= { role } trust= { trust : .2f } \n { prev } " )
high_trust_count + = 1
if high_trust_count == 0 :
return " " , 0
header = f " VESTIGE CONFIDENCE: { int ( confidence * 100 ) } % | HIGH-TRUST MEMORIES: { high_trust_count } \n \n "
return header + " \n " . join ( parts ) , high_trust_count
2026-05-24 16:09:44 -05:00
def split_candidate_claims ( draft : str ) - > list [ str ] :
""" Return sentence-ish draft fragments that can be classified as claims. """
2026-05-27 19:03:16 -05:00
without_fences = re . sub ( r " ```.*?``` " , " " , draft [ : 16_384 ] , flags = re . DOTALL )
without_fences = re . sub ( r " `[^` \ n]+` " , " " , without_fences )
without_fences = re . sub ( r ' (^|[ \ s([ { ]) " [^ " \ n]+ " (?=([ \ s.,;:!?)} \ ]]|$)) ' , r " \ 1 " , without_fences )
2026-05-24 16:09:44 -05:00
fragments : list [ str ] = [ ]
for line in without_fences . splitlines ( ) :
2026-05-27 19:03:16 -05:00
if line . lstrip ( ) . startswith ( " > " ) :
continue
2026-05-24 16:09:44 -05:00
line = re . sub ( r " ^ \ s*[-*+] \ s+ " , " " , line ) . strip ( )
line = re . sub ( r " ^ \ s* \ d+[.)] \ s+ " , " " , line ) . strip ( )
if not line :
continue
parts = re . split ( r " (?<=[.!?]) \ s+(?=[A-Z0-9` \" ' ]) " , line )
fragments . extend ( part . strip ( " \t - " ) for part in parts if part . strip ( " \t - " ) )
2026-05-27 19:03:16 -05:00
if len ( fragments ) > = 512 :
return fragments [ : 512 ]
2026-05-24 16:09:44 -05:00
if not fragments :
compact = " " . join ( without_fences . split ( ) )
fragments = [
part . strip ( )
for part in re . split ( r " (?<=[.!?]) \ s+(?=[A-Z0-9` \" ' ]) " , compact )
if part . strip ( )
]
2026-05-27 19:03:16 -05:00
return fragments [ : 512 ]
2026-05-24 16:09:44 -05:00
def normalize_asserted_fragment ( text : str ) - > str | None :
text = " " . join ( text . split ( ) ) . strip ( )
if not text :
return None
2026-05-27 19:03:16 -05:00
if re . search ( r " \ b(need|still need|let me|will|should) \ s+to \ s+verify \ b " , text , re . I ) :
return None
if re . fullmatch (
r " (the user|user|sam|you) \ s+(said|told|asked|wrote|noted|mentioned) \ s* "
r " (earlier|before|previously)? \ .? " ,
text ,
re . I ,
) :
return None
2026-05-24 16:09:44 -05:00
text = TRAILING_MODAL_COMMENT_RE . sub ( " " , text ) . strip ( " ,;:- " )
if HYPOTHETICAL_PREFIX_RE . search ( text ) or SUBJECT_MODAL_PREFIX_RE . search ( text ) :
return None
for prefix in CURRENT_TURN_PREFIXES :
stripped = prefix . sub ( " " , text , count = 1 ) . strip ( " ,;:- " )
if stripped == text :
continue
embedded = EMBEDDED_USER_CLAIM_RE . search ( stripped )
if embedded and embedded . start ( ) > 0 and DISCOURSE_ACTION_PREFIX_RE . search ( stripped ) :
stripped = stripped [ embedded . start ( ) : ] . strip ( " ,;:- " )
elif DISCOURSE_ACTION_PREFIX_RE . search ( stripped ) or FIRST_PERSON_DISCOURSE_RE . search ( stripped ) :
return None
text = stripped
break
if FIRST_PERSON_DISCOURSE_RE . search ( text ) :
embedded = EMBEDDED_USER_CLAIM_RE . search ( text )
if embedded and embedded . start ( ) > 0 :
text = text [ embedded . start ( ) : ] . strip ( " ,;:- " )
else :
return None
text = TRAILING_MODAL_COMMENT_RE . sub ( " " , text ) . strip ( " ,;:- " )
if not text or HYPOTHETICAL_PREFIX_RE . search ( text ) or SUBJECT_MODAL_PREFIX_RE . search ( text ) :
return None
return text
def classify_claim ( text : str ) - > str | None :
""" Classify a factual-shaped claim with conservative, testable heuristics. """
if VAGUE_QUANTIFIER_RE . search ( text ) :
return " VAGUE-QUANTIFIER "
if BIOGRAPHICAL_RE . search ( text ) :
return " BIOGRAPHICAL "
if FINANCIAL_RE . search ( text ) :
return " FINANCIAL "
if ACHIEVEMENT_RE . search ( text ) :
return " ACHIEVEMENT "
if ATTRIBUTION_RE . search ( text ) :
return " ATTRIBUTION "
if TECHNICAL_RE . search ( text ) :
return " TECHNICAL "
if TIMELINE_RE . search ( text ) :
return " TIMELINE "
if QUANTITATIVE_RE . search ( text ) :
return " QUANTITATIVE "
if re . search ( r " \ b(exists?|there is|there are|contains?|includes?) \ b " , text , re . I ) :
return " EXISTENTIAL "
if re . search ( r " \ b(because|caused|causes|therefore|so that|as a result) \ b " , text , re . I ) :
return " CAUSAL "
if re . search ( r " \ b(better|best|faster|fastest|more than|less than|fewer than) \ b " , text , re . I ) :
return " COMPARATIVE "
return None
def is_sam_critical_claim ( text : str , claim_class : str ) - > bool :
if claim_class not in CRITICAL_ABSENCE_CLASSES :
return False
return bool ( USER_TERMS_RE . search ( text ) )
def extract_check_worthy_claims (
draft : str ,
max_claims : int = MAX_CLAIMS ,
max_claim_chars : int = MAX_CLAIM_CHARS ,
) - > list [ Claim ] :
claims : list [ Claim ] = [ ]
seen : set [ str ] = set ( )
for idx , fragment in enumerate ( split_candidate_claims ( draft ) ) :
text = normalize_asserted_fragment ( fragment )
if not text :
continue
claim_class = classify_claim ( text )
if not claim_class :
continue
text = truncate_chars ( text , max_claim_chars )
key = text . lower ( )
if key in seen :
continue
seen . add ( key )
claims . append (
Claim (
text = text ,
claim_class = claim_class ,
source_index = idx ,
sam_critical = is_sam_critical_claim ( text , claim_class ) ,
)
)
2026-05-27 19:03:16 -05:00
return sorted (
claims ,
key = lambda claim : ( SEVERITY_ORDER . get ( claim . claim_class , 99 ) , claim . source_index ) ,
) [ : max_claims ]
2026-05-24 16:09:44 -05:00
def normalize_evidence_item ( raw : Any , source : str = " vestige " ) - > EvidenceItem | None :
if isinstance ( raw , str ) :
preview = raw . strip ( )
if not preview :
return None
return EvidenceItem (
id = " stage " ,
preview = truncate_chars ( preview , MAX_EVIDENCE_CHARS ) ,
trust = 1.0 ,
role = " staged " ,
durable = False ,
source = " stage " ,
)
if not isinstance ( raw , dict ) :
return None
preview = (
raw . get ( " preview " )
or raw . get ( " answer_preview " )
or raw . get ( " content " )
or raw . get ( " text " )
or raw . get ( " claim " )
or " "
)
preview = str ( preview ) . strip ( )
if not preview :
return None
trust = safe_float ( raw . get ( " trust " , raw . get ( " trust_score " , 1.0 if source == " stage " else 0.0 ) ) )
item_id = str ( raw . get ( " memory_id " ) or raw . get ( " id " ) or source or " evidence " )
role = str ( raw . get ( " role " ) or ( " staged " if source == " stage " else " evidence " ) )
date = str ( raw . get ( " date " ) or raw . get ( " created_at " ) or " " ) [ : 32 ]
return EvidenceItem (
id = item_id ,
preview = truncate_chars ( preview , MAX_EVIDENCE_CHARS ) ,
trust = trust ,
role = role ,
date = date ,
durable = ( source != " stage " ) ,
source = source ,
)
def evidence_from_deep_reference ( resp : dict [ str , Any ] ) - > list [ EvidenceItem ] :
items : list [ EvidenceItem ] = [ ]
rec = resp . get ( " recommended " ) or { }
rec_item = normalize_evidence_item ( rec , " vestige " )
if rec_item :
items . append ( rec_item )
for raw in resp . get ( " evidence " ) or [ ] :
item = normalize_evidence_item ( raw , " vestige " )
if item :
items . append ( item )
for raw in resp . get ( " superseded " ) or [ ] :
item = normalize_evidence_item ( raw , " vestige " )
if item :
items . append ( item )
return dedupe_evidence ( items )
def dedupe_evidence ( items : list [ EvidenceItem ] ) - > list [ EvidenceItem ] :
deduped : list [ EvidenceItem ] = [ ]
seen : set [ tuple [ str , str ] ] = set ( )
for item in items :
key = ( item . source , item . id )
if key in seen :
continue
seen . add ( key )
deduped . append ( item )
return deduped
def load_staged_evidence ( path : str | None ) - > list [ EvidenceItem ] :
""" Read optional JSON-array staged evidence. It is non-durable by design. """
if not path :
return [ ]
try :
with open ( path , " r " , encoding = " utf-8 " ) as f :
raw = json . load ( f )
except ( OSError , json . JSONDecodeError ) :
return [ ]
if not isinstance ( raw , list ) :
return [ ]
items : list [ EvidenceItem ] = [ ]
for idx , raw_item in enumerate ( raw ) :
item = normalize_evidence_item ( raw_item , " stage " )
if item is None :
continue
if item . id == " stage " :
item = replace ( item , id = f " stage: { idx } " )
items . append ( item )
return items
def claim_query ( claim : Claim ) - > str :
return (
f " Class: { claim . claim_class } \n "
f " Claim: { claim . text } "
)
def fetch_claim_evidence ( claim : Claim ) - > tuple [ list [ EvidenceItem ] , bool ] :
resp = post_json ( VESTIGE_ENDPOINT , { " query " : claim_query ( claim ) , " depth " : 12 } , VESTIGE_TIMEOUT )
if not isinstance ( resp , dict ) :
return [ ] , False
if resp . get ( " error " ) or resp . get ( " errors " ) :
return [ ] , False
if str ( resp . get ( " status " ) or " " ) . strip ( ) . lower ( ) in {
" error " ,
" failed " ,
" failure " ,
" unavailable " ,
" timeout " ,
} :
return [ ] , False
if not any (
key in resp
for key in ( " confidence " , " evidence " , " recommended " , " reasoning " , " query " , " status " )
) :
return [ ] , False
return evidence_from_deep_reference ( resp ) , True
def high_trust ( items : list [ EvidenceItem ] ) - > list [ EvidenceItem ] :
return [ item for item in items if item . trust > = TRUST_FLOOR ]
def durable_high_trust ( items : list [ EvidenceItem ] ) - > list [ EvidenceItem ] :
return [ item for item in items if item . durable and item . trust > = TRUST_FLOOR ]
def salient_claim_tokens ( text : str ) - > set [ str ] :
tokens = { token . lower ( ) . strip ( " . " ) for token in TOKEN_RE . findall ( text ) }
return {
token
for token in tokens
if len ( token ) > = 4 and token not in STOP_CLAIM_TOKENS
}
def evidence_relevant_to_claim ( claim : Claim , evidence : EvidenceItem ) - > bool :
claim_numbers = set ( re . findall ( r " \ $? \ d+(?:[,.] \ d+)*(?: \ . \ d+)? " , claim . text ) )
if claim_numbers and any ( num in evidence . preview for num in claim_numbers ) :
return True
claim_tokens = salient_claim_tokens ( claim . text )
if not claim_tokens :
return True
preview_tokens = salient_claim_tokens ( evidence . preview )
overlap = claim_tokens & preview_tokens
threshold = 1 if claim . claim_class == " TECHNICAL " else 2
return len ( overlap ) > = threshold
def relevant_durable_high_trust ( claim : Claim , items : list [ EvidenceItem ] ) - > list [ EvidenceItem ] :
return [
item
for item in durable_high_trust ( items )
if evidence_relevant_to_claim ( claim , item )
]
def format_claim_evidence ( items : list [ EvidenceItem ] , claim : Claim | None = None ) - > str :
if not items :
return " (no relevant evidence retrieved) "
lines = [ ]
durable_count = (
len ( relevant_durable_high_trust ( claim , items ) )
if claim is not None
else len ( durable_high_trust ( items ) )
)
high_count = len ( high_trust ( items ) )
lines . append (
f " HIGH-TRUST EVIDENCE: { high_count } | DURABLE HIGH-TRUST EVIDENCE: { durable_count } "
)
stage_count = len ( [ item for item in items if not item . durable ] )
if stage_count :
lines . append (
" STAGED EVIDENCE PRESENT: non-durable overlay; do not count it as durable memory. "
)
for item in high_trust ( items ) [ : 8 ] :
durable = " durable " if item . durable else " staged "
short_id = item . id [ : 12 ]
lines . append (
f " [ { short_id } ] { durable } role= { item . role } trust= { item . trust : .2f } date= { item . date } \n "
f " { item . preview } "
)
return " \n \n " . join ( lines )
2026-05-06 02:22:24 -05:00
SYSTEM_PROMPT = """ You are the Sanhedrin Executioner. You judge whether a DRAFT contradicts Vestige memory evidence about the user. ONE LINE OF OUTPUT.
2026-04-27 13:20:51 -05:00
VALID CLASS TAGS ( closed set — pick exactly one ) :
TECHNICAL | ACHIEVEMENT | FINANCIAL | BIOGRAPHICAL | TIMELINE | ATTRIBUTION | VAGUE - QUANTIFIER | UNVERIFIED - POSITIVE
DEFAULT POSTURE
- DEFAULT to ` yes ` ( PASS ) for TECHNICAL / TIMELINE / EXISTENTIAL claims unless you can cite a same - subject direct contradiction .
2026-05-06 02:22:24 -05:00
- DEFAULT to ` no ` ( VETO , fail - closed ) for these specific user - about claims when high - trust evidence is silent on the named entity :
* Specific institution / employer / school / company the user is claimed to be at
2026-04-27 13:20:51 -05:00
* Specific dollar amount won / earned / raised
* Specific competition placement / score / prize received
2026-05-06 02:22:24 -05:00
* Specific date the user did something specific ( graduated , was hired , was born )
* Vague - quantifier positive about the user ( " a few wins " , " some prize money " , " most submissions placed top 10 " , " many customers " , " several deals " )
2026-04-27 13:20:51 -05:00
THREE FALSE - POSITIVE PROTECTIONS ( these output ` yes ` )
1. SUBJECT - EQUALITY GATE : only same - subject claims are veto candidates . Memory about Vestige ' s internal codebase ≠ contradiction with external tools (Qwen, MCP-protocol-spec, MLX, Cursor). Memory about project X ≠ contradiction with project Y.
2. VERSION - DISCRIMINATOR RULE : version / generation tokens ( M1 / M2 / M3 / M4 / M5 , v0 .5 / v1 .0 , GPT - 4 / GPT - 5 , Qwen3 .5 / Qwen3 .6 ) are subject discriminators . Different versions = different subjects = no contradiction by default .
3. AGREEMENT - IS - NOT - CONTRADICTION : if the memory preview AGREES with the draft claim , that ' s PASS not VETO.
INFERENCE BAN
- DO NOT use " implies " , " implying " , " suggests " , " must mean " , " would mean " , " indicates " , " therefore " in veto reasons .
- If you have to chain inferences from a memory to reach a contradiction , PASS .
- TIMELINE vetoes specifically : require an EXPLICIT date or duration in the cited memory that arithmetically excludes the draft ' s date. Vague phrases like " until I graduate " cannot ground a TIMELINE veto.
ARCHITECTURE - VS - COMPONENT RULE
- A memory describing OVERALL architecture ( Thalamus + Sanhedrin triad , 4 - layer biology ) does NOT contradict a draft about an INTERNAL COMPONENT ( subagent model , sidecar transport , bridge script ) . Different layers of the same stack are not contradictions .
OUTPUT FORMAT ( exactly one line , no preamble , no explanation , no markdown )
- PASS : yes
- VETO : no - [ Sanhedrin Veto ] [ CLASS ] : < reason under 140 chars , cite memory id verbatim from evidence >
EIGHT WORKED EXAMPLES — STUDY THESE PATTERNS
[ VETO — same - subject TECHNICAL contradiction ]
Evidence : " Vestige is a 2-crate Rust workspace (vestige-core + vestige-mcp) " trust = 0.62 [ de43be5a ]
Draft : " Edit the FastAPI router in vestige/main.py for Python extensions to Vestige "
Output : no - [ Sanhedrin Veto ] TECHNICAL : Draft says FastAPI / Python for Vestige , memory de43be5a says 2 - crate Rust workspace .
[ VETO — same - subject ACHIEVEMENT contradiction ]
2026-05-06 02:22:24 -05:00
Evidence : " Final benchmark submission scored 36/50 on April 15, no payout " trust = 0.71 [ 9 cf2a764 ]
Draft : " The user won the benchmark with a perfect 50/50 and took the $25K grand prize "
2026-04-27 13:20:51 -05:00
Output : no - [ Sanhedrin Veto ] ACHIEVEMENT : Draft claims 50 / 50 win + $ 25 K , memory 9 cf2a764 shows 36 / 50 final , no payout .
[ VETO — VAGUE - QUANTIFIER fail - closed ]
2026-05-06 02:22:24 -05:00
Evidence : high - trust memories about the user ' s competition history, none enumerate any wins
Draft : " The user won a few competitions and earned some prize money "
2026-04-27 13:20:51 -05:00
Output : no - [ Sanhedrin Veto ] VAGUE - QUANTIFIER : Draft says " a few wins / some prize money " , evidence enumerates zero wins , fail - closed .
[ VETO — UNVERIFIED - POSITIVE fail - closed ]
2026-05-06 02:22:24 -05:00
Evidence : high - trust memories about the user ' s identity/work, no example school or employer mention
Draft : " The user graduated from Example University in 2019 with a 3.94 GPA and worked at Example Labs "
2026-04-27 13:20:51 -05:00
Output : no - [ Sanhedrin Veto ] UNVERIFIED - POSITIVE : Specific Stanford / 2019 / Google Brain claims , evidence silent on all , fail - closed .
[ PASS — SUBJECT - EQUALITY gate ( external tool , not Vestige ) ]
Evidence : " Vestige is a 2-crate Rust workspace " trust = 0.62
Draft : " Switched the Sanhedrin executioner to local Qwen3.6-35B-A3B via mlx_lm.server "
Output : yes
[ PASS — VERSION - DISCRIMINATOR rule ]
Evidence : " M5 Max ~900 GB/s bandwidth (planned hardware) " trust = 0.62
Draft : " Memory bandwidth on the M3 Max is around 400 GB/s for the unified architecture "
Output : yes
[ PASS — AGREEMENT - IS - NOT - CONTRADICTION ]
2026-05-06 02:22:24 -05:00
Evidence : " The user ' s M3 Max MacBook Pro arrived 2026-04-20 " trust = 0.55
Draft : " The user ' s MacBook is an M3 Max "
2026-04-27 13:20:51 -05:00
Output : yes
[ PASS — ARCHITECTURE - VS - COMPONENT ]
Evidence : " Cognitive Sandwich = Thalamus preflight triad + Sanhedrin Stop council shipped 2026-04-20 " trust = 0.7
Draft : " Cognitive Sandwich ' s Sanhedrin originally used a Haiku 4.5 subagent for the Executioner role "
Output : yes
[ PASS — AUXILIARY - SCRIPT consumer - vs - consumed ]
Evidence : " Vestige is a 2-crate Rust workspace " trust = 0.62
Draft : " I added a Python script (sanhedrin-local.py) at ~/.claude/hooks/ that calls Vestige ' s HTTP API for evidence fetch. "
Reason : external script that CALLS Vestige is not the same subject as Vestige ' s internal implementation. The consumer is not the consumed.
Output : yes
[ PASS — HYPOTHETICAL - MOOD ]
2026-05-06 02:22:24 -05:00
Evidence : " Final benchmark score was 36/50 with no payout " trust = 0.71
Draft : " If the user wins the benchmark 50/50 next time around, they could claim the $25K grand prize. "
2026-04-27 13:20:51 -05:00
Reason : prefix ` if ` / ` suppose ` / ` imagine ` / ` hypothetically ` / ` would ` / ` could ` / ` assume ` marks the embedded claim as conditional , NOT asserted . Conditional claims about future or counterfactual states do not contradict factual memory .
Output : yes
HYPOTHETICAL - MOOD RULE : if a draft sentence is governed by ` if ` , ` suppose ` , ` imagine ` , ` hypothetically ` , ` would ` , ` could ` , ` assume ` , ` what if ` , the embedded claim is NOT being asserted as fact — PASS that claim regardless of memory state .
2026-05-06 02:22:24 -05:00
ARCHIVED - COMPETITION RULE : do NOT fail - closed on the EXISTENCE of a past competition or project just because evidence is silent on it . Fail - closed applies only to specific PLACEMENT , SCORE , PRIZE , INSTITUTION , or DOLLAR AMOUNT - - not to " the user participated in X. "
2026-04-27 13:20:51 -05:00
2026-05-06 02:22:24 -05:00
MULTI - CLAIM SEVERITY ORDERING : if multiple claims are vetoable , choose ACHIEVEMENT / FINANCIAL / BIOGRAPHICAL / UNVERIFIED - POSITIVE over TECHNICAL . Specific fabrications about the user ' s life are more dangerous than tech-stack mismatches.
2026-04-27 13:20:51 -05:00
2026-05-06 02:22:24 -05:00
When in doubt on TECHNICAL / TIMELINE : PASS . When in doubt on a user - about ACHIEVEMENT / FINANCIAL / BIOGRAPHICAL claim with specific named entities not in evidence : VETO with UNVERIFIED - POSITIVE . """
2026-04-27 13:20:51 -05:00
2026-05-24 16:09:44 -05:00
CLAIM_SYSTEM_PROMPT = """ You are the Sanhedrin Executioner in claim mode. Judge ONE extracted claim against the provided Vestige evidence.
Return exactly one JSON object , no markdown :
{
" status " : " SUPPORTED|REFUTED|REFUTED_BY_ABSENCE|NEI " ,
" class " : " <claim class> " ,
" reason " : " <short reason, under 140 chars> " ,
" evidence_ids " : [ " <ids used> " ]
2026-04-27 13:20:51 -05:00
}
2026-05-24 16:09:44 -05:00
Rules :
- SUPPORTED : high - trust evidence directly supports the claim .
- REFUTED : high - trust evidence directly contradicts the same - subject claim .
- REFUTED_BY_ABSENCE : use only when instructions say absence - fail - closed applies .
- NEI : not enough information , stale / noisy evidence , wrong subject , or inference required .
- Do not infer contradiction across different subjects , versions , projects , or architecture layers .
- Staged evidence is context only and is not durable Vestige memory .
- Reasons must not use implies , suggests , must mean , would mean , indicates , therefore , or this means .
"""
VALID_CLASSES = CLAIM_CLASSES
2026-04-27 13:20:51 -05:00
INFERENCE_VERBS = (
" implies " , " implying " , " suggests " , " must mean " , " would mean " ,
" indicates that " , " therefore the " , " this means " ,
)
VERDICT_RE = re . compile (
2026-05-24 16:09:44 -05:00
r " ^no - \ [Sanhedrin Veto \ ] \ [?([A-Z][A-Z \ -]*) \ ]?: (. { 1,180})$ "
2026-04-27 13:20:51 -05:00
)
def validate_verdict ( verdict : str ) - > str :
""" Post-validate the model ' s verdict. Fail-open ( ' yes ' ) on any malformation:
- Length over 220 chars
- Veto with class tag not in the closed set
- Veto reason containing inference verbs
- Veto not matching the canonical regex
"""
v = verdict . strip ( )
if not v :
return " yes "
low = v . lower ( )
if low == " yes " or low . startswith ( " yes " ) :
return " yes "
if not low . startswith ( " no " ) :
return " yes "
if len ( v ) > 220 :
return " yes " # runaway reasoning blob
m = VERDICT_RE . match ( v )
if not m :
return " yes " # format break
cls = m . group ( 1 )
reason = m . group ( 2 )
if cls not in VALID_CLASSES :
return " yes " # invented class tag
reason_low = reason . lower ( )
for verb in INFERENCE_VERBS :
if verb in reason_low :
return " yes " # inference-chain veto, downgrade per ban
return v
def judge ( draft : str , evidence : str ) - > str :
2026-05-27 19:03:16 -05:00
if not sanhedrin_model_configured ( ) :
if sanhedrin_core is not None :
sanhedrin_core . record_fail_open (
" model_not_configured " ,
" Set VESTIGE_SANHEDRIN_ENDPOINT and VESTIGE_SANHEDRIN_MODEL, or choose a preset. " ,
)
return " "
2026-04-27 13:20:51 -05:00
user_msg = (
f " VESTIGE EVIDENCE (recommended + top trust-scored memories): \n "
f " { evidence if evidence else ' (no relevant evidence retrieved) ' } \n \n "
f " --- \n DRAFT TO JUDGE: \n { draft } "
)
2026-05-27 19:03:16 -05:00
body = sanhedrin_body (
[
2026-04-27 13:20:51 -05:00
{ " role " : " system " , " content " : SYSTEM_PROMPT } ,
{ " role " : " user " , " content " : user_msg } ,
] ,
2026-05-27 19:03:16 -05:00
2500 ,
[
2026-04-27 13:20:51 -05:00
" \n \n Wait, " , " \n \n Actually, " , " \n \n Let me " , " \n \n Hmm, " ,
" \n \n On second thought " , " \n \n Oh wait " ,
] ,
2026-05-27 19:03:16 -05:00
)
2026-05-01 05:24:03 -05:00
resp = post_json ( SANHEDRIN_ENDPOINT , body , SANHEDRIN_TIMEOUT )
2026-04-27 13:20:51 -05:00
if not isinstance ( resp , dict ) :
return " "
try :
msg = resp [ " choices " ] [ 0 ] [ " message " ]
raw = msg . get ( " content " ) or " "
if not raw . strip ( ) :
raw = msg . get ( " reasoning " ) or " "
except ( KeyError , IndexError , TypeError ) :
return " "
cleaned = THINK_RE . sub ( " " , raw ) . strip ( )
lines = [ ln . strip ( ) for ln in cleaned . splitlines ( ) if ln . strip ( ) ]
if not lines :
return " "
last = lines [ - 1 ]
low = last . lower ( )
if low . startswith ( " yes " ) or low . startswith ( " no " ) :
return validate_verdict ( last )
for ln in reversed ( lines ) :
l = ln . lower ( )
if l . startswith ( " yes " ) or l . startswith ( " no " ) :
return validate_verdict ( ln )
return " "
2026-05-24 16:09:44 -05:00
def absence_verdict ( claim : Claim ) - > ClaimVerdict :
reason = (
f " { claim . claim_class } claim about Sam has zero high-trust durable Vestige evidence. "
)
return ClaimVerdict (
claim = claim ,
status = " REFUTED_BY_ABSENCE " ,
reason = truncate_chars ( reason , 140 ) ,
)
def nei_verdict (
claim : Claim ,
reason : str ,
evidence : list [ EvidenceItem ] | None = None ,
) - > ClaimVerdict :
evidence = evidence or [ ]
return ClaimVerdict (
claim = claim ,
status = " NEI " ,
reason = truncate_chars ( reason , 140 ) ,
evidence_ids = [ item . id for item in high_trust ( evidence ) [ : 3 ] ] ,
durable_evidence_count = len ( relevant_durable_high_trust ( claim , evidence ) ) ,
high_trust_evidence_count = len ( high_trust ( evidence ) ) ,
)
def supported_verdict ( claim : Claim , evidence : list [ EvidenceItem ] ) - > ClaimVerdict :
return ClaimVerdict (
claim = claim ,
status = " SUPPORTED " ,
reason = " High-trust evidence supports or does not contradict the claim. " ,
evidence_ids = [ item . id for item in high_trust ( evidence ) [ : 3 ] ] ,
durable_evidence_count = len ( relevant_durable_high_trust ( claim , evidence ) ) ,
high_trust_evidence_count = len ( high_trust ( evidence ) ) ,
)
def parse_json_object ( raw : str ) - > dict [ str , Any ] | None :
cleaned = THINK_RE . sub ( " " , raw ) . strip ( )
cleaned = re . sub ( r " ^```(?:json)? \ s* " , " " , cleaned , flags = re . IGNORECASE ) . strip ( )
cleaned = re . sub ( r " \ s*```$ " , " " , cleaned ) . strip ( )
try :
obj = json . loads ( cleaned )
return obj if isinstance ( obj , dict ) else None
except json . JSONDecodeError :
pass
start = cleaned . find ( " { " )
end = cleaned . rfind ( " } " )
if start > = 0 and end > start :
try :
obj = json . loads ( cleaned [ start : end + 1 ] )
return obj if isinstance ( obj , dict ) else None
except json . JSONDecodeError :
return None
return None
def verdict_from_legacy_line ( claim : Claim , raw : str , evidence : list [ EvidenceItem ] ) - > ClaimVerdict | None :
line = validate_verdict ( raw )
if line == " yes " :
return supported_verdict ( claim , evidence )
m = VERDICT_RE . match ( line )
if not m :
return None
reason = m . group ( 2 )
if not relevant_durable_high_trust ( claim , evidence ) :
return nei_verdict ( claim , " Durable evidence required for refuted verdict. " , evidence )
return ClaimVerdict (
claim = claim ,
status = " REFUTED " ,
reason = truncate_chars ( reason , 140 ) ,
evidence_ids = [ item . id for item in high_trust ( evidence ) [ : 3 ] ] ,
durable_evidence_count = len ( relevant_durable_high_trust ( claim , evidence ) ) ,
high_trust_evidence_count = len ( high_trust ( evidence ) ) ,
)
def validate_structured_verdict (
claim : Claim ,
data : dict [ str , Any ] ,
evidence : list [ EvidenceItem ] ,
) - > ClaimVerdict :
status = str ( data . get ( " status " ) or " " ) . strip ( ) . upper ( )
if status not in STRUCTURED_VERDICTS :
status = " NEI "
claim_class = str ( data . get ( " class " ) or claim . claim_class ) . strip ( ) . upper ( )
if claim_class not in CLAIM_CLASSES :
claim_class = claim . claim_class
reason = truncate_chars ( str ( data . get ( " reason " ) or " " ) . strip ( ) , 140 )
if any ( verb in reason . lower ( ) for verb in INFERENCE_VERBS ) :
return nei_verdict ( claim , " Inference-chain verdict downgraded to NEI. " , evidence )
if status == " REFUTED_BY_ABSENCE " :
if not ( claim . sam_critical and claim . claim_class in CRITICAL_ABSENCE_CLASSES ) :
return nei_verdict ( claim , " Absence veto does not apply to this claim. " , evidence )
if relevant_durable_high_trust ( claim , evidence ) :
return nei_verdict ( claim , " Durable evidence exists; absence veto does not apply. " , evidence )
if status == " REFUTED " and not relevant_durable_high_trust ( claim , evidence ) :
return nei_verdict ( claim , " Durable evidence required for refuted verdict. " , evidence )
2026-05-27 19:03:16 -05:00
if status == " SUPPORTED " and high_trust ( evidence ) and not durable_high_trust ( evidence ) :
return nei_verdict ( claim , " Durable evidence required for supported verdict. " , evidence )
2026-05-24 16:09:44 -05:00
evidence_ids_raw = data . get ( " evidence_ids " ) or [ ]
evidence_ids = [
str ( eid ) for eid in evidence_ids_raw [ : 5 ]
] if isinstance ( evidence_ids_raw , list ) else [ ]
if not reason :
if status == " SUPPORTED " :
reason = " High-trust evidence supports or does not contradict the claim. "
elif status == " NEI " :
reason = " Not enough high-trust evidence to decide. "
elif status == " REFUTED_BY_ABSENCE " :
reason = absence_verdict ( claim ) . reason
else :
reason = " High-trust evidence refutes the claim. "
return ClaimVerdict (
claim = Claim (
text = claim . text ,
claim_class = claim_class ,
source_index = claim . source_index ,
sam_critical = claim . sam_critical ,
) ,
status = status ,
reason = reason ,
evidence_ids = evidence_ids ,
durable_evidence_count = len ( relevant_durable_high_trust ( claim , evidence ) ) ,
high_trust_evidence_count = len ( high_trust ( evidence ) ) ,
)
def judge_claim_with_model ( claim : Claim , evidence : list [ EvidenceItem ] ) - > ClaimVerdict :
2026-05-27 19:03:16 -05:00
if not sanhedrin_model_configured ( ) :
if sanhedrin_core is not None :
sanhedrin_core . record_fail_open (
" model_not_configured " ,
" Set VESTIGE_SANHEDRIN_ENDPOINT and VESTIGE_SANHEDRIN_MODEL, or choose a preset. " ,
)
return nei_verdict ( claim , " Sanhedrin model not configured; fail-open for this claim. " , evidence )
2026-05-24 16:09:44 -05:00
user_msg = (
f " CLAIM CLASS: { claim . claim_class } \n "
f " SAM-CRITICAL: { ' yes ' if claim . sam_critical else ' no ' } \n "
f " ABSENCE-FAIL-CLOSED APPLIES: "
f " { ' yes ' if claim . sam_critical and claim . claim_class in CRITICAL_ABSENCE_CLASSES else ' no ' } \n "
f " DURABLE HIGH-TRUST EVIDENCE COUNT: { len ( relevant_durable_high_trust ( claim , evidence ) ) } \n \n "
f " CLAIM: \n { claim . text } \n \n "
f " EVIDENCE: \n { format_claim_evidence ( evidence , claim ) } "
)
2026-05-27 19:03:16 -05:00
body = sanhedrin_body (
[
2026-05-24 16:09:44 -05:00
{ " role " : " system " , " content " : CLAIM_SYSTEM_PROMPT } ,
{ " role " : " user " , " content " : user_msg } ,
] ,
2026-05-27 19:03:16 -05:00
700 ,
)
2026-05-24 16:09:44 -05:00
resp = post_json ( SANHEDRIN_ENDPOINT , body , SANHEDRIN_TIMEOUT )
if not isinstance ( resp , dict ) :
2026-05-27 19:03:16 -05:00
if sanhedrin_core is not None :
sanhedrin_core . record_fail_open ( " model_unavailable " , f " endpoint= { SANHEDRIN_ENDPOINT } " )
2026-05-24 16:09:44 -05:00
return nei_verdict ( claim , " Sanhedrin model unavailable; fail-open for this claim. " , evidence )
try :
msg = resp [ " choices " ] [ 0 ] [ " message " ]
raw = msg . get ( " content " ) or msg . get ( " reasoning " ) or " "
except ( KeyError , IndexError , TypeError ) :
2026-05-27 19:03:16 -05:00
if sanhedrin_core is not None :
sanhedrin_core . record_fail_open ( " malformed_model_response " , f " endpoint= { SANHEDRIN_ENDPOINT } " )
2026-05-24 16:09:44 -05:00
return nei_verdict ( claim , " Malformed Sanhedrin model response. " , evidence )
data = parse_json_object ( raw )
if data is not None :
return validate_structured_verdict ( claim , data , evidence )
legacy = verdict_from_legacy_line ( claim , raw , evidence )
if legacy is not None :
return legacy
2026-05-27 19:03:16 -05:00
if sanhedrin_core is not None :
sanhedrin_core . record_fail_open ( " unstructured_model_response " , raw [ : 500 ] )
2026-05-24 16:09:44 -05:00
return nei_verdict ( claim , " Sanhedrin model did not return structured JSON. " , evidence )
def judge_claim ( claim : Claim , evidence : list [ EvidenceItem ] ) - > ClaimVerdict :
2026-05-27 19:03:16 -05:00
if not sanhedrin_model_configured ( ) :
if sanhedrin_core is not None :
sanhedrin_core . record_fail_open (
" model_not_configured " ,
" Set VESTIGE_SANHEDRIN_ENDPOINT and VESTIGE_SANHEDRIN_MODEL, or choose a preset. " ,
)
return nei_verdict ( claim , " Sanhedrin model not configured; fail-open for this claim. " , evidence )
2026-05-24 16:09:44 -05:00
durable_count = len ( relevant_durable_high_trust ( claim , evidence ) )
high_count = len ( high_trust ( evidence ) )
if claim . sam_critical and claim . claim_class in CRITICAL_ABSENCE_CLASSES and durable_count == 0 :
verdict = absence_verdict ( claim )
verdict . high_trust_evidence_count = high_count
return verdict
if high_count == 0 :
return nei_verdict ( claim , " No high-trust evidence retrieved for this claim. " , evidence )
return judge_claim_with_model ( claim , evidence )
def render_legacy_from_verdicts ( verdicts : list [ ClaimVerdict ] ) - > str :
vetoes = [ v for v in verdicts if v . status in { " REFUTED " , " REFUTED_BY_ABSENCE " } ]
if not vetoes :
return " yes "
vetoes . sort (
key = lambda v : (
SEVERITY_ORDER . get ( v . claim . claim_class , 99 ) ,
v . claim . source_index ,
)
)
chosen = vetoes [ 0 ]
reason = truncate_chars ( chosen . reason or chosen . claim . text , 140 )
return f " no - [Sanhedrin Veto] [ { chosen . claim . claim_class } ]: { reason } "
2026-05-25 01:44:52 -05:00
def recompute_legacy_from_result ( result : dict [ str , Any ] ) - > str :
vetoes = [ ]
for raw in result . get ( " verdicts " , [ ] ) :
claim = raw . get ( " claim " , { } ) if isinstance ( raw , dict ) else { }
status = str ( raw . get ( " status " , " " ) )
if status not in { " REFUTED " , " REFUTED_BY_ABSENCE " } :
continue
vetoes . append (
(
SEVERITY_ORDER . get ( str ( claim . get ( " claim_class " , " " ) ) , 99 ) ,
int ( claim . get ( " source_index " , 0 ) or 0 ) ,
str ( claim . get ( " claim_class " , " TECHNICAL " ) ) ,
truncate_chars ( str ( raw . get ( " reason " ) or claim . get ( " text " ) or " " ) , 140 ) ,
)
)
if not vetoes :
return " yes "
_ , _ , claim_class , reason = sorted ( vetoes ) [ 0 ]
return f " no - [Sanhedrin Veto] [ { claim_class } ]: { reason } "
def apply_appeals_to_claim_mode_result ( result : dict [ str , Any ] ) - > dict [ str , Any ] :
if sanhedrin_core is None :
return result
appeals = sanhedrin_core . load_appeals ( )
changed = False
for raw in result . get ( " verdicts " , [ ] ) :
if not isinstance ( raw , dict ) or raw . get ( " status " ) not in { " REFUTED " , " REFUTED_BY_ABSENCE " } :
continue
claim = raw . get ( " claim " , { } ) if isinstance ( raw . get ( " claim " ) , dict ) else { }
text = str ( claim . get ( " text " ) or " " )
if sanhedrin_core . is_appealed ( { " fingerprint " : sanhedrin_core . claim_fingerprint ( text ) } , appeals ) :
raw [ " status " ] = " APPEALED "
raw [ " reason " ] = " Prior appeal suppresses this Sanhedrin veto. "
changed = True
if changed :
legacy = recompute_legacy_from_result ( result )
result [ " legacy_verdict " ] = legacy
result [ " decision " ] = " yes " if legacy == " yes " else " no "
result [ " verdict " ] = result [ " decision " ]
result [ " passed " ] = legacy == " yes "
result [ " reason " ] = " " if result [ " passed " ] else legacy . split ( " - " , 1 ) [ - 1 ]
return result
def save_claim_mode_receipt (
draft : str ,
result : dict [ str , Any ] ,
manifest : dict [ str , Any ] | None = None ,
) - > None :
if sanhedrin_core is None :
return
manifest = manifest or sanhedrin_core . new_manifest ( draft )
claims = [ ]
for idx , raw in enumerate ( result . get ( " verdicts " , [ ] ) , start = 1 ) :
if not isinstance ( raw , dict ) :
continue
claim = raw . get ( " claim " , { } ) if isinstance ( raw . get ( " claim " ) , dict ) else { }
text = str ( claim . get ( " text " ) or " " )
claim_class = str ( claim . get ( " claim_class " ) or " TECHNICAL " )
status = str ( raw . get ( " status " ) or " NEI " )
evidence_ids = raw . get ( " evidence_ids " ) if isinstance ( raw . get ( " evidence_ids " ) , list ) else [ ]
if status == " SUPPORTED " :
decision = " pass "
evidence_state = " supported "
fix = " No change required. "
elif status == " APPEALED " :
decision = " appealed "
evidence_state = " appealed "
fix = " Prior appeal suppresses this veto fingerprint. "
elif status == " REFUTED_BY_ABSENCE " :
decision = " veto "
evidence_state = " missing_precedent "
fix = " Remove the unsupported user-specific claim or cite durable Vestige evidence first. "
elif status == " REFUTED " :
decision = " veto "
evidence_state = " contradicted "
fix = " Remove or qualify the contradicted claim using the cited Vestige precedent. "
else :
decision = " pass_unverified "
evidence_state = " not_enough_information "
fix = " No blocking change required. "
claims . append (
{
" id " : f " c { idx : 03d } " ,
" text " : text ,
" fingerprint " : sanhedrin_core . claim_fingerprint ( text ) ,
" class " : claim_class ,
" subject " : " Sam " if bool ( claim . get ( " sam_critical " ) ) else " draft " ,
" risk " : " hard " if bool ( claim . get ( " sam_critical " ) ) else " normal " ,
" evidence_state " : evidence_state ,
" decision " : decision ,
" precedent " : [
{
" type " : " vestige " ,
" summary " : str ( raw . get ( " reason " ) or status ) ,
" evidence " : " , " . join ( str ( eid ) for eid in evidence_ids [ : 5 ] ) ,
" durableCount " : raw . get ( " durable_evidence_count " ) ,
" highTrustCount " : raw . get ( " high_trust_evidence_count " ) ,
}
] ,
" fix " : fix ,
" appeal " : {
" status " : " appealed " if decision == " appealed " else " open " ,
" actions " : [ " stale " , " wrong " , " too_strict " ] ,
} ,
}
)
manifest [ " claims " ] = claims
manifest [ " overall " ] = " pass " if result . get ( " passed " ) else " veto "
if any ( claim [ " decision " ] == " appealed " for claim in claims ) :
manifest [ " overall " ] = " pass_with_warnings " if result . get ( " passed " ) else manifest [ " overall " ]
manifest [ " verdictBar " ] = " APPEALED "
manifest [ " summary " ] = " Prior appeal suppressed a Sanhedrin veto. "
elif result . get ( " passed " ) :
manifest [ " verdictBar " ] = " PASS " if not claims else " NOTE "
manifest [ " summary " ] = " Sanhedrin found no blocking claim issues. "
else :
manifest [ " verdictBar " ] = " VETO "
manifest [ " summary " ] = str ( result . get ( " reason " ) or " Sanhedrin blocked a claim. " )
sanhedrin_core . save_manifest ( manifest )
def save_legacy_receipt ( manifest : dict [ str , Any ] | None , verdict : str , evidence : str = " " ) - > str :
if sanhedrin_core is None or manifest is None :
return verdict
updated = sanhedrin_core . apply_model_verdict ( manifest , verdict , evidence )
sanhedrin_core . save_manifest ( manifest )
return updated
2026-05-24 16:09:44 -05:00
def claim_mode_result ( draft : str ) - > dict [ str , Any ] :
claims = extract_check_worthy_claims ( draft )
staged = load_staged_evidence ( os . environ . get ( STAGE_FILE_ENV ) )
verdicts : list [ ClaimVerdict ] = [ ]
for claim in claims :
evidence , ok = fetch_claim_evidence ( claim )
if not ok :
2026-05-27 19:03:16 -05:00
if sanhedrin_core is not None :
sanhedrin_core . record_fail_open ( " retrieval_unavailable " , claim . text )
2026-05-24 16:09:44 -05:00
verdicts . append (
nei_verdict (
claim ,
" Vestige retrieval unavailable; fail-open for this claim. " ,
staged ,
)
)
continue
combined = dedupe_evidence ( evidence + staged )
verdicts . append ( judge_claim ( claim , combined ) )
legacy_verdict = render_legacy_from_verdicts ( verdicts )
decision = " yes " if legacy_verdict == " yes " else " no "
json_reason = " " if decision == " yes " else legacy_verdict . split ( " - " , 1 ) [ - 1 ]
return {
" mode " : " claim " ,
" decision " : decision ,
" verdict " : decision ,
" reason " : json_reason ,
" passed " : legacy_verdict == " yes " ,
" legacy_verdict " : legacy_verdict ,
" claims_extracted " : len ( claims ) ,
" staged_evidence_count " : len ( staged ) ,
" verdicts " : [ asdict ( v ) for v in verdicts ] ,
}
def print_claim_mode_result ( result : dict [ str , Any ] ) - > None :
if ( os . environ . get ( OUTPUT_ENV ) or " " ) . strip ( ) . lower ( ) == " json " :
print ( json . dumps ( result , ensure_ascii = False , separators = ( " , " , " : " ) ) )
else :
print ( result . get ( " legacy_verdict " ) or " yes " )
2026-04-27 13:20:51 -05:00
def main ( ) - > None :
draft = sys . stdin . read ( ) . strip ( )
if not draft :
print ( " yes " )
return
2026-05-25 01:44:52 -05:00
manifest = sanhedrin_core . new_manifest ( draft ) if sanhedrin_core is not None else None
if sanhedrin_core is not None and manifest is not None :
receipt_veto = sanhedrin_core . apply_receipt_lock ( manifest )
if receipt_veto :
sanhedrin_core . save_manifest ( manifest )
print ( f " no - [Sanhedrin Veto] [TECHNICAL]: { receipt_veto } " )
return
2026-05-24 16:09:44 -05:00
if env_flag ( CLAIM_MODE_ENV ) :
2026-05-25 01:44:52 -05:00
result = apply_appeals_to_claim_mode_result ( claim_mode_result ( draft ) )
save_claim_mode_receipt ( draft , result , manifest )
print_claim_mode_result ( result )
2026-05-24 16:09:44 -05:00
return
2026-04-27 13:20:51 -05:00
evidence , high_trust_count = fetch_evidence ( draft )
# Auto-pass if no high-trust evidence — model can't legitimately veto
# without something concrete to cite. Eliminates the common false-positive
# mode where the model invents a contradiction from low-trust noise.
if high_trust_count == 0 :
2026-05-25 01:44:52 -05:00
save_legacy_receipt ( manifest , " yes " , evidence )
2026-04-27 13:20:51 -05:00
print ( " yes " )
return
verdict = judge ( draft , evidence )
if not verdict :
# Fail-open: server unreachable, malformed response, etc.
2026-05-27 19:03:16 -05:00
if sanhedrin_core is not None :
sanhedrin_core . record_fail_open ( " legacy_model_unavailable " , f " endpoint= { SANHEDRIN_ENDPOINT } " )
2026-05-25 01:44:52 -05:00
save_legacy_receipt ( manifest , " yes " , evidence )
2026-04-27 13:20:51 -05:00
print ( " yes " )
return
2026-05-25 01:44:52 -05:00
verdict = save_legacy_receipt ( manifest , verdict , evidence )
2026-04-27 13:20:51 -05:00
print ( verdict )
if __name__ == " __main__ " :
main ( )