Make Sanhedrin optional in v2.1.0

This commit is contained in:
Sam Valladares 2026-05-01 04:55:54 -05:00
parent c9e96b06fd
commit 4f457ec2db
8 changed files with 315 additions and 131 deletions

View file

@ -1,5 +1,5 @@
#!/usr/bin/env python3
# sanhedrin-local.py — Local Qwen3.6-35B-A3B Sanhedrin Executioner.
# sanhedrin-local.py — OpenAI-compatible Sanhedrin Executioner bridge.
# Drop-in replacement for the Haiku 4.5 subagent that sanhedrin.sh used to spawn.
#
# Reads draft from stdin, prints single-line verdict to stdout:
@ -8,10 +8,10 @@
#
# Architecture:
# stdin (draft) -> Vestige /api/deep_reference (single semantic query)
# -> mlx_lm.server localhost:8080 (one-shot judgment)
# -> OpenAI-compatible chat endpoint (one-shot judgment)
# -> stdout (single-line verdict)
#
# Fail-open: if mlx-server unreachable, print "yes" and exit 0 (don't break
# Fail-open: if the endpoint is unreachable, print "yes" and exit 0 (don't break
# the Cognitive Sandwich on infra errors). The wrapping sanhedrin.sh maps
# "yes" to exit 0, so this preserves existing fail-open semantics.
@ -35,7 +35,11 @@ VESTIGE_BASE_URL = (
os.environ.get("VESTIGE_BASE_URL") or f"http://127.0.0.1:{DASHBOARD_PORT}"
).rstrip("/")
MLX_ENDPOINT = os.environ.get("MLX_ENDPOINT") or "http://127.0.0.1:8080/v1/chat/completions"
SANHEDRIN_ENDPOINT = (
os.environ.get("VESTIGE_SANHEDRIN_ENDPOINT")
or os.environ.get("MLX_ENDPOINT")
or "http://127.0.0.1:8080/v1/chat/completions"
)
VESTIGE_ENDPOINT = (
os.environ.get("VESTIGE_DEEP_REFERENCE_ENDPOINT")
or f"{VESTIGE_BASE_URL}/api/deep_reference"
@ -43,8 +47,12 @@ VESTIGE_ENDPOINT = (
VESTIGE_HEALTH = (
os.environ.get("VESTIGE_HEALTH_ENDPOINT") or f"{VESTIGE_BASE_URL}/api/health"
)
MODEL = os.environ.get("VESTIGE_SANDWICH_MODEL") or "mlx-community/Qwen3.6-35B-A3B-4bit"
MLX_TIMEOUT = env_int("MLX_TIMEOUT", 45)
MODEL = (
os.environ.get("VESTIGE_SANHEDRIN_MODEL")
or os.environ.get("VESTIGE_SANDWICH_MODEL")
or "mlx-community/Qwen3.6-35B-A3B-4bit"
)
SANHEDRIN_TIMEOUT = env_int("VESTIGE_SANHEDRIN_TIMEOUT", env_int("MLX_TIMEOUT", 45))
VESTIGE_TIMEOUT = env_int("VESTIGE_TIMEOUT", 5)
THINK_RE = re.compile(r"<think>.*?</think>", re.DOTALL | re.IGNORECASE)
@ -289,7 +297,7 @@ def judge(draft: str, evidence: str) -> str:
"\n\nOn second thought", "\n\nOh wait",
],
}
resp = post_json(MLX_ENDPOINT, body, MLX_TIMEOUT)
resp = post_json(SANHEDRIN_ENDPOINT, body, SANHEDRIN_TIMEOUT)
if not isinstance(resp, dict):
return ""
try:

View file

@ -17,25 +17,37 @@
# sanhedrin.sh (opt-in; OpenAI-compatible endpoint judgment, may block) →
# synthesis-stop-validator.sh (existing regex hedge check, may block)
#
# Opt-in: set VESTIGE_SANHEDRIN_ENABLED=1 in parent shell.
# Opt-in: set VESTIGE_SANHEDRIN_ENABLED=1 in parent shell, or install with
# scripts/install-sandwich.sh --enable-sanhedrin.
# Re-entrancy lock: VESTIGE_EXECUTIONER_ACTIVE=1 inside the subagent.
#
# Ship date 2026-04-20.
set -u
# === OPT-OUT GATE ===
# Post-Cognitive Sanhedrin is ON by default as of 2026-04-21 (birthday
# launch day). To disable, set VESTIGE_SANHEDRIN_ENABLED=0 in your
# environment. Default-on guarantees the Cognitive Sandwich fires on
# fresh machines, Docker containers, GUI-launched Claude Code, and
# shells without .zshrc — any case where the Claude Code process lacks
# a sourced profile. The re-entrancy guard (VESTIGE_EXECUTIONER_ACTIVE)
# below still prevents fork-bombs from the subagent's own Stop hook.
if [ "${VESTIGE_SANHEDRIN_ENABLED:-1}" = "0" ]; then
exit 0
# === OPT-IN GATE ===
# Sanhedrin is heavyweight: the default local backend is a ~19 GB model and
# needs roughly 20+ GB of free RAM. Keep it disabled unless the user explicitly
# opts in. The installer writes this env file only for --enable-sanhedrin.
#
# Flow: source the optional env file (path overridable via
# VESTIGE_SANHEDRIN_ENV), then continue only if VESTIGE_SANHEDRIN_ENABLED holds
# an accepted truthy value. Every other outcome exits 0 so the hook is a no-op.
SANHEDRIN_ENV="${VESTIGE_SANHEDRIN_ENV:-$HOME/.claude/hooks/vestige-sanhedrin.env}"
if [ -f "$SANHEDRIN_ENV" ]; then
# Turn nounset off while sourcing (presumably so an env file that references
# an unset variable cannot abort the hook), and turn allexport on so every
# variable the file assigns is exported to child processes.
set +u
set -a
# shellcheck disable=SC1090
. "$SANHEDRIN_ENV" 2>/dev/null || {
# Sourcing failed: restore the shell options and exit 0 rather than block.
set +a
set -u
exit 0
}
# Restore the options in effect before sourcing (allexport off, nounset on).
set +a
set -u
fi
# NOTE(review): if the env file assigns VESTIGE_SANHEDRIN_ENABLED
# unconditionally, it overrides a value exported by the parent shell (e.g. an
# explicit =0) — confirm that precedence is intended.
# Unset defaults to 0 (disabled). Only the exact spellings listed below enable
# the hook; any other value, including mixed case such as "True", exits 0.
case "${VESTIGE_SANHEDRIN_ENABLED:-0}" in
1|true|TRUE|yes|YES|on|ON) ;;
*) exit 0 ;;
esac
# === RE-ENTRANCY GUARD ===
# The Executioner's own Stop hook will fire when it returns — prevent
# recursive spawns that would fork-bomb the quota.
@ -114,11 +126,11 @@ if [ -z "$DRAFT" ]; then
fi
# === VERIFY local executioner bridge available ===
# 2026-04-25: switched from Haiku 4.5 subagent to local Qwen3.6-35B-A3B
# via mlx_lm.server (launchd com.vestige.mlx-server). Bridge script
# fetches Vestige evidence via HTTP API (VESTIGE_DASHBOARD_PORT, default 3927)
# then judges via MLX_ENDPOINT (default port 8080). Zero per-token cost, fully offline,
# sub-second-to-15s verdict latency. Fail-open if mlx-server unreachable.
# 2026-04-25: switched from Haiku 4.5 subagent to an OpenAI-compatible
# local/remote endpoint. On Apple Silicon the optional launchd path starts
# mlx_lm.server; on x86 users can point VESTIGE_SANHEDRIN_ENDPOINT at vLLM,
# Ollama, llama.cpp, or any compatible /v1/chat/completions endpoint.
# Fail-open if the endpoint is unreachable.
BRIDGE="$HOME/.claude/hooks/sanhedrin-local.py"
if [ ! -x "$BRIDGE" ] && [ ! -f "$BRIDGE" ]; then
exit 0
@ -191,15 +203,15 @@ case "$TRIMMED" in
$REASON
The Executioner (local Qwen3.6-35B-A3B via mlx_lm.server, fresh context,
fed Vestige deep_reference evidence over HTTP) judged your draft and
The Executioner (Sanhedrin endpoint, fresh context, fed Vestige
deep_reference evidence over HTTP) judged your draft and
found a contradiction against a high-trust memory.
You may NOT stop. Rewrite WITHOUT the contradicted claim. Use
mcp__vestige__deep_reference to inspect the cited memory and cite the
correct replacement pattern from its \`recommended\` field.
Local-only, zero API cost, fully offline. Bridge script:
Bridge script:
~/.claude/hooks/sanhedrin-local.py
SANHEDRIN_MSG
exit 2

View file

@ -14,7 +14,6 @@
{
"hooks": [
{ "type": "command", "command": "$HOME/.claude/hooks/veto-detector.sh", "timeout": 6 },
{ "type": "command", "command": "$HOME/.claude/hooks/sanhedrin.sh", "timeout": 70 },
{ "type": "command", "command": "$HOME/.claude/hooks/synthesis-stop-validator.sh", "timeout": 6 }
]
}

View file

@ -0,0 +1,23 @@
{
"hooks": {
"UserPromptSubmit": [
{
"hooks": [
{ "type": "command", "command": "$HOME/.claude/hooks/synthesis-preflight.sh", "timeout": 8 },
{ "type": "command", "command": "$HOME/.claude/hooks/cwd-state-injector.sh", "timeout": 8 },
{ "type": "command", "command": "$HOME/.claude/hooks/vestige-pulse-daemon.sh", "timeout": 6 },
{ "type": "command", "command": "$HOME/.claude/hooks/preflight-swarm.sh", "timeout": 45 }
]
}
],
"Stop": [
{
"hooks": [
{ "type": "command", "command": "$HOME/.claude/hooks/veto-detector.sh", "timeout": 6 },
{ "type": "command", "command": "VESTIGE_SANHEDRIN_ENABLED=1 $HOME/.claude/hooks/sanhedrin.sh", "timeout": 70 },
{ "type": "command", "command": "$HOME/.claude/hooks/synthesis-stop-validator.sh", "timeout": 6 }
]
}
]
}
}