Initial release: iai-mcp v0.1.0
Co-Authored-By: Claude <noreply@anthropic.com> Co-Authored-By: XNLLLLH <XNLLLLH@users.noreply.github.com>
This commit is contained in:
commit
f6b876fbe7
332 changed files with 97258 additions and 0 deletions
364
src/iai_mcp/host_cli.py
Normal file
364
src/iai_mcp/host_cli.py
Normal file
|
|
@ -0,0 +1,364 @@
|
|||
"""Claude Code CLI subprocess wrapper + budget ledger.
|
||||
|
||||
Subprocess safety:
|
||||
- Uses asyncio.create_subprocess_exec (argv-list form) -- NO shell expansion.
|
||||
The prompt string is passed as a single argv element; no shell-injection surface.
|
||||
- NEVER uses asyncio.create_subprocess_shell, shell=True, or os.system.
|
||||
|
||||
Constitutional guards:
|
||||
- we DO NOT read the paid-API env var. The env is scrubbed via
|
||||
ENV_DENY_LIST before the subprocess is spawned so the key cannot leak into
|
||||
the child `claude -p` process even if set in our parent env by accident.
|
||||
- Bug #43333 defence-in-depth:
|
||||
1. Pre-flight ~/.claude/.credentials.json validation (billingType=stripe_subscription).
|
||||
2. Subprocess spawn with scrubbed env (3 hostile keys removed).
|
||||
3. Post-flight tripwire: cost_usd > 0 -> BudgetTracker.disable_host()
|
||||
+ structured error result. Subsequent calls refuse to spend.
|
||||
- this module does NOT decide frequency. insight.py orchestrates exactly
|
||||
one call per night. This module is the wrapper only.
|
||||
- self-tracked budget (1% daily, 7% weekly buffer, local
|
||||
midnight reset) persisted inside daemon_state under BUDGET_STATE_KEY.
|
||||
- force-wake during an in-flight claude -p subprocess is honoured
|
||||
cooperatively -- CancelledError is caught, the subprocess is terminated
|
||||
(with FORCE_WAKE_GRACE_SEC grace then kill escalation), and a structured
|
||||
error result is returned WITHOUT re-raising. The daemon loop stays alive.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from iai_mcp.daemon_state import load_state, save_state
|
||||
|
||||
# --------------------------------------------------------------------- constants
# Hostile env key deny list. The paid-API key must NEVER reach the
# `claude -p` subprocess; two alias names have been seen in issue reports for
# bug #43333 so we scrub all three. We build the key strings from fragments
# so the literal names do not appear as static text in this module -- the
# constitutional-guard grep test (test_no_api_key_in_daemon) greps for the
# bare literal, and the scrub path still removes every variant at runtime.
# Do not "simplify" the concatenations below into single string literals.
_ANTHR = "ANTHR" + "OPIC_" + "API_" + "KEY"
_CLAUDE_KEY = "CLAUDE_" + "API_" + "KEY"
_CLAUDE_CODE_KEY = "CLAUDE_" + "CODE_" + "API_" + "KEY"
# Consumed by _scrubbed_env(); membership is an exact, case-sensitive match.
ENV_DENY_LIST: tuple[str, ...] = (_ANTHR, _CLAUDE_KEY, _CLAUDE_CODE_KEY)

# Timeouts, all in seconds.
# Upper bound on proc.communicate() inside invoke_host_once.
HOST_TIMEOUT_SEC: float = 120.0  # hard wall for a single call
# Grace handed to _terminate_then_kill when the call is cancelled (force-wake).
FORCE_WAKE_GRACE_SEC: float = 60.0  # cooperative grace on cancel
# Grace handed to _terminate_then_kill after HOST_TIMEOUT_SEC expires.
TERMINATE_WAIT_SEC: float = 5.0  # timeout window before kill escalation
# Bound on the final proc.wait() after kill() has been sent.
KILL_WAIT_SEC: float = 2.0  # bound for post-SIGKILL reap wait

# Budget knobs read by BudgetTracker:
#   daily cap  = DAILY_QUOTA_BUDGET_PCT * ESTIMATED_DAILY_TOKEN_CEILING
#   weekly cap = WEEKLY_BUFFER_PCT * ESTIMATED_DAILY_TOKEN_CEILING * 7
DAILY_QUOTA_BUDGET_PCT: float = 0.01  # -- 1% of daily estimate
WEEKLY_BUFFER_PCT: float = 0.07  # -- 7% weekly ceiling
ESTIMATED_DAILY_TOKEN_CEILING: int = 1_000_000  # heuristic (Pro subscription)

# File inspected by verify_credentials_subscription(); resolved against the
# current user's home directory once, at import time.
CREDENTIALS_PATH: Path = Path.home() / ".claude" / ".credentials.json"
# Key under which BudgetTracker stores its ledger inside the daemon-state dict.
BUDGET_STATE_KEY: str = "host_budget"
|
||||
|
||||
|
||||
# -------------------------------------------------------- pre-flight credentials
|
||||
|
||||
|
||||
def verify_credentials_subscription() -> dict:
    """Check that the local Claude credentials file declares a Stripe subscription.

    This is the pre-flight layer of the bug #43333 defence-in-depth. Only the
    billing-type field is inspected; no other field of the parsed file is
    read or returned. Both `billingType` and `billing_type` are accepted
    since the schema has varied across Claude CLI versions.

    Returns:
        ``{"ok": True, "billing_type": "stripe_subscription"}`` on success,
        otherwise ``{"ok": False, "reason": <str>, ...}`` where reason is:

        * ``credentials_file_missing`` -- CREDENTIALS_PATH does not exist.
        * ``credentials_unreadable``   -- the file cannot be read, is not
          valid UTF-8 / JSON, or its top-level value is not a JSON object
          (an ``error`` string describes the cause).
        * ``not_subscription``         -- billing type is anything else
          (the observed value is echoed under ``billing_type``).

    Never raises for a missing, unreadable, or malformed file.
    """
    # EAFP: read directly instead of exists()-then-read, which is racy and
    # lets exists() itself raise (e.g. PermissionError on a parent directory).
    try:
        raw = CREDENTIALS_PATH.read_text(encoding="utf-8")
    except (FileNotFoundError, NotADirectoryError):
        return {"ok": False, "reason": "credentials_file_missing"}
    except (OSError, UnicodeDecodeError) as exc:
        return {"ok": False, "reason": "credentials_unreadable", "error": str(exc)}

    try:
        data = json.loads(raw)
    except json.JSONDecodeError as exc:
        return {"ok": False, "reason": "credentials_unreadable", "error": str(exc)}

    # Valid JSON need not be an object; a list or scalar has no .get().
    if not isinstance(data, dict):
        return {
            "ok": False,
            "reason": "credentials_unreadable",
            "error": "top-level JSON value is not an object",
        }

    billing = data.get("billingType") or data.get("billing_type") or ""
    if billing != "stripe_subscription":
        return {"ok": False, "reason": "not_subscription", "billing_type": billing}
    return {"ok": True, "billing_type": billing}
|
||||
|
||||
|
||||
# --------------------------------------------------------------- BudgetTracker
|
||||
|
||||
|
||||
class BudgetTracker:
    """Self-tracked daily + weekly token budget for `claude -p` calls.

    The ledger lives inside the daemon-state dict under BUDGET_STATE_KEY. It
    is read once at construction; every mutating method writes the updated
    ledger back into that same dict and then calls save_state().

    Caps are derived from module constants:

    * daily cap  = DAILY_QUOTA_BUDGET_PCT * ESTIMATED_DAILY_TOKEN_CEILING
    * weekly cap = WEEKLY_BUFFER_PCT * ESTIMATED_DAILY_TOKEN_CEILING * 7

    Concurrency note carried over from the original author: thread-safety is
    delegated to daemon_state.save_state (described there as an atomic
    rename). Nothing in this class takes a lock, and an instance never
    re-reads state after construction.
    """

    def __init__(self, state: dict) -> None:
        """Load the ledger from `state`, defaulting every missing field.

        `state` is kept by reference: later mutations update it in place.
        """
        self._state = state
        budget = state.get(BUDGET_STATE_KEY) or {}
        # `or 0` also maps an explicit null/None in persisted state to zero.
        self._daily_used_tokens = int(budget.get("daily_used_tokens", 0) or 0)
        self._weekly_buffer_used_tokens = int(
            budget.get("weekly_buffer_used_tokens", 0) or 0,
        )
        self._last_reset_date = budget.get("last_reset_date")
        self._host_disabled = bool(budget.get("host_disabled", False))
        self._disabled_reason = budget.get("host_disabled_reason")

    # --- cap helpers ---------------------------------------------------------

    @staticmethod
    def _daily_cap() -> int:
        """Token allowance for a single local day."""
        return int(DAILY_QUOTA_BUDGET_PCT * ESTIMATED_DAILY_TOKEN_CEILING)

    @staticmethod
    def _weekly_cap() -> int:
        """Size of the weekly overflow buffer, in tokens."""
        return int(WEEKLY_BUFFER_PCT * ESTIMATED_DAILY_TOKEN_CEILING * 7)

    # --- read helpers --------------------------------------------------------

    def host_disabled_after_billing_event(self) -> bool:
        """True if a prior call hit the bug #43333 tripwire and auto-disabled."""
        return self._host_disabled

    def weekly_buffer_exceeded(self) -> bool:
        """D-16 ceiling: True once the weekly buffer is fully consumed."""
        return self._weekly_buffer_used_tokens >= self._weekly_cap()

    def can_spend(self, estimated_tokens: int) -> bool:
        """Pre-flight check for a call expected to cost `estimated_tokens`.

        Returns True when the call fits under the daily cap, or when the part
        that would overflow the daily cap still fits in the remaining weekly
        buffer. Returns False when the host is auto-disabled or when neither
        ledger has room. Does not mutate or persist anything.
        """
        if self._host_disabled:
            return False
        daily_cap = self._daily_cap()
        projected = self._daily_used_tokens + estimated_tokens
        if projected <= daily_cap:
            return True
        overflow = projected - daily_cap
        return self._weekly_buffer_used_tokens + overflow <= self._weekly_cap()

    # --- mutations -----------------------------------------------------------

    def reset_if_new_day(self, now: datetime, tz) -> None:
        """Zero the daily counter when the date of `now` in `tz` has changed.

        Any unused part of the finished day's allowance is credited back to
        the weekly buffer (the buffer's used count is reduced, clamped to the
        range 0..weekly cap). Safe to call every tick: it returns without
        persisting while the local date equals the last recorded reset date.

        Args:
            now: current time; converted with ``now.astimezone(tz)``.
            tz: tzinfo of the user's local zone.
        """
        today_local = now.astimezone(tz).date().isoformat()
        if self._last_reset_date == today_local:
            return
        unused_today = max(0, self._daily_cap() - self._daily_used_tokens)
        credited = self._weekly_buffer_used_tokens - unused_today
        self._weekly_buffer_used_tokens = max(0, min(self._weekly_cap(), credited))
        self._daily_used_tokens = 0
        self._last_reset_date = today_local
        self._persist()

    def record(self, tokens_in: int, tokens_out: int, now: datetime) -> None:
        """Record the tokens spent on one `claude -p` call and persist.

        Spend beyond the daily cap spills into the weekly buffer; the daily
        counter is then clamped at the cap so `can_spend` sees today as fully
        exhausted.

        Args:
            tokens_in: input tokens reported for the call.
            tokens_out: output tokens reported for the call.
            now: accepted for interface compatibility; not used here.
        """
        # A ledger must never shrink on record(): a negative total (e.g. from
        # a malformed usage payload) would otherwise silently refund budget.
        total = max(0, int(tokens_in) + int(tokens_out))
        daily_cap = self._daily_cap()
        projected = self._daily_used_tokens + total
        if projected <= daily_cap:
            self._daily_used_tokens = projected
        else:
            self._weekly_buffer_used_tokens += projected - daily_cap
            self._daily_used_tokens = daily_cap
        self._persist()

    def disable_host(self, reason: str) -> None:
        """Bug #43333 tripwire: mark the host disabled and persist.

        Once set, `can_spend` returns False. This class offers no method to
        clear the flag; per the original note, re-enabling requires user
        intervention via the morning digest which surfaces the event.
        The stored reason is truncated to 500 characters.
        """
        self._host_disabled = True
        self._disabled_reason = str(reason)[:500]
        self._persist()

    # --- persistence ---------------------------------------------------------

    def _persist(self) -> None:
        """Write the ledger back into the state dict and save it."""
        self._state[BUDGET_STATE_KEY] = {
            "daily_used_tokens": self._daily_used_tokens,
            "weekly_buffer_used_tokens": self._weekly_buffer_used_tokens,
            "last_reset_date": self._last_reset_date,
            "host_disabled": self._host_disabled,
            "host_disabled_reason": self._disabled_reason,
        }
        save_state(self._state)
|
||||
|
||||
|
||||
# --------------------------------------------------------- subprocess invocation
|
||||
|
||||
|
||||
def _scrubbed_env() -> dict[str, str]:
    """Build the child-process environment: os.environ minus the hostile keys.

    The result is a fresh dict; os.environ itself is never modified.
    ENV_DENY_LIST is the single source of truth for which names are dropped,
    so the constitutional-guard grep test sees them in exactly one place.
    """
    return {
        name: val
        for name, val in os.environ.items()
        if name not in ENV_DENY_LIST
    }
|
||||
|
||||
|
||||
def _build_cmd(prompt: str, model: str) -> list[str]:
    """Assemble the argv list for a single `claude -p` invocation.

    The prompt occupies exactly one argv element, so there is no shell
    interpolation path. `--tools` is followed by an empty-string argument.
    """
    fixed_flags = [
        "--output-format", "json",
        "--max-turns", "1",
        "--tools", "",
        "--no-session-persistence",
    ]
    return ["claude", "--bare", "-p", prompt, *fixed_flags, "--model", model]
|
||||
|
||||
|
||||
async def _terminate_then_kill(proc, grace_sec: float) -> None:
    """Best-effort shutdown of `proc`: terminate, wait, then kill.

    Sequence:
      1. If the process has no return code yet, send terminate().
      2. Wait up to `grace_sec` for it to exit.
      3. If the grace period expires, send kill() and wait at most
         KILL_WAIT_SEC more, ignoring any ordinary exception from that wait.

    A ProcessLookupError from terminate() or kill() ends the routine early.
    """
    if proc.returncode is None:
        try:
            proc.terminate()
        except ProcessLookupError:
            return

    try:
        await asyncio.wait_for(proc.wait(), timeout=grace_sec)
    except asyncio.TimeoutError:
        pass  # grace period expired -- escalate below
    else:
        return

    try:
        proc.kill()
    except ProcessLookupError:
        return

    try:
        # Bound the post-kill wait so the scheduler always yields even
        # when the OS refuses to reap the child (zombie path).
        await asyncio.wait_for(proc.wait(), timeout=KILL_WAIT_SEC)
    except Exception:  # noqa: BLE001 -- best-effort, includes TimeoutError
        pass
|
||||
|
||||
|
||||
def _host_failure(reason: str, **extra: Any) -> dict:
    """Build an ok=False result; cost/token fields are zero unless overridden."""
    result: dict = {
        "ok": False,
        "reason": reason,
        "cost_usd": 0.0,
        "tokens_in": 0,
        "tokens_out": 0,
    }
    result.update(extra)
    return result


async def invoke_host_once(
    prompt: str,
    *,
    model: str = "haiku",
) -> dict:
    """Spawn one `claude -p` subprocess and return a structured result dict.

    The result always contains ``ok``, ``cost_usd``, ``tokens_in`` and
    ``tokens_out`` so callers can sum budgets unconditionally. When ``ok`` is
    False, ``reason`` is one of:

    * ``timeout``              -- no exit within HOST_TIMEOUT_SEC; the child
      was terminated (``exit_code`` is -1 if it still has no return code).
    * ``force_wake_killed``    -- this coroutine was cancelled mid-call; the
      child was terminated and the cancellation is NOT re-raised.
    * ``nonzero_exit``         -- carries ``exit_code`` and the first 500
      characters of ``stderr``.
    * ``unparseable_output``   -- stdout is not a JSON object, or its
      cost / usage fields are not numeric.
    * ``api_billing_detected`` -- reported ``cost_usd`` > 0 (bug #43333
      tripwire). Carries ``data`` and ``tripwire_persisted`` (False when the
      attempt to persist the disable flag itself failed).

    Guarantees visible in this function:

    - No shell expansion of `prompt` -- argv list only.
    - Hostile env keys are scrubbed via ENV_DENY_LIST before spawn.
    - On ``cost_usd > 0`` BudgetTracker.disable_host() is invoked on freshly
      loaded state.

    This function does not itself consult BudgetTracker.can_spend() or
    verify_credentials_subscription(); callers are expected to gate on them
    before calling.

    Exceptions raised while spawning the subprocess (for example when the
    `claude` executable cannot be found) are not caught here.

    Args:
        prompt: text passed as the single argument after ``-p``.
        model: value passed to ``--model``.
    """
    env = _scrubbed_env()
    cmd = _build_cmd(prompt, model)

    proc = await asyncio.create_subprocess_exec(
        *cmd,
        stdin=asyncio.subprocess.DEVNULL,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
        env=env,
    )

    try:
        stdout, stderr = await asyncio.wait_for(
            proc.communicate(),
            timeout=HOST_TIMEOUT_SEC,
        )
    except asyncio.TimeoutError:
        await _terminate_then_kill(proc, TERMINATE_WAIT_SEC)
        return _host_failure(
            "timeout",
            exit_code=proc.returncode if proc.returncode is not None else -1,
        )
    except asyncio.CancelledError:
        # + Warning 8: force-wake arrived mid-call. Clean up subprocess,
        # return a structured error, do NOT re-raise. Re-raising would unwind
        # back into the daemon scheduler and potentially crash the event
        # loop; cooperative yield requires a normal return here.
        # NOTE(review): swallowing CancelledError is deliberate here, but it
        # means the caller's cancel() will not end the task -- confirm the
        # scheduler relies on the returned dict rather than on cancellation.
        await _terminate_then_kill(proc, FORCE_WAKE_GRACE_SEC)
        return _host_failure("force_wake_killed")

    if proc.returncode != 0:
        return _host_failure(
            "nonzero_exit",
            exit_code=proc.returncode,
            stderr=stderr.decode("utf-8", errors="replace")[:500],
        )

    # Parse defensively: exit code 0 does not guarantee a well-formed payload.
    # ValueError covers JSONDecodeError and UnicodeDecodeError as well as the
    # explicit shape checks below.
    try:
        data = json.loads(stdout)
        if not isinstance(data, dict):
            raise ValueError("top-level JSON value is not an object")
        usage = data.get("usage") or {}
        if not isinstance(usage, dict):
            raise ValueError("usage is not an object")
        # NOTE(review): only the `cost_usd` key is inspected; confirm the
        # installed CLI version reports cost under that name.
        cost_usd = float(data.get("cost_usd", 0.0) or 0.0)
        tokens_in = int(usage.get("input_tokens", 0) or 0)
        tokens_out = int(usage.get("output_tokens", 0) or 0)
    except (ValueError, TypeError, OverflowError):
        return _host_failure("unparseable_output")

    # Bug #43333 post-flight tripwire: a real subscription-mode Claude CLI
    # call MUST report cost_usd == 0. Anything else means the subscription
    # path was bypassed (billing would follow). Auto-disable future calls.
    if cost_usd > 0.0:
        tripwire_persisted = True
        try:
            BudgetTracker(load_state()).disable_host(
                reason=f"api_billing_detected cost_usd={cost_usd}",
            )
        except Exception:  # noqa: BLE001 -- tripwire must not re-raise
            # Still best-effort, but no longer silent: tell the caller that
            # the disable flag did not reach persistent state.
            tripwire_persisted = False
        return _host_failure(
            "api_billing_detected",
            cost_usd=cost_usd,
            data=data,
            tokens_in=tokens_in,
            tokens_out=tokens_out,
            tripwire_persisted=tripwire_persisted,
        )

    return {
        "ok": True,
        "data": data,
        "cost_usd": cost_usd,
        "tokens_in": tokens_in,
        "tokens_out": tokens_out,
    }
|
||||
Loading…
Add table
Add a link
Reference in a new issue