# iai-mcp-opencode/src/iai_mcp/guard.py

"""D-GUARD: graceful-degradation ladder before any LLM call.

Every LLM-dependent operation must pass through `should_call_llm`
BEFORE making an API call. The 7-step ladder (D-GUARD):

1. sleep.llm_enabled=true? else Tier 0
2. API key present? else Tier 0
3. BudgetLedger daily cap OK? else Tier 0
4. BudgetLedger monthly cap OK? else Tier 0
5. RateLimitLedger: last 429 > 15 min ago? else Tier 0 this cycle
6. API call with retry(max=2, exp backoff) + timeout(60s)  -- caller's job
7. On 429/400/401/5xx -> record in ledger, Tier 0 this cycle  -- caller's job

Write & read paths (memory_recall/reinforce/contradict, profile_get/set,
session_start) NEVER block on LLM failure. LLM failures only reduce the QUALITY
of semantic consolidation, schema induction, and identity refinement.

Budget defaults: daily_usd_cap=$0.10, monthly_usd_cap=$3.00,
cooldown=15min, on_cap_hit=fallback_to_local.

BudgetLedger + RateLimitLedger persist in LanceDB tables (budget_ledger,
ratelimit_ledger) created by MemoryStore._ensure_tables.
"""
from __future__ import annotations

from datetime import datetime, timedelta, timezone

from iai_mcp.store import BUDGET_TABLE, RATELIMIT_TABLE, MemoryStore


# D-GUARD defaults
# Default per-day spend ceiling in USD; BudgetLedger falls back to it when no
# daily_usd_cap is passed.
BUDGET_DAILY_USD_DEFAULT = 0.10
# Default per-month spend ceiling in USD; BudgetLedger falls back to it when no
# monthly_usd_cap is passed.
BUDGET_MONTHLY_USD_DEFAULT = 3.00
# Default cooldown window in minutes; RateLimitLedger falls back to it when no
# cooldown_minutes is passed.
RATELIMIT_COOLDOWN_MIN = 15


class BudgetLedger:
    """LanceDB-backed daily + monthly USD spend tracker (D-GUARD).

    Caps default to $0.10/day and $3.00/month. Both are advisory (no OS-level
    enforcement); caller inspects can_spend() before invoking an LLM API.

    Days and months are bucketed in UTC. Nothing is cached on the instance:
    every query opens BUDGET_TABLE and re-reads it.
    """

    def __init__(
        self,
        store: MemoryStore,
        daily_usd_cap: float = BUDGET_DAILY_USD_DEFAULT,
        monthly_usd_cap: float = BUDGET_MONTHLY_USD_DEFAULT,
    ) -> None:
        """Bind the ledger to `store` and fix its caps.

        Args:
            store: object whose `db.open_table(BUDGET_TABLE)` yields the
                ledger table.
            daily_usd_cap: maximum USD that may be spent per UTC day.
            monthly_usd_cap: maximum USD that may be spent per UTC month.
        """
        self.store = store
        self.daily_cap = float(daily_usd_cap)
        self.monthly_cap = float(monthly_usd_cap)

    # ---- internal helpers

    def _today_utc(self) -> str:
        """Return the current UTC date as "YYYY-MM-DD" (the `date` key)."""
        return datetime.now(timezone.utc).strftime("%Y-%m-%d")

    def _this_month(self) -> str:
        """Return the current UTC month as "YYYY-MM" (a prefix of `date`)."""
        return datetime.now(timezone.utc).strftime("%Y-%m")

    # ---- queries

    def daily_used(self) -> float:
        """Sum of usd_spent rows for today (UTC); 0.0 when there are none."""
        df = self.store.db.open_table(BUDGET_TABLE).to_pandas()
        if df.empty:
            return 0.0
        is_today = df["date"] == self._today_utc()
        # Summing an empty selection already yields 0.
        return float(df.loc[is_today, "usd_spent"].sum())

    def monthly_used(self) -> float:
        """Sum of usd_spent rows for the current month (UTC); 0.0 if none."""
        df = self.store.db.open_table(BUDGET_TABLE).to_pandas()
        if df.empty:
            return 0.0
        # na=False: a row with a missing date is simply not counted (the same
        # outcome daily_used() gets from `==`) instead of producing an NA mask
        # that pandas refuses to index with.
        in_month = df["date"].str.startswith(self._this_month(), na=False)
        return float(df.loc[in_month, "usd_spent"].sum())

    def can_spend(self, usd: float) -> tuple[bool, str]:
        """Check whether spending `usd` more keeps both caps intact.

        Args:
            usd: estimated cost in USD of the call about to be made.

        Returns:
            (ok, reason). reason is "" on success; otherwise it says whether
            the estimate was invalid or which cap (daily is checked before
            monthly) would be exceeded.
        """
        amount = float(usd)
        # NaN compares False against everything, so a NaN estimate would slip
        # past both `>` cap checks below. Negative and infinite estimates are
        # equally meaningless. Fail closed on all three.
        if not (0.0 <= amount < float("inf")):
            return False, f"invalid spend estimate ({amount!r})"

        daily = self.daily_used()
        if daily + amount > self.daily_cap:
            return (
                False,
                f"daily cap exceeded (used {daily:.4f} + {amount:.4f} "
                f"> {self.daily_cap:.4f})",
            )
        monthly = self.monthly_used()
        if monthly + amount > self.monthly_cap:
            return (
                False,
                f"monthly cap exceeded (used {monthly:.4f} + {amount:.4f} "
                f"> {self.monthly_cap:.4f})",
            )
        return True, ""

    # ---- writes

    def record_spend(self, usd: float, kind: str = "llm") -> None:
        """Persist a spend event to the ledger.

        Appends one row stamped with today's UTC date and the current UTC
        time. `usd` is stored as given (after float()); no validation is
        applied here.

        Args:
            usd: amount spent, in USD.
            kind: free-form label stored with the row.
        """
        tbl = self.store.db.open_table(BUDGET_TABLE)
        tbl.add(
            [
                {
                    "date": self._today_utc(),
                    "usd_spent": float(usd),
                    "kind": kind,
                    "ts": datetime.now(timezone.utc),
                }
            ]
        )


class RateLimitLedger:
    """Persisted 429 history with a cooldown window (D-GUARD).

    Rows live in the LanceDB table named by RATELIMIT_TABLE. The window
    defaults to RATELIMIT_COOLDOWN_MIN minutes.
    """

    def __init__(
        self,
        store: MemoryStore,
        cooldown_minutes: int = RATELIMIT_COOLDOWN_MIN,
    ) -> None:
        """Bind the ledger to `store` and fix the cooldown length.

        Args:
            store: object whose `db.open_table(RATELIMIT_TABLE)` yields the
                history table.
            cooldown_minutes: length of the cooldown window, in minutes.
        """
        self.cooldown = timedelta(minutes=int(cooldown_minutes))
        self.store = store

    def in_cooldown(self) -> bool:
        """Report whether the newest recorded hit is still inside the window.

        The newest `ts` across all rows is used; rows are not filtered by
        status code or endpoint. An empty table means no cooldown.
        """
        history = self.store.db.open_table(RATELIMIT_TABLE).to_pandas()
        if history.empty:
            return False

        newest = history["ts"].max()
        # Prefer a plain datetime when the value offers the conversion;
        # otherwise use the value as it came back from the table.
        try:
            stamp = newest.to_pydatetime()
        except AttributeError:
            stamp = newest
        # A timestamp without zone info is taken to be UTC.
        if stamp.tzinfo is None:
            stamp = stamp.replace(tzinfo=timezone.utc)

        elapsed = datetime.now(timezone.utc) - stamp
        return elapsed < self.cooldown

    def record_429(self, endpoint: str = "anthropic") -> None:
        """Append a 429 row stamped with the current UTC time.

        Args:
            endpoint: label of the API that returned the 429.
        """
        history = self.store.db.open_table(RATELIMIT_TABLE)
        hit = {
            "ts": datetime.now(timezone.utc),
            "status_code": 429,
            "endpoint": endpoint,
        }
        history.add([hit])


def should_call_llm(
    budget: BudgetLedger,
    rate: RateLimitLedger,
    llm_enabled: bool,
    has_api_key: bool,
    estimated_usd: float = 0.001,
) -> tuple[bool, str]:
    """Run the pre-call part of the D-GUARD ladder (steps 1-5).

    Gates are evaluated strictly in this order, and the first one that fails
    decides the answer; later gates are not consulted:

    1. `llm_enabled` toggle
    2. `has_api_key`
    3-4. `budget.can_spend(estimated_usd)` (its reason is passed through)
    5. `rate.in_cooldown()`

    Steps 6-7 (retry/timeout and recording failures) belong to the caller.

    The precedence is relied on by the test_should_call_llm_ordering_* tests;
    do not reorder without updating them.

    Returns:
        (ok, reason). reason is "ok" when the call may proceed, otherwise a
        short diagnostic naming the gate that blocked it. The cooldown
        diagnostic is a fixed string and always reads "15min".
    """
    if not llm_enabled:
        # Step 1: feature toggle is off.
        verdict = (False, "sleep.llm_enabled=false")
    elif not has_api_key:
        # Step 2: nothing to authenticate with.
        verdict = (False, "no api key")
    else:
        # Steps 3 + 4: the ledger checks the daily cap, then the monthly cap.
        within_budget, budget_reason = budget.can_spend(estimated_usd)
        if not within_budget:
            verdict = (False, budget_reason)
        elif rate.in_cooldown():
            # Step 5: a recent rate-limit hit is still cooling down.
            verdict = (False, "ratelimit cooldown (last 429 < 15min)")
        else:
            verdict = (True, "ok")
    return verdict
Initial release: iai-mcp v0.1.0 Co-Authored-By: Claude <noreply@anthropic.com> Co-Authored-By: XNLLLLH <XNLLLLH@users.noreply.github.com> 2026-05-06 01:04:47 -07:00			`"""D-GUARD: graceful-degradation ladder before any LLM call.`

			Every LLM-dependent operation must pass through `should_call_llm`
			`BEFORE making an API call. The 7-step ladder (D-GUARD):`

			`1. sleep.llm_enabled=true? else Tier 0`
			`2. API key present? else Tier 0`
			`3. BudgetLedger daily cap OK? else Tier 0`
			`4. BudgetLedger monthly cap OK? else Tier 0`
			`5. RateLimitLedger: last 429 > 15 min ago? else Tier 0 this cycle`
			`6. API call with retry(max=2, exp backoff) + timeout(60s) -- caller's job`
			`7. On 429/400/401/5xx -> record in ledger, Tier 0 this cycle -- caller's job`

			`Write & read paths (memory_recall/reinforce/contradict, profile_get/set,`
			`session_start) NEVER block on LLM failure. LLM failures only reduce the QUALITY`
			`of semantic consolidation, schema induction, and identity refinement.`

			`Budget defaults: daily_usd_cap=$0.10, monthly_usd_cap=$3.00,`
			`cooldown=15min, on_cap_hit=fallback_to_local.`

			`BudgetLedger + RateLimitLedger persist in LanceDB tables (budget_ledger,`
			`ratelimit_ledger) created by MemoryStore._ensure_tables.`
			`"""`
			`from __future__ import annotations`

			`from datetime import datetime, timedelta, timezone`

			`from iai_mcp.store import BUDGET_TABLE, RATELIMIT_TABLE, MemoryStore`


			`# D-GUARD defaults`
			`BUDGET_DAILY_USD_DEFAULT = 0.10`
			`BUDGET_MONTHLY_USD_DEFAULT = 3.00`
			`RATELIMIT_COOLDOWN_MIN = 15`


			`class BudgetLedger:`
			`"""LanceDB-backed daily + monthly USD spend tracker (D-GUARD).`

			`Caps default to $0.10/day and $3.00/month. Both are advisory (no OS-level`
			`enforcement); caller inspects can_spend() before invoking an LLM API.`
			`"""`

			`def __init__(`
			`self,`
			`store: MemoryStore,`
			`daily_usd_cap: float = BUDGET_DAILY_USD_DEFAULT,`
			`monthly_usd_cap: float = BUDGET_MONTHLY_USD_DEFAULT,`
			`) -> None:`
			`self.store = store`
			`self.daily_cap = float(daily_usd_cap)`
			`self.monthly_cap = float(monthly_usd_cap)`

			`# ---- internal helpers`

			`def _today_utc(self) -> str:`
			`return datetime.now(timezone.utc).strftime("%Y-%m-%d")`

			`def _this_month(self) -> str:`
			`return datetime.now(timezone.utc).strftime("%Y-%m")`

			`# ---- queries`

			`def daily_used(self) -> float:`
			`"""Sum of usd_spent rows for today (UTC)."""`
			`tbl = self.store.db.open_table(BUDGET_TABLE)`
			`df = tbl.to_pandas()`
			`if df.empty:`
			`return 0.0`
			`today = df[df["date"] == self._today_utc()]`
			`return float(today["usd_spent"].sum()) if not today.empty else 0.0`

			`def monthly_used(self) -> float:`
			`"""Sum of usd_spent rows for the current month (UTC)."""`
			`tbl = self.store.db.open_table(BUDGET_TABLE)`
			`df = tbl.to_pandas()`
			`if df.empty:`
			`return 0.0`
			`mo = df[df["date"].str.startswith(self._this_month())]`
			`return float(mo["usd_spent"].sum()) if not mo.empty else 0.0`

			`def can_spend(self, usd: float) -> tuple[bool, str]:`
			`"""Return (ok, reason). reason is "" on success."""`
			`daily = self.daily_used()`
			`if daily + float(usd) > self.daily_cap:`
			`return (`
			`False,`
			`f"daily cap exceeded (used {daily:.4f} + {float(usd):.4f} "`
			`f"> {self.daily_cap:.4f})",`
			`)`
			`monthly = self.monthly_used()`
			`if monthly + float(usd) > self.monthly_cap:`
			`return (`
			`False,`
			`f"monthly cap exceeded (used {monthly:.4f} + {float(usd):.4f} "`
			`f"> {self.monthly_cap:.4f})",`
			`)`
			`return True, ""`

			`# ---- writes`

			`def record_spend(self, usd: float, kind: str = "llm") -> None:`
			`"""Persist a spend event to the ledger."""`
			`tbl = self.store.db.open_table(BUDGET_TABLE)`
			`tbl.add(`
			`[`
			`{`
			`"date": self._today_utc(),`
			`"usd_spent": float(usd),`
			`"kind": kind,`
			`"ts": datetime.now(timezone.utc),`
			`}`
			`]`
			`)`


			`class RateLimitLedger:`
			`"""LanceDB-backed 429 history with 15-min cooldown (D-GUARD)."""`

			`def __init__(`
			`self,`
			`store: MemoryStore,`
			`cooldown_minutes: int = RATELIMIT_COOLDOWN_MIN,`
			`) -> None:`
			`self.store = store`
			`self.cooldown = timedelta(minutes=int(cooldown_minutes))`

			`def in_cooldown(self) -> bool:`
			"""True iff the most recent 429 was less than `cooldown_minutes` ago."""
			`tbl = self.store.db.open_table(RATELIMIT_TABLE)`
			`df = tbl.to_pandas()`
			`if df.empty:`
			`return False`
			`latest = df["ts"].max()`
			`# Pandas timestamp -> python datetime; may be naive on some backends.`
			`try:`
			`py = latest.to_pydatetime()`
			`except AttributeError:`
			`py = latest`
			`if py.tzinfo is None:`
			`py = py.replace(tzinfo=timezone.utc)`
			`return (datetime.now(timezone.utc) - py) < self.cooldown`

			`def record_429(self, endpoint: str = "anthropic") -> None:`
			`"""Record a 429 hit; subsequent in_cooldown() calls will see it."""`
			`tbl = self.store.db.open_table(RATELIMIT_TABLE)`
			`tbl.add(`
			`[`
			`{`
			`"ts": datetime.now(timezone.utc),`
			`"status_code": 429,`
			`"endpoint": endpoint,`
			`}`
			`]`
			`)`


			`def should_call_llm(`
			`budget: BudgetLedger,`
			`rate: RateLimitLedger,`
			`llm_enabled: bool,`
			`has_api_key: bool,`
			`estimated_usd: float = 0.001,`
			`) -> tuple[bool, str]:`
			`"""D-GUARD 7-step ladder.`

			`Returns (ok, reason). reason is "ok" on success or a short diagnostic`
			`describing which ladder step blocked the call.`

			`Ordering is constitutional: downstream plans rely on this exact`
			`precedence. Changing the order without updating test_should_call_llm_ordering_*`
			`tests is a spec violation.`
			`"""`
			`# Step 1: sleep.llm_enabled toggle.`
			`if not llm_enabled:`
			`return False, "sleep.llm_enabled=false"`
			`# Step 2: credentials.`
			`if not has_api_key:`
			`return False, "no api key"`
			`# Step 3 + 4: budget caps (daily, then monthly). can_spend tests both.`
			`ok, reason = budget.can_spend(estimated_usd)`
			`if not ok:`
			`return False, reason`
			`# Step 5: rate-limit cooldown.`
			`if rate.in_cooldown():`
			`return False, "ratelimit cooldown (last 429 < 15min)"`
			`# Steps 6-7 are caller's responsibility (retry + 429 recording).`
			`return True, "ok"`