Initial release: iai-mcp v0.1.0

Co-Authored-By: Claude <noreply@anthropic.com> Co-Authored-By: XNLLLLH <XNLLLLH@users.noreply.github.com>
2026-05-06 01:04:47 -07:00 · 2026-05-06 01:04:47 -07:00 · f6b876fbe7
commit f6b876fbe7
332 changed files with 97258 additions and 0 deletions
--- a/src/iai_mcp/insight.py
+++ b/src/iai_mcp/insight.py
@ -0,0 +1,267 @@
+"""Lucid moment orchestration -- (D-13 Option A).
+
+The "main insight of the day": exactly ONE `claude -p` subprocess call per
+night, at the end of the last REM cycle. The prompt is built from 3 locally-
+extracted schema patterns + 1 surprising episode; Claude distils them into a
+single unifying insight of 1-2 sentences which we store as a semantic-tier
+record tagged `overnight_insight`.
+
+Constitutional guards:
+- LOCAL is the primary worker. This module owns the single surgical
+  Claude call; all other consolidation work is pure-numpy/NetworkX/TF-IDF.
+- the call goes through host_cli.invoke_host_once which scrubs
+  the paid-API env var and validates the credentials.json subscription mode
+  before spawning the subprocess. This module NEVER references the paid-API
+  env var by name.
+- pre-flight budget gate via BudgetTracker.can_spend. A call that
+  would exceed the daily cap (overflow into weekly buffer) is silently
+  skipped, queued implicitly for the next night.
+- Bug #43333: cost_usd > 0 from invoke_host_once is recorded by the wrapper
+  (BudgetTracker.disable_host). This module short-circuits on host_disabled
+  so the bad call never repeats.
+- / C5: the inserted MemoryRecord is assembled once from Claude's
+  text response; we do NOT rewrite literal_surface after insert.
+"""
+from __future__ import annotations
+
+import asyncio
+import uuid
+from datetime import datetime, timezone
+from typing import Any
+from uuid import uuid4
+
+from iai_mcp.host_cli import (
+    BudgetTracker,
+    invoke_host_once,
+    verify_credentials_subscription,
+)
+from iai_mcp.daemon_state import load_state
+from iai_mcp.events import query_events, write_event
+from iai_mcp.schema import induce_schemas_tier0
+from iai_mcp.tz import load_user_tz
+from iai_mcp.types import MemoryRecord
+
+# Option A prompt template. The fragments "3 locally-found patterns",
+# "1 surprising episode", "unifying insight", and "1-2 sentences" are verbatim
+# per the locked decision; grep tests assert they appear unmodified.
+INSIGHT_PROMPT_TEMPLATE: str = (
+    "Here are 3 locally-found patterns from today + 1 surprising episode. "
+    "What is the unifying insight? Reply in 1-2 sentences.\n\n"
+    "Patterns:\n{patterns}\n\n"
+    "Surprise:\n{surprise}"
+)
+
+# Conservative pre-flight token estimate for the one nightly call -- covers
+# the prompt frame + patterns + surprise payload. Actual spend is recorded
+# post-call via BudgetTracker.record(tokens_in, tokens_out).
+PROMPT_ESTIMATE_TOKENS: int = 500
+
+# Kinds of events considered "surprising" for the prompt.
+_SURPRISE_KINDS: frozenset[str] = frozenset({
+    "art_gate_high_novelty",
+    "contradiction_detected",
+    "s4_contradiction",
+    "s5_drift",
+})
+
+
+def _gather_patterns(store) -> list[str]:
+    """Top-3 recent schema candidates by confidence. Graceful on empty."""
+    try:
+        schemas = induce_schemas_tier0(store) or []
+    except Exception:  # noqa: BLE001 -- pattern extraction must never crash insight
+        schemas = []
+
+    def _conf(s: Any) -> float:
+        # SchemaCandidate has .confidence; dicts may use the same key.
+        val = getattr(s, "confidence", None)
+        if val is None and isinstance(s, dict):
+            val = s.get("confidence")
+        try:
+            return float(val or 0.0)
+        except (TypeError, ValueError):
+            return 0.0
+
+    def _text(s: Any) -> str:
+        # SchemaCandidate exposes .pattern; dicts use "pattern" / "description".
+        for attr in ("pattern", "description", "summary"):
+            val = getattr(s, attr, None)
+            if val:
+                return str(val)
+            if isinstance(s, dict) and s.get(attr):
+                return str(s[attr])
+        return str(s)
+
+    schemas_sorted = sorted(schemas, key=_conf, reverse=True)
+    top3 = schemas_sorted[:3]
+    if not top3:
+        return ["[no patterns yet]"]
+    return [_text(s) for s in top3]
+
+
+def _gather_surprise(store) -> str:
+    """Most recent surprising event over the last 24h. Graceful on empty."""
+    try:
+        since = datetime.now(timezone.utc).replace(
+            hour=0, minute=0, second=0, microsecond=0,
+        )
+        candidates = query_events(store, since=since, limit=1000) or []
+    except Exception:  # noqa: BLE001 -- event query must never crash insight
+        candidates = []
+
+    for event in candidates:
+        if event.get("kind") in _SURPRISE_KINDS:
+            data = event.get("data") or event
+            return str(data)[:500]
+    return "[no surprise yet]"
+
+
+async def generate_overnight_insight(store, session_id: str) -> dict:
+    """Orchestrate the Option A Claude call.
+
+    Returns a structured dict. Shape (always present): ok (bool), reason
+    (str | None), text (str | None). Success result also carries
+    tokens_in / tokens_out for the caller's bookkeeping.
+
+    Pre-flight gate sequence (every one MUST pass before spawning subprocess):
+        1. verify_credentials_subscription (bug #43333 layer 2)
+        2. BudgetTracker.host_disabled_after_billing_event (bug #43333 layer 3)
+        3. BudgetTracker.can_spend(PROMPT_ESTIMATE_TOKENS) (D-15 budget)
+    """
+    creds = verify_credentials_subscription()
+    if not creds.get("ok"):
+        return {
+            "ok": False,
+            "reason": "credentials_check_failed",
+            "text": None,
+            "details": creds,
+        }
+
+    state = load_state()
+    tracker = BudgetTracker(state)
+
+    try:
+        tz = load_user_tz()
+    except Exception:  # noqa: BLE001 -- tz lookup never crashes the call path
+        tz = timezone.utc  # naive fallback; reset_if_new_day handles both
+
+    now = datetime.now(timezone.utc)
+    tracker.reset_if_new_day(now, tz)
+
+    if tracker.host_disabled_after_billing_event():
+        return {"ok": False, "reason": "host_disabled_c3", "text": None}
+
+    if not tracker.can_spend(PROMPT_ESTIMATE_TOKENS):
+        return {"ok": False, "reason": "budget_exceeded", "text": None}
+
+    patterns = _gather_patterns(store)
+    surprise = _gather_surprise(store)
+    prompt = INSIGHT_PROMPT_TEMPLATE.format(
+        patterns="\n".join(f"- {p}" for p in patterns),
+        surprise=surprise,
+    )
+
+    result = await invoke_host_once(prompt, model="haiku")
+
+    # Record any tokens the call actually spent (host_cli returns tokens
+    # even on non-ok paths when the subprocess completed).
+    tokens_in = int(result.get("tokens_in", 0) or 0)
+    tokens_out = int(result.get("tokens_out", 0) or 0)
+    if tokens_in + tokens_out > 0:
+        tracker.record(tokens_in, tokens_out, now)
+
+    if not result.get("ok"):
+        return {
+            "ok": False,
+            "reason": result.get("reason", "claude_call_failed"),
+            "text": None,
+            "details": {k: v for k, v in result.items() if k != "data"},
+        }
+
+    data = result.get("data") or {}
+    insight_text = str(data.get("result", "")).strip()
+    if not insight_text:
+        return {"ok": False, "reason": "empty_insight", "text": None}
+
+    # Build the L1-tier record. MemoryRecord requires a large
+    # set of fields per schema; we default every non-essential field
+    # to a neutral value so the shield/crypto pipeline treats the insight as
+    # a plain semantic record subject to S4/S5 on-read contradiction.
+    embed_dim = getattr(store, "embed_dim", None) or 384
+    record = MemoryRecord(
+        id=uuid4(),
+        tier="semantic",
+        literal_surface=insight_text,
+        aaak_index="",
+        embedding=[0.0] * int(embed_dim),
+        community_id=None,
+        centrality=0.0,
+        detail_level=2,
+        pinned=False,
+        stability=0.0,
+        difficulty=0.0,
+        last_reviewed=None,
+        never_decay=False,
+        never_merge=False,
+        provenance=[{
+            "ts": now.isoformat(),
+            "cue": "overnight_insight",
+            "session_id": session_id,
+        }],
+        created_at=now,
+        updated_at=now,
+        tags=["overnight_insight"],
+        language="en",  # the prompt is English-framed; insight is English.
+    )
+    # Dataclass has `tags` (list) not `tag` (scalar); we also expose `tag`
+    # via attribute assignment for callers that prefer the scalar form. This
+    # is NOT a literal_surface mutation so it does not violate C5 MEM-01.
+    try:
+        object.__setattr__(record, "tag", "overnight_insight")
+    except Exception:  # noqa: BLE001 -- attribute attach is best-effort
+        pass
+
+    try:
+        # R4 (researcher finding #3): wrap bare-sync store.insert
+        # to avoid blocking the asyncio event loop. Reached from
+        # dream.run_rem_cycle when claude_enabled=True (last cycle of REM).
+        # store.insert touches LanceDB write + encryption — not safe-fast.
+        await asyncio.to_thread(store.insert, record)
+    except Exception as exc:  # noqa: BLE001 -- store errors must not crash daemon
+        try:
+            write_event(
+                store,
+                "overnight_insight_store_error",
+                {"error": str(exc)[:500]},
+                severity="warning",
+            )
+        except Exception:
+            pass
+        return {
+            "ok": False,
+            "reason": "store_insert_failed",
+            "text": insight_text,
+            "error": str(exc)[:500],
+        }
+
+    try:
+        write_event(
+            store,
+            "overnight_insight_generated",
+            {
+                "session_id": session_id,
+                "text_len": len(insight_text),
+                "tokens_in": tokens_in,
+                "tokens_out": tokens_out,
+            },
+        )
+    except Exception:  # noqa: BLE001 -- event emission failure is non-fatal
+        pass
+
+    return {
+        "ok": True,
+        "text": insight_text,
+        "reason": None,
+        "tokens_in": tokens_in,
+        "tokens_out": tokens_out,
+    }