Initial release: iai-mcp v0.1.0
Co-Authored-By: Claude <noreply@anthropic.com> Co-Authored-By: XNLLLLH <XNLLLLH@users.noreply.github.com>
This commit is contained in:
commit
f6b876fbe7
332 changed files with 97258 additions and 0 deletions
19
src/iai_mcp/__init__.py
Normal file
19
src/iai_mcp/__init__.py
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
"""IAI-MCP -- autistic-style persistent memory MCP server."""
|
||||
from iai_mcp.types import (
|
||||
MemoryRecord,
|
||||
MemoryHit,
|
||||
RecallResponse,
|
||||
EdgeUpdate,
|
||||
ReconsolidationReceipt,
|
||||
TIER_ENUM,
|
||||
)
|
||||
|
||||
__version__ = "0.1.0"
|
||||
__all__ = [
|
||||
"MemoryRecord",
|
||||
"MemoryHit",
|
||||
"RecallResponse",
|
||||
"EdgeUpdate",
|
||||
"ReconsolidationReceipt",
|
||||
"TIER_ENUM",
|
||||
]
|
||||
245
src/iai_mcp/aaak.py
Normal file
245
src/iai_mcp/aaak.py
Normal file
|
|
@ -0,0 +1,245 @@
|
|||
"""AAAK index generator + English-Only storage enforcement.
|
||||
|
||||
Phase 1 constitutional rule:
|
||||
Storage is raw verbatim English always. AAAK is a RETRIEVAL VIEW only.
|
||||
|
||||
Phase 2 (superseded):
|
||||
Storage was briefly amended to raw verbatim in the user's original language.
|
||||
Every MemoryRecord carries an ISO-639-1 `language` tag retained as a column
|
||||
on legacy rows from that era.
|
||||
|
||||
Plan 05-08 (2026-04-19) restored the English-Only Brain (D-08 spirit):
|
||||
The surface (Claude) translates inbound text to English; storage holds the
|
||||
English form. The `language` column is retained for legacy compatibility;
|
||||
new records default to "en". Embedding default is bge-small-en-v1.5 (384d,
|
||||
English) per Plan 05-08.
|
||||
|
||||
This module provides:
|
||||
|
||||
- `generate_aaak_index(record)` -- builds a `W:<wing>/R:<room>/E:<entities>/T:<tags>`
|
||||
metadata string from a MemoryRecord's tier, community_id and tags. The returned
|
||||
string is guaranteed to contain none of record.literal_surface.
|
||||
|
||||
- `parse_aaak_index(idx)` -- inverse of the generator, returning a
|
||||
{wing, room, entities, tags} dict. Round-trips the entities/tags lists.
|
||||
|
||||
- `enforce_language_tagged(record, detect=False)` -- guard.
|
||||
Raises ValueError if record.language is empty and detect is False. When
|
||||
detect=True, runs langdetect on literal_surface; mutates record.language
|
||||
with the detected code if confidence >= 0.7, else raises. Empty text with
|
||||
detect=True defaults to "en" without raising.
|
||||
|
||||
- `enforce_english_raw(record)` -- shim retained for backward compat.
|
||||
Delegates to enforce_language_tagged for records with a language tag set;
|
||||
preserves Cyrillic/CJK rejection for records without one unless
|
||||
`raw:<lang>` tag is present.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from iai_mcp.types import MemoryRecord
|
||||
|
||||
# constitutional: confidence threshold below which langdetect refuses.
|
||||
LANGDETECT_MIN_CONFIDENCE = 0.7
|
||||
|
||||
|
||||
# --------------------------------------------------------------- script regex
|
||||
# Covered: Cyrillic (Russian et al), Hiragana, Katakana, CJK Unified Ideographs.
|
||||
# Sufficient for (the three scripts the project explicitly documents
|
||||
# as needing `raw:<lang>` handling). Extend the alphabet list in only
|
||||
# if a genuine storage bug surfaces -- don't speculate.
|
||||
CYRILLIC = re.compile(r"[\u0400-\u04FF]") # U+0400..U+04FF
|
||||
HIRAGANA_KATAKANA = re.compile(r"[\u3040-\u30FF]") # U+3040..U+30FF
|
||||
CJK = re.compile(r"[\u4E00-\u9FFF]") # U+4E00..U+9FFF Unified Ideographs
|
||||
|
||||
|
||||
# ---------------------------------------------- tier -> wing alphabet (TOK-10)
|
||||
_TIER_TO_WING = {
|
||||
"working": "W",
|
||||
"episodic": "E",
|
||||
"semantic": "S",
|
||||
"procedural": "P",
|
||||
"parametric": "\u03a0", # Pi glyph -- distinct from Latin P
|
||||
}
|
||||
|
||||
|
||||
def _wing_from_tier(tier: str) -> str:
|
||||
return _TIER_TO_WING.get(tier, "unknown")
|
||||
|
||||
|
||||
def _room_from_community(record: "MemoryRecord") -> str:
|
||||
"""First 8 chars of community UUID; "unknown" if community not yet assigned.
|
||||
|
||||
Plan 02 assigns community_id; Plan 03 L0/L1 pinned records may still have
|
||||
community_id=None (they're pinned by UUID, not graph position).
|
||||
"""
|
||||
if record.community_id is None:
|
||||
return "unknown"
|
||||
return str(record.community_id)[:8]
|
||||
|
||||
|
||||
def _entities_from_tags(tags: list[str]) -> str:
|
||||
"""Up to 10 tags prefixed `entity:` (prefix stripped), joined by `,`.
|
||||
|
||||
`"-"` if none found, so the generator output has a stable shape with
|
||||
exactly 3 `/` separators regardless of tag content.
|
||||
"""
|
||||
ents = [t[len("entity:"):] for t in tags if t.startswith("entity:")][:10]
|
||||
if not ents:
|
||||
return "-"
|
||||
return ",".join(ents)
|
||||
|
||||
|
||||
def _tagline(tags: list[str]) -> str:
|
||||
"""Up to 10 non-entity tags joined by `,`. `"-"` if none."""
|
||||
non_ents = [t for t in tags if not t.startswith("entity:")][:10]
|
||||
if not non_ents:
|
||||
return "-"
|
||||
return ",".join(non_ents)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- public API
|
||||
|
||||
|
||||
def generate_aaak_index(record: "MemoryRecord") -> str:
|
||||
"""Build the AAAK index string for a record (D-08, TOK-10).
|
||||
|
||||
Format: `W:<wing>/R:<room>/E:<entities>/T:<tags>`
|
||||
|
||||
Guarantees:
|
||||
- Exactly 3 `/` separators regardless of content.
|
||||
- Contains NO substring of `record.literal_surface`. Verified by
|
||||
`tests/test_aaak.py::test_aaak_index_does_not_contain_literal_surface`.
|
||||
- Deterministic: same record -> same index on repeat calls.
|
||||
"""
|
||||
wing = _wing_from_tier(record.tier)
|
||||
room = _room_from_community(record)
|
||||
entities = _entities_from_tags(record.tags)
|
||||
tags = _tagline(record.tags)
|
||||
return f"W:{wing}/R:{room}/E:{entities}/T:{tags}"
|
||||
|
||||
|
||||
def parse_aaak_index(idx: str) -> dict[str, list[str]]:
|
||||
"""Inverse of generate_aaak_index. Returns wing/room/entities/tags lists.
|
||||
|
||||
Each value is a list (even wing/room which are single strings) so callers
|
||||
have a uniform shape. Unknown keys are ignored. Empty-value `-` becomes [].
|
||||
"""
|
||||
out: dict[str, list[str]] = {
|
||||
"wing": [],
|
||||
"room": [],
|
||||
"entities": [],
|
||||
"tags": [],
|
||||
}
|
||||
key_map = {"W": "wing", "R": "room", "E": "entities", "T": "tags"}
|
||||
for seg in idx.split("/"):
|
||||
if ":" not in seg:
|
||||
continue
|
||||
k, _, v = seg.partition(":")
|
||||
if k not in key_map:
|
||||
continue
|
||||
name = key_map[k]
|
||||
if v == "-" or v == "":
|
||||
out[name] = []
|
||||
else:
|
||||
# Wing/Room are single-token; entities/tags are comma-separated.
|
||||
if name in ("wing", "room"):
|
||||
out[name] = [v]
|
||||
else:
|
||||
out[name] = v.split(",")
|
||||
return out
|
||||
|
||||
|
||||
def enforce_language_tagged(
|
||||
record: "MemoryRecord",
|
||||
*,
|
||||
detect: bool = False,
|
||||
) -> None:
|
||||
"""D-08a constitutional: every Phase-2+ record MUST carry a language tag.
|
||||
|
||||
When record.language is a non-empty string, the guard passes unconditionally
|
||||
(the column is retained for legacy compatibility; the English-Only Brain
|
||||
pivot in means new records default to "en").
|
||||
|
||||
When record.language is empty/missing and detect is False, raises
|
||||
ValueError("constitutional violation: ...") because storage is
|
||||
tag-addressable -- not defaulting to English.
|
||||
|
||||
When detect=True and language is empty:
|
||||
- If literal_surface is empty/whitespace, sets language="en" and returns.
|
||||
- Else runs langdetect; if top candidate has probability >= 0.7 (D-08a
|
||||
threshold), mutates record.language with the detected code.
|
||||
- If langdetect fails or confidence < 0.7, raises ValueError.
|
||||
|
||||
The seed for langdetect's DetectorFactory is fixed at 42 so the same text
|
||||
always produces the same language code across runs.
|
||||
"""
|
||||
if record.language and isinstance(record.language, str) and record.language.strip():
|
||||
return # already tagged; accept
|
||||
|
||||
if not detect:
|
||||
raise ValueError(
|
||||
"constitutional violation: record.language is required. "
|
||||
"Pass detect=True to auto-detect via langdetect."
|
||||
)
|
||||
|
||||
text = record.literal_surface or ""
|
||||
if not text.strip():
|
||||
record.language = "en" # empty -> default en
|
||||
return
|
||||
|
||||
try:
|
||||
from langdetect import DetectorFactory, detect_langs
|
||||
DetectorFactory.seed = 42 # determinism
|
||||
candidates = detect_langs(text)
|
||||
except Exception as e:
|
||||
raise ValueError(
|
||||
f"constitutional violation: langdetect failed on record text: {e}"
|
||||
)
|
||||
|
||||
if not candidates or candidates[0].prob < LANGDETECT_MIN_CONFIDENCE:
|
||||
top = candidates[0] if candidates else None
|
||||
raise ValueError(
|
||||
f"constitutional violation: langdetect confidence too low "
|
||||
f"(<{LANGDETECT_MIN_CONFIDENCE}); top candidate={top}"
|
||||
)
|
||||
|
||||
record.language = candidates[0].lang
|
||||
|
||||
|
||||
def enforce_english_raw(record: "MemoryRecord") -> None:
|
||||
"""Phase 1 shim -- preserves the original script-based guard.
|
||||
|
||||
semantics (retained byte-for-byte for backward compatibility):
|
||||
- `raw:<lang>` tag present on record -> accept (explicit raw capture)
|
||||
- literal_surface contains Cyrillic / Hiragana / Katakana / CJK codepoints
|
||||
and no `raw:<lang>` tag -> raise ValueError("constitutional ...")
|
||||
- else -> accept
|
||||
|
||||
The guard is exposed as `enforce_language_tagged`. Downstream
|
||||
plans that want native-language storage should import that directly
|
||||
instead of this shim. This function is kept so the test fixtures
|
||||
(tests/test_aaak.py, tests/test_provenance.py) continue to assert the
|
||||
exact rejection behaviour they documented.
|
||||
"""
|
||||
text = record.literal_surface or ""
|
||||
has_non_english = bool(
|
||||
CYRILLIC.search(text)
|
||||
or HIRAGANA_KATAKANA.search(text)
|
||||
or CJK.search(text)
|
||||
)
|
||||
if not has_non_english:
|
||||
return
|
||||
|
||||
# Caller opted in via `raw:<lang>` tag -> accept.
|
||||
if any(t.startswith("raw:") for t in record.tags):
|
||||
return
|
||||
|
||||
raise ValueError(
|
||||
"constitutional violation: literal_surface contains non-English "
|
||||
"characters; storage must be English raw verbatim (D-08, TOK-10). "
|
||||
"Add 'raw:<lang>' tag to declare explicit raw capture."
|
||||
)
|
||||
155
src/iai_mcp/batch.py
Normal file
155
src/iai_mcp/batch.py
Normal file
|
|
@ -0,0 +1,155 @@
|
|||
"""TOK-09 Batch API consolidation (Plan 02-04 Task 3, D-29).
|
||||
|
||||
D-29 (unified daily process): when Tier 1 is enabled + credentials + budget
|
||||
+ rate-limit all green (D-GUARD ladder via should_call_llm), submit a batch
|
||||
to Anthropic's Batch API at 50% discount vs synchronous calls. Falls back
|
||||
to Tier 0 stub results on any gate failure or SDK absence.
|
||||
|
||||
Plan 02-04 scope: the D-GUARD gate + budget side-effect + llm_health event
|
||||
emission are load-bearing. The actual anthropic.batches.create call is
|
||||
scaffolded behind a lazy import; when the SDK surface differs from what the
|
||||
Python core expects (e.g. version skew), the stub returns an empty result
|
||||
list and records llm_health fallback. Plan 03 / future phases own the real
|
||||
wire-up once the SDK API settles.
|
||||
|
||||
Pricing model:
|
||||
- Haiku 4.5 approx sync cost: prompt $0.25 / 1M tokens + output $1.25 / 1M
|
||||
- Batch discount: 50% off sync cost.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from typing import Any
|
||||
|
||||
from iai_mcp.events import write_event
|
||||
from iai_mcp.guard import BudgetLedger, RateLimitLedger, should_call_llm
|
||||
|
||||
|
||||
# 50% discount vs sync tier.
|
||||
BATCH_DISCOUNT = 0.5
|
||||
|
||||
# scope: we do not poll in-process. Real-world Batch API can take
|
||||
# up to ~24h. The dispatch path is "submit -> return (True, 'ok', stub)" with
|
||||
# the actual results arriving via a future polling job. Tests assert the
|
||||
# gate + side-effects; the stub list is empty in Phase 2.
|
||||
BATCH_POLL_TIMEOUT_SEC = 60
|
||||
|
||||
# Haiku 4.5 approximate sync pricing (USD per 1M tokens).
|
||||
_HAIKU_PROMPT_USD_PER_MTOK = 0.25
|
||||
_HAIKU_OUTPUT_USD_PER_MTOK = 1.25
|
||||
|
||||
|
||||
def _sync_tier_cost(prompt_tokens: int, output_tokens: int) -> float:
|
||||
"""Approximate sync-tier USD cost for Haiku 4.5.
|
||||
|
||||
uses Haiku 4.5 for consolidation. Pricing is approximate and may
|
||||
drift; the gate uses this only for budget-cap decisions (D-GUARD step
|
||||
3+4), never for billing reconciliation.
|
||||
"""
|
||||
p = (float(prompt_tokens) / 1_000_000.0) * _HAIKU_PROMPT_USD_PER_MTOK
|
||||
o = (float(output_tokens) / 1_000_000.0) * _HAIKU_OUTPUT_USD_PER_MTOK
|
||||
return float(p + o)
|
||||
|
||||
|
||||
def _aggregate_estimated_usd(tasks: list[dict]) -> float:
|
||||
total_sync = 0.0
|
||||
for t in tasks:
|
||||
total_sync += _sync_tier_cost(
|
||||
int(t.get("prompt_tok", 0)),
|
||||
int(t.get("output_tok", 0)),
|
||||
)
|
||||
return total_sync * BATCH_DISCOUNT
|
||||
|
||||
|
||||
def submit_batch_consolidation(
|
||||
store,
|
||||
tasks: list[dict],
|
||||
budget: BudgetLedger,
|
||||
rate: RateLimitLedger,
|
||||
llm_enabled: bool = True,
|
||||
) -> tuple[bool, str, list[dict]]:
|
||||
"""Submit a batch of consolidation tasks to the Anthropic Batch API.
|
||||
|
||||
Returns (ok, reason, results). On any D-GUARD fallback, ok=False and
|
||||
results is an empty list; the caller falls back to local Tier 0 output.
|
||||
|
||||
Gate ordering (D-GUARD):
|
||||
1. llm_enabled toggle
|
||||
2. API key present
|
||||
3. Budget daily + monthly caps (can_spend)
|
||||
4. Rate-limit cooldown (last 429 < 15 min)
|
||||
5. SDK import path
|
||||
6. Real batch submission (Plan 02-04 stub; see module docstring)
|
||||
"""
|
||||
has_key = bool(os.environ.get("ANTHROPIC_API_KEY"))
|
||||
estimated_usd = _aggregate_estimated_usd(tasks)
|
||||
|
||||
ok, reason = should_call_llm(
|
||||
budget=budget,
|
||||
rate=rate,
|
||||
llm_enabled=llm_enabled,
|
||||
has_api_key=has_key,
|
||||
estimated_usd=estimated_usd,
|
||||
)
|
||||
if not ok:
|
||||
write_event(
|
||||
store,
|
||||
kind="llm_health",
|
||||
data={
|
||||
"component": "batch_consolidation",
|
||||
"tier": "fallback",
|
||||
"reason": reason,
|
||||
"task_count": len(tasks),
|
||||
"estimated_usd": estimated_usd,
|
||||
},
|
||||
severity="warning",
|
||||
)
|
||||
return False, reason, []
|
||||
|
||||
# Eligible path: lazy import the SDK. On ImportError or any runtime
|
||||
# failure, log critical and fall back. This is also how the current Plan
|
||||
# 02-04 scaffold returns -- the real batch submission is stubbed (the
|
||||
# SDK surface for batches.create has changed across minor versions).
|
||||
try:
|
||||
import anthropic # noqa: F401
|
||||
except Exception as exc:
|
||||
write_event(
|
||||
store,
|
||||
kind="llm_health",
|
||||
data={
|
||||
"component": "batch_consolidation",
|
||||
"tier": "fallback",
|
||||
"error": f"import anthropic: {exc}",
|
||||
},
|
||||
severity="critical",
|
||||
)
|
||||
return False, f"SDK unavailable: {exc}", []
|
||||
|
||||
# H-02 FIX (Phase 2 gap closure): budget stays untouched and
|
||||
# effective_tier stays tier0 until a REAL successful anthropic.batches.create
|
||||
# response lands. The previous behaviour called budget.record_spend + returned
|
||||
# (True, "ok", []), which caused run_heavy_consolidation to flip
|
||||
# effective_tier=tier1 and debit the BudgetLedger on a stub producing zero
|
||||
# output -- corrupts D-GUARD audit honesty + cost accounting.
|
||||
#
|
||||
# Real SDK wire-up is scope. Until then the scaffold is honestly
|
||||
# documented via an info-severity llm_health event so `iai-mcp audit`
|
||||
# observers can see the gap explicitly.
|
||||
write_event(
|
||||
store,
|
||||
kind="llm_health",
|
||||
data={
|
||||
"component": "batch_consolidation",
|
||||
"tier": "fallback",
|
||||
"task_count": len(tasks),
|
||||
"estimated_usd": estimated_usd,
|
||||
"note": (
|
||||
"Plan 02-06 disables the scaffold-true return; "
|
||||
"real anthropic.batches.create wire-up is Phase 3. Budget "
|
||||
"stays untouched and effective_tier stays tier0 until a "
|
||||
"real successful SDK response lands."
|
||||
),
|
||||
},
|
||||
severity="info",
|
||||
)
|
||||
return False, "stub: batch API not yet wired", []
|
||||
301
src/iai_mcp/bedtime.py
Normal file
301
src/iai_mcp/bedtime.py
Normal file
|
|
@ -0,0 +1,301 @@
|
|||
"""Phase 4 -- bedtime wind-down detection (DAEMON-06, D-08/D-09/D-11).
|
||||
|
||||
Dual-gate bedtime suggestion emitter:
|
||||
Gate A: wind-down phrase regex match per language (D-11, 8 languages)
|
||||
Gate B: late in learned quiet window (inside OR within 30min of start, D-09)
|
||||
|
||||
When BOTH gates pass, `detect_wind_down` returns a small dict that `core.py`
|
||||
injects into `memory_recall` responses as `sleep_suggestion`. Claude (the
|
||||
LLM in the active session) decides social framing -- our code NEVER hardcodes
|
||||
user-facing phrasing.
|
||||
|
||||
Constitutional guard:
|
||||
- C2: this module does NOT initiate sleep. It only suggests. The only path
|
||||
that moves the daemon into SLEEP is `core.handle_initiate_sleep_mode`
|
||||
with `consent=True`. No auto-start in this file.
|
||||
- C5 / this module is read-only w.r.t. records. It reads `cue`
|
||||
strings; it NEVER mutates `literal_surface`.
|
||||
- C6: no fcntl, no daemon state mutation. All logic is pure in-process.
|
||||
|
||||
Patterns mirror `shield.py`'s 8-language dict style (same language set:
|
||||
en/ru/ja/ar/de/fr/es/zh per global-product mandate). Latin-script
|
||||
languages use `\b` word boundaries; CJK / Arabic use character-class
|
||||
proximity and whitespace-tolerant forms since Unicode `\b` is unreliable
|
||||
across scripts.
|
||||
|
||||
ReDoS-safe: every pattern uses bounded quantifiers only. No nested `(.+)+`
|
||||
constructs, no `.*.*`. Stress-tested against 10KB of "a"s under 100ms total.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from datetime import datetime
|
||||
from typing import Optional, Tuple
|
||||
from zoneinfo import ZoneInfo
|
||||
|
||||
from iai_mcp.quiet_window import BUCKET_MINUTES
|
||||
|
||||
|
||||
# ------------------------------------------------------------ constants
|
||||
|
||||
# dual-gate: within this many minutes of the learned quiet-window start
|
||||
# also counts as "late" (a user who says "good night" 25 minutes before their
|
||||
# usual quiet window is winding down, not speaking rhetorically).
|
||||
WIND_DOWN_GATE_MINUTES_BEFORE: int = 30
|
||||
|
||||
|
||||
# ------------------------------------------------------------ per-language regex
|
||||
|
||||
# English wind-down phrases. Case-insensitive match.
|
||||
WIND_DOWN_EN: list[str] = [
|
||||
r"\bgood\s*night\b",
|
||||
r"\bgoodnight\b",
|
||||
r"\bnight[,!.]?\s*$",
|
||||
r"\bI'?m\s+(heading|going)\s+to\s+bed\b",
|
||||
r"\b(time\s+(to|for)\s+bed|bedtime)\b",
|
||||
r"\bI'?m\s+(tired|exhausted|sleepy)\b",
|
||||
r"\b(catch\s+you\s+tomorrow|see\s+you\s+tomorrow)\b",
|
||||
r"\blet'?s\s+(continue|pick\s+up)\s+tomorrow\b",
|
||||
r"\bgoing\s+to\s+sleep\b",
|
||||
]
|
||||
|
||||
# Russian (same 8-language set as shield.py).
|
||||
WIND_DOWN_RU: list[str] = [
|
||||
r"спокойной\s+ночи",
|
||||
r"пойду\s+(спать|в\s+постель)",
|
||||
r"(я\s+)?(устал|устала|вымотан[аы]?|засыпаю)",
|
||||
r"пора\s+(спать|ложиться)",
|
||||
r"до\s+завтра",
|
||||
r"давай\s+завтра",
|
||||
r"ухожу\s+спать",
|
||||
r"(окей|ок|ладно),?\s+сплю",
|
||||
r"ложусь",
|
||||
]
|
||||
|
||||
# Japanese -- NREM cues + "see you tomorrow". No \b; lookaround on adjacent
|
||||
# punctuation / kana / CJK characters.
|
||||
WIND_DOWN_JA: list[str] = [
|
||||
r"お\s*や\s*す\s*み(なさい)?", # おやすみ / おやすみなさい
|
||||
r"寝\s*ます", # 寝ます
|
||||
r"(眠|ねむ)い", # 眠い / ねむい
|
||||
r"(寝る|ねる)(ね|よ|わ)?", # 寝る / ねる / 寝るね
|
||||
r"また\s*(明日|あした)", # また明日
|
||||
r"(疲|つか)れた", # 疲れた / つかれた
|
||||
r"ベッド\s*に\s*(入る|はいる)", # ベッドに入る
|
||||
]
|
||||
|
||||
# Arabic -- RTL script; use direct patterns.
|
||||
WIND_DOWN_AR: list[str] = [
|
||||
r"تصبح\s+على\s+خير",
|
||||
r"ليلة\s+سعيدة",
|
||||
r"أنا\s+(ذاهب|ذاهبة)\s+(للنوم|إلى\s+النوم)",
|
||||
r"أنا\s+(متعب|متعبة|تعبان[ةه]?)",
|
||||
r"سأنام",
|
||||
r"وقت\s+النوم",
|
||||
r"إلى\s+(الغد|اللقاء\s+غدا)",
|
||||
]
|
||||
|
||||
WIND_DOWN_DE: list[str] = [
|
||||
r"\bgute\s+nacht\b",
|
||||
r"\bgn8\b",
|
||||
r"\bich\s+gehe\s+(jetzt\s+)?(ins\s+bett|schlafen)\b",
|
||||
r"\b(ich\s+bin\s+)?(müde|kaputt|fertig)\b",
|
||||
r"\bschlafenszeit\b",
|
||||
r"\bbis\s+morgen\b",
|
||||
r"\blass\s+uns\s+morgen\s+weitermachen\b",
|
||||
]
|
||||
|
||||
WIND_DOWN_FR: list[str] = [
|
||||
r"\bbonne\s+nuit\b",
|
||||
r"\bje\s+(vais|pars)\s+(me\s+coucher|dormir)\b",
|
||||
r"\b(je\s+suis\s+)?(fatigu[ée]|[ée]puis[ée])\b",
|
||||
r"\b(il\s+est\s+)?l'?heure\s+de\s+(dormir|me\s+coucher)\b",
|
||||
r"\b[aà]\s+demain\b",
|
||||
r"\bon\s+reprend\s+demain\b",
|
||||
]
|
||||
|
||||
WIND_DOWN_ES: list[str] = [
|
||||
r"\bbuenas\s+noches\b",
|
||||
r"\bme\s+voy\s+a\s+(dormir|la\s+cama|descansar)\b",
|
||||
r"\b(estoy\s+)?(cansad[oa]|agotad[oa])\b",
|
||||
r"\bhora\s+de\s+dormir\b",
|
||||
r"\bhasta\s+ma[ñn]ana\b",
|
||||
r"\bseguimos\s+ma[ñn]ana\b",
|
||||
]
|
||||
|
||||
WIND_DOWN_ZH: list[str] = [
|
||||
r"晚\s*安", # 晚安
|
||||
r"我\s*(要|去)\s*睡\s*(觉|了)", # 我要睡觉 / 我去睡了
|
||||
r"累\s*了", # 累了
|
||||
r"(该|到)\s*睡\s*(觉)?\s*了", # 该睡了 / 到睡觉了
|
||||
r"明\s*天\s*见", # 明天见
|
||||
r"明\s*天\s*继\s*续", # 明天继续
|
||||
]
|
||||
|
||||
# language coverage: exactly the 8 languages shield.py supports.
|
||||
WIND_DOWN_BY_LANG: dict[str, list[str]] = {
|
||||
"en": WIND_DOWN_EN,
|
||||
"ru": WIND_DOWN_RU,
|
||||
"ja": WIND_DOWN_JA,
|
||||
"ar": WIND_DOWN_AR,
|
||||
"de": WIND_DOWN_DE,
|
||||
"fr": WIND_DOWN_FR,
|
||||
"es": WIND_DOWN_ES,
|
||||
"zh": WIND_DOWN_ZH,
|
||||
}
|
||||
|
||||
# Pre-compile every pattern once. IGNORECASE is safe on non-Latin scripts
|
||||
# (lowercasing is identity-preserving for CJK; Cyrillic handles cleanly).
|
||||
_COMPILED: dict[str, list[re.Pattern]] = {
|
||||
lang: [re.compile(p, re.IGNORECASE) for p in pats]
|
||||
for lang, pats in WIND_DOWN_BY_LANG.items()
|
||||
}
|
||||
|
||||
# Authoritative language set -- downstream greps against this constant.
|
||||
WIND_DOWN_LANGUAGES_SUPPORTED: frozenset[str] = frozenset(WIND_DOWN_BY_LANG.keys())
|
||||
|
||||
|
||||
# ------------------------------------------------------------ gate A: phrase match
|
||||
|
||||
|
||||
def detect_wind_down_phrase(cue: str, language: str) -> Tuple[bool, str]:
|
||||
"""Gate A: does the cue contain a wind-down phrase?
|
||||
|
||||
Policy mirrors shield.py: primary language is always tried; ALSO try
|
||||
English regardless of `language` because users cross-lingual mid-sentence
|
||||
("ok, going to sleep" in a Russian conversation is still a wind-down
|
||||
signal). We do NOT fall back to any other language beyond EN -- that
|
||||
would explode the FPR.
|
||||
|
||||
Returns (matched, matched_pattern). matched_pattern is the source regex
|
||||
string (not the compiled object) for audit/logging purposes.
|
||||
"""
|
||||
if not cue:
|
||||
return False, ""
|
||||
|
||||
# Primary language (when different from "en").
|
||||
for p in _COMPILED.get(language or "", []):
|
||||
if p.search(cue):
|
||||
return True, p.pattern
|
||||
|
||||
# Always also try EN if we haven't already.
|
||||
if language != "en":
|
||||
for p in _COMPILED["en"]:
|
||||
if p.search(cue):
|
||||
return True, p.pattern
|
||||
|
||||
return False, ""
|
||||
|
||||
|
||||
# ------------------------------------------------------------ gate B: late in quiet window
|
||||
|
||||
|
||||
def is_late_in_quiet_window(
|
||||
window: Optional[Tuple[int, int]],
|
||||
now: datetime,
|
||||
tz: ZoneInfo,
|
||||
) -> bool:
|
||||
"""Gate B: is `now` inside the quiet window OR within 30min of its start?
|
||||
|
||||
`window` is the (start_bucket, duration_buckets) pair emitted by
|
||||
`quiet_window.learn_quiet_window` -- start_bucket is an index into the
|
||||
48-bucket local-time day (30min each) and duration is the number of
|
||||
buckets. Returns False if no window is set (learn_quiet_window returned
|
||||
None, caller should be using the bootstrap 2h-idle trigger instead).
|
||||
|
||||
Wrap-around: a window starting at 22:00 and lasting 8h crosses local
|
||||
midnight; "inside" then means `cur >= start_minutes` OR `cur < end_minutes`.
|
||||
"""
|
||||
if not window:
|
||||
return False
|
||||
|
||||
start_bucket, duration = window
|
||||
try:
|
||||
now_local = now.astimezone(tz)
|
||||
except Exception:
|
||||
# DST edge or bad tz -- fail closed (don't suggest bedtime on
|
||||
# malformed input).
|
||||
return False
|
||||
|
||||
cur_minutes = now_local.hour * 60 + now_local.minute
|
||||
start_minutes = start_bucket * BUCKET_MINUTES
|
||||
end_minutes = (start_bucket + duration) * BUCKET_MINUTES
|
||||
|
||||
# Handle wrap-around midnight explicitly.
|
||||
if end_minutes > 24 * 60:
|
||||
wrapped_end = end_minutes - 24 * 60
|
||||
inside = cur_minutes >= start_minutes or cur_minutes < wrapped_end
|
||||
else:
|
||||
inside = start_minutes <= cur_minutes < end_minutes
|
||||
|
||||
if inside:
|
||||
return True
|
||||
|
||||
# Within 30min of start (cyclic -- a 21:45 cue for a 22:00 window counts).
|
||||
minutes_until_start = (start_minutes - cur_minutes) % (24 * 60)
|
||||
return 0 <= minutes_until_start <= WIND_DOWN_GATE_MINUTES_BEFORE
|
||||
|
||||
|
||||
# ------------------------------------------------------------ dual-gate detector
|
||||
|
||||
|
||||
def detect_wind_down(
|
||||
cue: str,
|
||||
language: str,
|
||||
state: dict,
|
||||
now: datetime,
|
||||
tz: ZoneInfo,
|
||||
) -> Optional[dict]:
|
||||
"""D-09 dual-gate bedtime detector.
|
||||
|
||||
Returns a `sleep_suggestion` dict when BOTH gates pass:
|
||||
Gate A: wind-down phrase match (primary lang + EN fallback)
|
||||
Gate B: late-in-learned-quiet-window (inside OR within 30min of start)
|
||||
|
||||
Returns None otherwise -- never a partial / fuzzy signal. Downstream
|
||||
consumers (`core._inject_sleep_suggestion`) key on the presence of the
|
||||
key, so None means the response simply does not carry `sleep_suggestion`.
|
||||
|
||||
Payload shape (small, no PII beyond the matched regex pattern):
|
||||
{
|
||||
"message_hint": "user_wind_down_detected",
|
||||
"matched_pattern": str,
|
||||
"quiet_window_start_bucket": int,
|
||||
"quiet_window_duration": int,
|
||||
}
|
||||
"""
|
||||
matched, pattern = detect_wind_down_phrase(cue, language)
|
||||
if not matched:
|
||||
return None
|
||||
|
||||
window = state.get("quiet_window") if isinstance(state, dict) else None
|
||||
if not window:
|
||||
return None
|
||||
if not is_late_in_quiet_window(window, now, tz):
|
||||
return None
|
||||
|
||||
start_bucket, duration = window
|
||||
return {
|
||||
"message_hint": "user_wind_down_detected",
|
||||
"matched_pattern": pattern,
|
||||
"quiet_window_start_bucket": int(start_bucket),
|
||||
"quiet_window_duration": int(duration),
|
||||
}
|
||||
|
||||
|
||||
__all__ = [
|
||||
"WIND_DOWN_AR",
|
||||
"WIND_DOWN_BY_LANG",
|
||||
"WIND_DOWN_DE",
|
||||
"WIND_DOWN_EN",
|
||||
"WIND_DOWN_ES",
|
||||
"WIND_DOWN_FR",
|
||||
"WIND_DOWN_GATE_MINUTES_BEFORE",
|
||||
"WIND_DOWN_JA",
|
||||
"WIND_DOWN_LANGUAGES_SUPPORTED",
|
||||
"WIND_DOWN_RU",
|
||||
"WIND_DOWN_ZH",
|
||||
"detect_wind_down",
|
||||
"detect_wind_down_phrase",
|
||||
"is_late_in_quiet_window",
|
||||
]
|
||||
179
src/iai_mcp/camouflaging.py
Normal file
179
src/iai_mcp/camouflaging.py
Normal file
|
|
@ -0,0 +1,179 @@
|
|||
"""Plan 03-03 — camouflaging detector + register relaxer (ecological self-regulation).
|
||||
|
||||
Constitutional anchor:
|
||||
- Observes the user's SURFACE formality over a weekly sliding 5-point window.
|
||||
- On a sustained over-formal trajectory, adjusts OUR register (the 14th profile
|
||||
knob `camouflaging_relaxation`). NEVER pushes the user to change. NEVER models
|
||||
user internal-state (Cook 2021 / Raymaker 2020 — masking is out-of-scope).
|
||||
- Chapman 2021 ecological self-regulation framing: the system relaxes ITS OWN
|
||||
response register so the user does not have to match ours.
|
||||
|
||||
Detection (D-AUTIST13-03): sliding 5-point weekly window. Trigger condition:
|
||||
linear-regression slope > 0.05/week AND current mean > 0.6. Both must hold.
|
||||
|
||||
Event kinds emitted (new in Phase 3):
|
||||
- `formality_score_weekly` — weekly aggregate of the user's formality scores.
|
||||
- `camouflaging_detected` — the detector fired (over-formal trajectory confirmed).
|
||||
- `register_relaxed` — OUR `camouflaging_relaxation` knob was bumped UP (toward
|
||||
informal register in OUR responses).
|
||||
|
||||
Knob semantics: `camouflaging_relaxation` in [0, 1]. Higher = more relaxed OUR register.
|
||||
relax_register INCREMENTS the knob (pushing OUR output toward informal) when the user
|
||||
is observed to be over-formal. The user is never modified or nudged.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
|
||||
import numpy as np
|
||||
|
||||
from iai_mcp.events import query_events, write_event
|
||||
from iai_mcp.formality import formality_score
|
||||
from iai_mcp.profile import profile_get, profile_set
|
||||
|
||||
|
||||
# ------------------------------------------------------------------- constants
|
||||
DEFAULT_WINDOW_SIZE: int = 5 # D-AUTIST13-03 sliding 5-point window
|
||||
DEFAULT_CADENCE_DAYS: int = 7 # weekly
|
||||
TRIGGER_SLOPE: float = 0.05 # formality delta per week floor
|
||||
TRIGGER_MEAN: float = 0.6 # absolute formality floor
|
||||
DEFAULT_DELTA: float = 0.1 # knob step per relaxation
|
||||
|
||||
|
||||
# ------------------------------------------------------------------- detector
|
||||
def detect_camouflaging(
|
||||
store,
|
||||
*,
|
||||
window_size: int = DEFAULT_WINDOW_SIZE,
|
||||
cadence_days: int = DEFAULT_CADENCE_DAYS,
|
||||
) -> dict:
|
||||
"""Sliding 5-point weekly window detector (D-AUTIST13-03).
|
||||
|
||||
Reads the last `window_size` `formality_score_weekly` events, computes the
|
||||
linear-regression slope (numpy.polyfit deg=1), and the current mean. Detected
|
||||
iff slope > TRIGGER_SLOPE AND mean > TRIGGER_MEAN (both required).
|
||||
|
||||
Args:
|
||||
store: open MemoryStore.
|
||||
window_size: number of weekly points to consider (default 5).
|
||||
cadence_days: cadence label (default 7 = weekly); not used arithmetically
|
||||
but stored in event metadata by callers.
|
||||
|
||||
Returns:
|
||||
{detected: bool, trajectory_slope: float, current_mean: float, sample_count: int}.
|
||||
"""
|
||||
events = query_events(store, kind="formality_score_weekly", limit=window_size)
|
||||
# Events are newest-first; we want chronological order for slope.
|
||||
events = list(reversed(events))
|
||||
sample_count = len(events)
|
||||
|
||||
if sample_count < window_size:
|
||||
return {
|
||||
"detected": False,
|
||||
"trajectory_slope": 0.0,
|
||||
"current_mean": 0.0,
|
||||
"sample_count": sample_count,
|
||||
}
|
||||
|
||||
scores = np.asarray(
|
||||
[float(e["data"].get("score", 0.0)) for e in events], dtype=np.float64
|
||||
)
|
||||
xs = np.arange(len(scores), dtype=np.float64)
|
||||
slope, _intercept = np.polyfit(xs, scores, 1)
|
||||
current_mean = float(scores.mean())
|
||||
|
||||
detected = bool(slope > TRIGGER_SLOPE and current_mean > TRIGGER_MEAN)
|
||||
|
||||
return {
|
||||
"detected": detected,
|
||||
"trajectory_slope": float(slope),
|
||||
"current_mean": current_mean,
|
||||
"sample_count": sample_count,
|
||||
}
|
||||
|
||||
|
||||
# ------------------------------------------------------------------- relaxer
|
||||
def relax_register(store, *, delta: float = DEFAULT_DELTA) -> None:
|
||||
"""Bump profile.camouflaging_relaxation by delta (capped at 1.0).
|
||||
|
||||
Writes go through `profile.profile_set(..., store=store)` so the existing
|
||||
`profile_updated` event also fires alongside `register_relaxed`. This is the
|
||||
ONE pathway the system uses to relax its own register in response to a
|
||||
detected over-formal user trajectory (D-AUTIST13-02).
|
||||
"""
|
||||
import iai_mcp.core as core
|
||||
|
||||
current = core._profile_state.get("camouflaging_relaxation", 0.0)
|
||||
new_value = min(1.0, max(0.0, current + delta))
|
||||
|
||||
# Only call profile_set if the value actually changes; otherwise profile_set
|
||||
# will silently no-op and NOT emit profile_updated (correct behaviour).
|
||||
if new_value != current:
|
||||
profile_set(
|
||||
"camouflaging_relaxation",
|
||||
new_value,
|
||||
core._profile_state,
|
||||
store=store,
|
||||
)
|
||||
|
||||
write_event(
|
||||
store,
|
||||
kind="register_relaxed",
|
||||
data={
|
||||
"from": float(current),
|
||||
"to": float(new_value),
|
||||
"delta": float(delta),
|
||||
"timestamp": datetime.now(timezone.utc).isoformat(),
|
||||
},
|
||||
severity="info",
|
||||
)
|
||||
|
||||
|
||||
# ------------------------------------------------------------------- recorder
|
||||
def record_user_formality(store, text: str, lang: str) -> None:
|
||||
"""Compute formality on USER surface text and emit a formality_score_weekly event.
|
||||
|
||||
Called on every user turn. Constitutional guard: the scorer sees ONLY the
|
||||
user's surface output; no inferred state is computed or persisted.
|
||||
"""
|
||||
score = formality_score(text, lang)
|
||||
now = datetime.now(timezone.utc)
|
||||
# Simple per-turn emit; aggregation is done at query time in detect_camouflaging
|
||||
# (taking last window_size). Per-week aggregation via week_iso tag for audit.
|
||||
week_iso = f"{now.year}-W{now.isocalendar()[1]:02d}"
|
||||
write_event(
|
||||
store,
|
||||
kind="formality_score_weekly",
|
||||
data={
|
||||
"score": float(score),
|
||||
"lang": lang,
|
||||
"week_iso": week_iso,
|
||||
"samples": 1,
|
||||
"timestamp": now.isoformat(),
|
||||
},
|
||||
severity="info",
|
||||
)
|
||||
|
||||
|
||||
# ------------------------------------------------------------------- weekly pass
|
||||
def run_weekly_pass(store) -> dict:
|
||||
"""Convenience entry: detect_camouflaging; if detected, emit
|
||||
`camouflaging_detected` event AND call relax_register.
|
||||
|
||||
Returns the detection result dict (same shape as detect_camouflaging).
|
||||
"""
|
||||
result = detect_camouflaging(store)
|
||||
if result["detected"]:
|
||||
write_event(
|
||||
store,
|
||||
kind="camouflaging_detected",
|
||||
data={
|
||||
"slope": result["trajectory_slope"],
|
||||
"mean": result["current_mean"],
|
||||
"window_size": DEFAULT_WINDOW_SIZE,
|
||||
"timestamp": datetime.now(timezone.utc).isoformat(),
|
||||
},
|
||||
severity="info",
|
||||
)
|
||||
relax_register(store)
|
||||
return result
|
||||
520
src/iai_mcp/capture.py
Normal file
520
src/iai_mcp/capture.py
Normal file
|
|
@ -0,0 +1,520 @@
|
|||
"""Plan 06 memory_capture (WRITE-side ambient gap closure).
|
||||
|
||||
Context: prior phases shipped ambient READ (session_start compact handle) and
|
||||
ambient daemon (sleep cycles, REM, overnight digest). WRITE-side capture of
|
||||
conversation content was architectural gap — nothing in iai-mcp automatically
|
||||
ingested what the user said or what Claude decided during a session.
|
||||
|
||||
This module closes that gap with two entry points:
|
||||
|
||||
1. `capture_turn(store, cue, text, tier, session_id)`:
|
||||
in-session, explicit. Called via MCP tool `memory_capture` when Claude
|
||||
detects a surprising correction, load-bearing decision, or lesson.
|
||||
|
||||
2. `capture_transcript(store, transcript_path, session_id)`:
|
||||
end-of-session, ambient. Called by `~/.claude/hooks/iai-mcp-session-capture.sh`
|
||||
Stop-hook on SessionEnd. Reads Claude Code JSONL transcript, extracts
|
||||
user + assistant turns, filters through shield + dedup, inserts records.
|
||||
|
||||
Both paths respect:
|
||||
- Shield: HARD_BLOCK drops the record; FLAG_FOR_REVIEW stores with tag
|
||||
(policy: user chose visibility over paranoia, 2026-04-20).
|
||||
- Dedup: if query_similar returns a hit with cos >= DEDUP_THRESHOLD
|
||||
(0.95), we reinforce instead of insert (boost Hebbian edge).
|
||||
- Language: detected via langdetect; falls back to 'en' on ambiguity.
|
||||
- Encryption: goes through the standard store.insert() path which handles
|
||||
AES-256-GCM column encryption.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from uuid import UUID, uuid4
|
||||
|
||||
# R3 deviation [Rule 3 - blocking import cost]: `iai_mcp.embed` pulls
|
||||
# in transformers + torch (~2.9s cold import). Loading capture.py for the
|
||||
# `--no-spawn` deferred path (which never embeds anything) blew the R3 2s
|
||||
# wall-clock budget. Moved to lazy import inside `capture_turn` — keeps the
|
||||
# write_deferred_captures cold path under ~1s. `from __future__ import
|
||||
# annotations` (line 29) keeps type hints intact without runtime import.
|
||||
# `MemoryStore` left at module top — its 0.4s import is acceptable.
|
||||
from iai_mcp.store import MemoryStore
|
||||
from iai_mcp.types import (
|
||||
SCHEMA_VERSION_CURRENT,
|
||||
TIER_ENUM,
|
||||
MemoryRecord,
|
||||
)
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
DEDUP_COS_THRESHOLD = 0.95
|
||||
MIN_CAPTURE_LEN = 12
|
||||
MAX_CAPTURE_LEN = 8000
|
||||
|
||||
|
||||
def _detect_language(text: str) -> str:
|
||||
"""Best-effort ISO-639-1 via langdetect; 'en' on any failure."""
|
||||
try:
|
||||
from langdetect import detect # lazy: already a project dep
|
||||
|
||||
code = detect(text[:500])
|
||||
return code if len(code) == 2 else "en"
|
||||
except Exception:
|
||||
return "en"
|
||||
|
||||
|
||||
def _run_shield(text: str) -> tuple[str, list[str]]:
|
||||
"""Run shield; return (verdict, tags) where verdict in HARD_BLOCK|FLAG|OK."""
|
||||
try:
|
||||
from iai_mcp.shield import evaluate
|
||||
|
||||
result = evaluate(text)
|
||||
verdict = getattr(result, "verdict", "OK")
|
||||
tags = list(getattr(result, "tags", []) or [])
|
||||
return verdict, tags
|
||||
except Exception:
|
||||
return "OK", []
|
||||
|
||||
|
||||
def capture_turn(
|
||||
store: MemoryStore,
|
||||
*,
|
||||
cue: str,
|
||||
text: str,
|
||||
tier: str = "episodic",
|
||||
session_id: str = "-",
|
||||
role: str = "user",
|
||||
) -> dict[str, Any]:
|
||||
"""Write a single conversation turn to the iai-mcp store.
|
||||
|
||||
Returns {"status": "inserted|reinforced|skipped", "record_id": uuid-or-null,
|
||||
"reason": short-string}.
|
||||
"""
|
||||
if tier not in TIER_ENUM:
|
||||
return {"status": "skipped", "record_id": None, "reason": f"invalid tier {tier!r}"}
|
||||
|
||||
text = (text or "").strip()
|
||||
if len(text) < MIN_CAPTURE_LEN:
|
||||
return {"status": "skipped", "record_id": None, "reason": "too short"}
|
||||
if len(text) > MAX_CAPTURE_LEN:
|
||||
text = text[:MAX_CAPTURE_LEN]
|
||||
|
||||
verdict, shield_tags = _run_shield(text)
|
||||
if verdict == "HARD_BLOCK":
|
||||
return {"status": "skipped", "record_id": None, "reason": "shield HARD_BLOCK"}
|
||||
|
||||
# Lazy import: keeps the cold module-import cost low for the
|
||||
# `--no-spawn` deferred path (Phase 7.1 R3) which never embeds.
|
||||
from iai_mcp.embed import embedder_for_store
|
||||
|
||||
emb = embedder_for_store(store).embed(cue or text)
|
||||
embedding = list(emb)
|
||||
|
||||
# Dedup: query_similar against existing records at the same tier.
|
||||
# Phase 07.11-01 / query_similar accepts a `tier` kwarg natively
|
||||
# (Bug A fix), returns list[tuple[MemoryRecord, float]] (legacy contract,
|
||||
# unchanged shape -- we unpack the tuple correctly in the loop body, Bug B
|
||||
# fix), and the dedup hit reinforces via the typed `reinforce_record`
|
||||
# wrapper (Bug C fix -- single-uuid argument shape against a single-uuid
|
||||
# API).
|
||||
try:
|
||||
neighbours = store.query_similar(embedding, k=3, tier=tier)
|
||||
except (ValueError, IOError) as exc:
|
||||
# Genuinely-recoverable cases only: bad tier validation surfaces as
|
||||
# ValueError (already caught by query_similar's pre-I/O guard); transient
|
||||
# LanceDB I/O surfaces as IOError. A TypeError from a wrong call shape
|
||||
# MUST surface in tests -- the silent `except Exception: pass` blanket
|
||||
# is removed deliberately (D-01 contract).
|
||||
log.warning(
|
||||
"capture_dedup_query_failed",
|
||||
extra={"err_type": type(exc).__name__, "err": str(exc)[:120]},
|
||||
)
|
||||
neighbours = []
|
||||
|
||||
for record, score in neighbours: # tuple-unpack -- fix for Bug B
|
||||
if score >= DEDUP_COS_THRESHOLD:
|
||||
# Single-record reinforcement: route through reinforce_record
|
||||
#, NOT boost_edges([UUID(...)]) which expects pairs.
|
||||
try:
|
||||
store.reinforce_record(record.id)
|
||||
except (ValueError, IOError) as exc:
|
||||
# Reinforce is best-effort observability; log and continue
|
||||
# so the duplicate is still detected even if the LTP write
|
||||
# fails. Same narrowed-except discipline as the query above.
|
||||
log.warning(
|
||||
"capture_dedup_reinforce_failed",
|
||||
extra={
|
||||
"err_type": type(exc).__name__,
|
||||
"record_id": str(record.id),
|
||||
},
|
||||
)
|
||||
return {
|
||||
"status": "reinforced",
|
||||
"record_id": str(record.id),
|
||||
"reason": f"cos={score:.3f} >= {DEDUP_COS_THRESHOLD}",
|
||||
}
|
||||
|
||||
tags = ["capture", f"role:{role}"]
|
||||
if verdict == "FLAG_FOR_REVIEW":
|
||||
tags.append("shield:flagged")
|
||||
tags.extend(f"shield:{t}" for t in shield_tags[:3])
|
||||
|
||||
now = datetime.now(timezone.utc)
|
||||
rec = MemoryRecord(
|
||||
id=uuid4(),
|
||||
tier=tier,
|
||||
literal_surface=text,
|
||||
aaak_index="",
|
||||
embedding=embedding,
|
||||
community_id=None,
|
||||
centrality=0.0,
|
||||
detail_level=2,
|
||||
pinned=False,
|
||||
stability=0.0,
|
||||
difficulty=0.0,
|
||||
last_reviewed=None,
|
||||
never_decay=False,
|
||||
never_merge=False,
|
||||
provenance=[{"ts": now.isoformat(), "cue": cue or "(auto-capture)",
|
||||
"session_id": session_id, "role": role}],
|
||||
created_at=now,
|
||||
updated_at=now,
|
||||
tags=tags,
|
||||
language=_detect_language(text),
|
||||
s5_trust_score=0.5,
|
||||
profile_modulation_gain={},
|
||||
schema_version=SCHEMA_VERSION_CURRENT,
|
||||
)
|
||||
|
||||
try:
|
||||
store.insert(rec)
|
||||
except Exception as e:
|
||||
log.exception("capture_turn insert failed")
|
||||
return {"status": "skipped", "record_id": None, "reason": f"insert-failed: {type(e).__name__}"}
|
||||
|
||||
return {"status": "inserted", "record_id": str(rec.id), "reason": f"tier={tier}"}
|
||||
|
||||
|
||||
def capture_transcript(
|
||||
store: MemoryStore,
|
||||
transcript_path: Path | str,
|
||||
*,
|
||||
session_id: str = "-",
|
||||
max_turns: int = 200,
|
||||
) -> dict[str, Any]:
|
||||
"""Read a Claude Code JSONL transcript, capture user + assistant turns.
|
||||
|
||||
Returns {"inserted": N, "reinforced": M, "skipped": K, "errors": E}.
|
||||
"""
|
||||
path = Path(transcript_path).expanduser()
|
||||
if not path.exists():
|
||||
return {"inserted": 0, "reinforced": 0, "skipped": 0, "errors": 1,
|
||||
"reason": f"transcript not found: {path}"}
|
||||
|
||||
counts = {"inserted": 0, "reinforced": 0, "skipped": 0, "errors": 0}
|
||||
seen = 0
|
||||
with path.open() as fh:
|
||||
for line in fh:
|
||||
if seen >= max_turns:
|
||||
break
|
||||
seen += 1
|
||||
try:
|
||||
obj = json.loads(line)
|
||||
except Exception:
|
||||
counts["errors"] += 1
|
||||
continue
|
||||
msg = obj.get("message") if isinstance(obj.get("message"), dict) else obj
|
||||
role = obj.get("type") or msg.get("role", "")
|
||||
if role not in {"user", "assistant"}:
|
||||
continue
|
||||
content = msg.get("content", "")
|
||||
if isinstance(content, list):
|
||||
# Claude Code messages use block format; collect text blocks
|
||||
text_parts = []
|
||||
for block in content:
|
||||
if isinstance(block, dict) and block.get("type") == "text":
|
||||
text_parts.append(block.get("text", ""))
|
||||
text = "\n".join(text_parts).strip()
|
||||
else:
|
||||
text = str(content).strip()
|
||||
if not text:
|
||||
continue
|
||||
result = capture_turn(
|
||||
store,
|
||||
cue=f"session {session_id} turn {seen}",
|
||||
text=text,
|
||||
tier="episodic",
|
||||
session_id=session_id,
|
||||
role=role,
|
||||
)
|
||||
status = result.get("status", "skipped")
|
||||
if status in counts:
|
||||
counts[status] += 1
|
||||
else:
|
||||
counts["skipped"] += 1
|
||||
|
||||
return counts
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# R3 / D7.1-04: deferred-captures writer for `--no-spawn` hook mode
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def write_deferred_captures(
|
||||
session_id: str,
|
||||
transcript_path: Path | str,
|
||||
*,
|
||||
cwd: str | None = None,
|
||||
max_turns: int = 200,
|
||||
) -> Path:
|
||||
"""Defer transcript capture by writing events to a JSONL file under
|
||||
``~/.iai-mcp/.deferred-captures/``. Returns the path written.
|
||||
|
||||
Used by ``iai-mcp capture-transcript --no-spawn`` (R3, D7.1-04) when the
|
||||
daemon is unreachable. The Stop hook calls this so it never blocks
|
||||
session teardown waiting for a daemon spawn (the third spawn vector
|
||||
forensic anomaly #3 from ``report-20260426-150300.md``).
|
||||
|
||||
The daemon's drain loop (Plan 07.1-05b, in daemon.py / WAKE handler)
|
||||
consumes these on next WAKE. Format is JSONL v1 per D7.1-04:
|
||||
|
||||
- Line 1: header ``{"version":1,"deferred_at":<ISO>,"session_id":<id>,"cwd":<path>}``
|
||||
- Lines 2..N: one event per user/assistant turn
|
||||
``{"text":<verbatim>,"cue":<short>,"tier":"episodic","role":<u|a>,"ts":<ISO>}``
|
||||
|
||||
Pure-write: no MemoryStore touch, no socket touch, no daemon import.
|
||||
Uses ``Path.home()`` at call time so HOME-monkeypatched tests get the
|
||||
right tmp dir. Idempotent ``mkdir(parents=True, exist_ok=True)``.
|
||||
|
||||
Args:
|
||||
session_id: Claude Code session id (provenance + filename component).
|
||||
transcript_path: path to the JSONL transcript file (or non-existent —
|
||||
we write the header then return; daemon drain treats as no-op).
|
||||
cwd: optional CWD override for the header (defaults to ``os.getcwd()``).
|
||||
max_turns: cap on transcript turns to emit (default 200, matches
|
||||
``capture_transcript`` semantics).
|
||||
|
||||
Returns:
|
||||
``Path`` of the written ``.jsonl`` file.
|
||||
|
||||
Notes:
|
||||
- Filename pattern ``{session_id}-{int(time.time())}.jsonl`` — the
|
||||
unix-ts suffix avoids collisions if the same session captures
|
||||
multiple times.
|
||||
- Reuses the same parsing logic as ``capture_transcript`` so the
|
||||
deferred path and the inline path stay consistent.
|
||||
- Returns even on missing transcript (writes header only) — daemon
|
||||
drain treats as no-op. Hook MUST never raise here.
|
||||
- Stdlib only: ``json``, ``time``, ``pathlib.Path``, ``datetime``, ``os``.
|
||||
"""
|
||||
deferred_dir = Path.home() / ".iai-mcp" / ".deferred-captures"
|
||||
deferred_dir.mkdir(parents=True, exist_ok=True)
|
||||
out_path = deferred_dir / f"{session_id}-{int(time.time())}.jsonl"
|
||||
with out_path.open("w") as fh:
|
||||
# Header (line 1, version=1 forward-compat marker per D7.1-04).
|
||||
header = {
|
||||
"version": 1,
|
||||
"deferred_at": datetime.now(timezone.utc).isoformat(),
|
||||
"session_id": session_id,
|
||||
"cwd": cwd or os.getcwd(),
|
||||
}
|
||||
fh.write(json.dumps(header, ensure_ascii=False) + "\n")
|
||||
# Read transcript and emit one event per user/assistant turn.
|
||||
path = Path(transcript_path).expanduser()
|
||||
if not path.exists():
|
||||
return out_path # empty body — daemon drain will treat as no-op
|
||||
seen = 0
|
||||
with path.open() as src:
|
||||
for line in src:
|
||||
if seen >= max_turns:
|
||||
break
|
||||
seen += 1
|
||||
try:
|
||||
obj = json.loads(line)
|
||||
except Exception:
|
||||
continue
|
||||
msg = obj.get("message") if isinstance(obj.get("message"), dict) else obj
|
||||
role = obj.get("type") or msg.get("role", "")
|
||||
if role not in {"user", "assistant"}:
|
||||
continue
|
||||
content = msg.get("content", "")
|
||||
if isinstance(content, list):
|
||||
text_parts = [
|
||||
b.get("text", "")
|
||||
for b in content
|
||||
if isinstance(b, dict) and b.get("type") == "text"
|
||||
]
|
||||
text = "\n".join(text_parts).strip()
|
||||
else:
|
||||
text = str(content).strip()
|
||||
if not text:
|
||||
continue
|
||||
event = {
|
||||
"text": text,
|
||||
"cue": f"session {session_id} turn {seen}",
|
||||
"tier": "episodic",
|
||||
"role": role,
|
||||
"ts": datetime.now(timezone.utc).isoformat(),
|
||||
}
|
||||
fh.write(json.dumps(event, ensure_ascii=False) + "\n")
|
||||
return out_path
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# R3 / D7.1-04: deferred-captures drain (READ side, daemon-resident)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def drain_deferred_captures(store: MemoryStore) -> dict[str, int]:
|
||||
"""Consume ``~/.iai-mcp/.deferred-captures/*.jsonl`` produced by
|
||||
``iai-mcp capture-transcript --no-spawn`` (Plan 07.1-05 WRITE side).
|
||||
|
||||
For each ``.jsonl`` file in the deferred-captures dir:
|
||||
|
||||
* Read line 1 (header). If ``version > 1`` (forward-compat guard), log a
|
||||
"skip" line to ``~/.iai-mcp/logs/deferred-drain-YYYY-MM-DD.log`` and
|
||||
leave the file in place — a future daemon version will know how to
|
||||
handle it.
|
||||
* For each event line (lines 2..N), call ``capture_turn(store, ...)``
|
||||
and inspect its return-status dict. W2 / D-02:
|
||||
- status="inserted" → events_inserted += 1
|
||||
- status="reinforced" → events_reinforced += 1
|
||||
- status="skipped" with reason matching ^insert-failed:* (capture_turn
|
||||
path where store.insert raised) → events_skipped_insert_failed += 1
|
||||
and the WHOLE FILE is treated as failed: renamed to
|
||||
.failed-<ts>.jsonl, NOT unlinked.
|
||||
- status="skipped" with any other reason (shield HARD_BLOCK, too short,
|
||||
invalid tier — all *intentional* drops) → events_skipped_intentional
|
||||
+= 1.
|
||||
* On full success (zero insert-failed events): delete the file,
|
||||
files_drained += 1.
|
||||
* On any insert-failed event: rename the file to
|
||||
``<basename>.failed-<unix_ts>.jsonl`` (preserves evidence for manual
|
||||
inspection), log a "insert-failed" line with the first error,
|
||||
files_failed += 1.
|
||||
* On parser/header exception: same outer rename + log path as before
|
||||
(existing behaviour), files_failed += 1.
|
||||
* On 0-byte / empty file: delete it (no-op header-only deferral).
|
||||
|
||||
Idempotent: re-running on a directory with no ``.jsonl`` files (or no
|
||||
deferred-captures dir at all) returns zero counts without error.
|
||||
|
||||
Returns dict with keys:
|
||||
files_drained, files_failed,
|
||||
events_inserted, events_reinforced,
|
||||
events_skipped_intentional, events_skipped_insert_failed.
|
||||
|
||||
Notes:
|
||||
- Uses ``Path.home()`` at call time so HOME-monkeypatched tests get
|
||||
the right tmp dir.
|
||||
- Stdlib only — no new deps.
|
||||
- Caller (daemon.main / _tick_body) MUST wrap in try/except so a
|
||||
drain crash never propagates into the asyncio event loop. This
|
||||
function itself catches per-file exceptions defensively.
|
||||
- The ``store`` argument is the same MemoryStore instance the
|
||||
daemon uses for all other writes (so connection/lock semantics
|
||||
are consistent). Drain MUST run inside ``asyncio.to_thread`` from
|
||||
async callers because ``capture_turn`` does sync LanceDB I/O.
|
||||
"""
|
||||
deferred_dir = Path.home() / ".iai-mcp" / ".deferred-captures"
|
||||
log_dir = Path.home() / ".iai-mcp" / "logs"
|
||||
log_dir.mkdir(parents=True, exist_ok=True)
|
||||
log_path = (
|
||||
log_dir / f"deferred-drain-{datetime.now(timezone.utc).strftime('%Y-%m-%d')}.log"
|
||||
)
|
||||
counts = {
|
||||
"files_drained": 0,
|
||||
"files_failed": 0,
|
||||
"events_inserted": 0,
|
||||
"events_reinforced": 0,
|
||||
"events_skipped_intentional": 0,
|
||||
"events_skipped_insert_failed": 0,
|
||||
}
|
||||
if not deferred_dir.exists():
|
||||
return counts
|
||||
for fpath in sorted(deferred_dir.glob("*.jsonl")):
|
||||
file_had_insert_failure = False
|
||||
file_first_error: str | None = None
|
||||
try:
|
||||
with fpath.open() as fh:
|
||||
lines = [ln.rstrip("\n") for ln in fh if ln.strip()]
|
||||
if not lines:
|
||||
# Empty file (e.g. partial write that never got header) — drop.
|
||||
fpath.unlink()
|
||||
continue
|
||||
header = json.loads(lines[0])
|
||||
if header.get("version", 0) > 1:
|
||||
# Forward-compat guard: leave the file in place; a future
|
||||
# daemon revision will know the format. Log + continue.
|
||||
with log_path.open("a") as logf:
|
||||
logf.write(
|
||||
f"{datetime.now(timezone.utc).isoformat()} skip {fpath.name}: "
|
||||
f"version={header.get('version')}\n"
|
||||
)
|
||||
continue
|
||||
session_id = header.get("session_id", "-")
|
||||
event_lines = lines[1:]
|
||||
for ln in event_lines:
|
||||
ev = json.loads(ln)
|
||||
# Reuse capture_turn so the deferred path lands in the same
|
||||
# shield + dedup + encryption pipeline as live captures.
|
||||
result = capture_turn(
|
||||
store,
|
||||
cue=ev.get("cue", ""),
|
||||
text=ev.get("text", ""),
|
||||
tier=ev.get("tier", "episodic"),
|
||||
session_id=session_id,
|
||||
role=ev.get("role", "user"),
|
||||
)
|
||||
status = result.get("status", "skipped")
|
||||
reason = result.get("reason", "")
|
||||
if status == "inserted":
|
||||
counts["events_inserted"] += 1
|
||||
elif status == "reinforced":
|
||||
counts["events_reinforced"] += 1
|
||||
elif status == "skipped" and reason.startswith("insert-failed:"):
|
||||
counts["events_skipped_insert_failed"] += 1
|
||||
file_had_insert_failure = True
|
||||
if file_first_error is None:
|
||||
file_first_error = reason
|
||||
else:
|
||||
counts["events_skipped_intentional"] += 1
|
||||
if file_had_insert_failure:
|
||||
# preserve the file as evidence — at least one
|
||||
# event hit the insert-failed code path inside capture_turn
|
||||
# (store.insert raised, capture_turn swallowed and returned
|
||||
# status=skipped reason=insert-failed:*). Pre-07.9 the file
|
||||
# was unlinked here and the data was silently lost.
|
||||
failed_path = fpath.with_suffix(f".failed-{int(time.time())}.jsonl")
|
||||
fpath.rename(failed_path)
|
||||
with log_path.open("a") as logf:
|
||||
logf.write(
|
||||
f"{datetime.now(timezone.utc).isoformat()} insert-failed "
|
||||
f"{fpath.name}: first_error={file_first_error}\n"
|
||||
)
|
||||
counts["files_failed"] += 1
|
||||
else:
|
||||
fpath.unlink()
|
||||
counts["files_drained"] += 1
|
||||
except Exception as e: # noqa: BLE001 -- per-file isolation, never raise
|
||||
try:
|
||||
# Preserve evidence: rename so the next drain pass skips it
|
||||
# AND a human can inspect the failure.
|
||||
failed_path = fpath.with_suffix(f".failed-{int(time.time())}.jsonl")
|
||||
fpath.rename(failed_path)
|
||||
with log_path.open("a") as logf:
|
||||
logf.write(
|
||||
f"{datetime.now(timezone.utc).isoformat()} failed "
|
||||
f"{fpath.name}: {type(e).__name__}: {e}\n"
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
counts["files_failed"] += 1
|
||||
return counts
|
||||
522
src/iai_mcp/capture_queue.py
Normal file
522
src/iai_mcp/capture_queue.py
Normal file
|
|
@ -0,0 +1,522 @@
|
|||
"""Phase 10.2 -- persistent capture queue with atomic append + idempotent ingest.
|
||||
|
||||
The capture queue is the durable buffer that makes the L1 hibernation contract
|
||||
viable. Wrapper writes to ``~/.iai-mcp/pending/`` whenever the daemon socket
|
||||
is unreachable (Hibernation, mid-restart, crashed). On the next Wake transition
|
||||
the daemon drains the queue via ``ingest_pending(handler)`` -- the handler
|
||||
plugs into the existing ``iai_mcp.capture`` path so the verbatim contract
|
||||
(Phase 5/6) is preserved end-to-end.
|
||||
|
||||
Storage layout under ``~/.iai-mcp/pending/``::
|
||||
|
||||
pending-<ulid>.json -- one queued record (committed file)
|
||||
pending-<ulid>.json.tmp -- transient temp file before atomic rename
|
||||
pending-<ulid>.lock -- present only during in-flight ingest of <ulid>
|
||||
.overflow-audit.log -- JSONL append-only log of dropped-oldest events
|
||||
|
||||
Hard guarantees:
|
||||
|
||||
- **Atomic append**: writes go to ``.tmp`` then ``os.replace`` to final name
|
||||
(POSIX atomic rename). A crash mid-write leaves a stray ``.tmp`` but never
|
||||
a half-written final file. ``pending_count`` and ``list_pending`` ignore
|
||||
``.tmp``.
|
||||
- **Idempotent ingest**: each pending file is claimed via ``fcntl.flock`` on
|
||||
the matching ``.lock`` file. Lock contention => skip (another worker has
|
||||
it). Handler success => delete pending + lock atomically. Handler raises
|
||||
=> leave both intact for next-call retry.
|
||||
- **Bounded queue**: ``append`` triggers ``prune_oldest`` once
|
||||
``pending_count > max_size``. Drops the oldest ``max_size - 9_900`` files
|
||||
in one batch (amortised I/O) and writes one JSONL line per drop to the
|
||||
audit log.
|
||||
- **Verbatim round-trip**: the JSON payload uses ``ensure_ascii=False`` so
|
||||
``record["surface"]`` round-trips byte-identically including UTF-8 BMP +
|
||||
astral characters and combining marks.
|
||||
- **No new deps**: stdlib only -- ``os, pathlib, json, uuid, fcntl, secrets,
|
||||
time, datetime, threading, errno``.
|
||||
|
||||
ULID derivation: 48-bit millisecond unix timestamp (big-endian) + 80 bits of
|
||||
``secrets.token_bytes`` randomness, encoded with Crockford base32 per the
|
||||
ulid spec (https://github.com/ulid/spec). The result is 26 characters,
|
||||
lexicographically sortable by time, and collision-resistant for thousands of
|
||||
appends per millisecond. Implemented inline -- the project deliberately
|
||||
avoids a ``python-ulid`` dependency.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import errno
|
||||
import fcntl
|
||||
import json
|
||||
import os
|
||||
import secrets
|
||||
import threading
|
||||
import time
|
||||
from collections.abc import Callable
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Defaults / configuration
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
DEFAULT_QUEUE_DIR: Path = Path.home() / ".iai-mcp" / "pending"
|
||||
"""Production location for the persistent queue."""
|
||||
|
||||
DEFAULT_MAX_SIZE: int = 10_000
|
||||
"""Default ceiling before ``prune_oldest`` kicks in."""
|
||||
|
||||
# Drop ~100 oldest at once when overflowing so the I/O cost is amortised
|
||||
# across many subsequent appends rather than paid on every single overflow.
|
||||
_PRUNE_BATCH_HEADROOM: int = 100
|
||||
|
||||
SCHEMA_VERSION: int = 1
|
||||
"""Bumped only when the on-disk pending-<ulid>.json layout changes."""
|
||||
|
||||
_AUDIT_LOG_NAME: str = ".overflow-audit.log"
|
||||
|
||||
# Crockford base32 alphabet (no I, L, O, U) per ulid spec.
|
||||
_CROCKFORD: str = "0123456789ABCDEFGHJKMNPQRSTVWXYZ"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Errors
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class CaptureQueueError(Exception):
|
||||
"""Base class for all capture-queue errors."""
|
||||
|
||||
|
||||
class CaptureQueueSchemaError(CaptureQueueError):
|
||||
"""Raised when a pending file declares a ``schema_version`` we don't grok."""
|
||||
|
||||
|
||||
class CaptureQueueLocked(CaptureQueueError):
|
||||
"""Raised when an in-flight ingest cannot acquire the per-record lock.
|
||||
|
||||
Currently only used internally; ``ingest_pending`` swallows lock contention
|
||||
and treats the file as "claimed by another worker" rather than raising.
|
||||
"""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ULID generator (stdlib-only, time-sortable)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Monotonic-ish guard: if two ULIDs would land in the same millisecond, bump
|
||||
# the timestamp by 1ms so lexicographic sort matches insertion order. The
|
||||
# bump is bounded -- once wall clock advances past the bumped value the
|
||||
# guard resets. Threadsafe via a module-level lock.
|
||||
_ulid_lock = threading.Lock()
|
||||
_last_ms: int = 0
|
||||
|
||||
|
||||
def _now_ms() -> int:
|
||||
"""Current wall-clock time in unix milliseconds (UTC)."""
|
||||
return int(time.time() * 1000)
|
||||
|
||||
|
||||
def _b32_encode(data: bytes, length: int) -> str:
|
||||
"""Crockford base32 encode ``data`` to exactly ``length`` characters.
|
||||
|
||||
``data`` is treated as an unsigned big-endian integer. Result is
|
||||
zero-padded on the left if the integer would naturally render to
|
||||
fewer characters. Caller is responsible for sizing ``length``
|
||||
correctly: 10 chars for the 48-bit timestamp prefix, 16 chars for
|
||||
the 80-bit randomness suffix.
|
||||
"""
|
||||
n = int.from_bytes(data, "big")
|
||||
out = []
|
||||
for _ in range(length):
|
||||
out.append(_CROCKFORD[n & 0x1F])
|
||||
n >>= 5
|
||||
return "".join(reversed(out))
|
||||
|
||||
|
||||
def generate_ulid() -> str:
|
||||
"""Return a fresh 26-character Crockford-base32 ULID.
|
||||
|
||||
The first 10 chars encode the millisecond unix timestamp; the next 16
|
||||
encode 80 bits of random data. Lexicographic sort = chronological sort
|
||||
(with millisecond resolution; finer ordering within a millisecond is
|
||||
not guaranteed by ULID itself but the monotonic guard below preserves
|
||||
insertion order in practice).
|
||||
"""
|
||||
global _last_ms
|
||||
with _ulid_lock:
|
||||
ms = _now_ms()
|
||||
if ms <= _last_ms:
|
||||
ms = _last_ms + 1
|
||||
_last_ms = ms
|
||||
|
||||
ts_bytes = ms.to_bytes(6, "big") # 48 bits
|
||||
rand_bytes = secrets.token_bytes(10) # 80 bits
|
||||
return _b32_encode(ts_bytes, 10) + _b32_encode(rand_bytes, 16)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# CaptureQueue
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class CaptureQueue:
|
||||
"""Persistent on-disk FIFO buffer for ``memory_capture`` records.
|
||||
|
||||
See module docstring for storage layout and guarantees.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
queue_dir: Path | None = None,
|
||||
max_size: int = DEFAULT_MAX_SIZE,
|
||||
) -> None:
|
||||
if max_size <= 0:
|
||||
raise ValueError(f"max_size must be positive, got {max_size}")
|
||||
self._queue_dir = (
|
||||
Path(queue_dir) if queue_dir is not None else DEFAULT_QUEUE_DIR
|
||||
)
|
||||
self._queue_dir.mkdir(parents=True, exist_ok=True)
|
||||
self._max_size = max_size
|
||||
self._audit_log = self._queue_dir / _AUDIT_LOG_NAME
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Read accessors
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@property
|
||||
def queue_dir(self) -> Path:
|
||||
"""Filesystem location of the queue directory."""
|
||||
return self._queue_dir
|
||||
|
||||
@property
|
||||
def max_size(self) -> int:
|
||||
"""Maximum number of pending records before overflow pruning kicks in."""
|
||||
return self._max_size
|
||||
|
||||
@property
|
||||
def audit_log_path(self) -> Path:
|
||||
"""Path to ``.overflow-audit.log`` (may not exist if no overflows happened)."""
|
||||
return self._audit_log
|
||||
|
||||
def pending_count(self) -> int:
|
||||
"""Return number of committed pending files (ignores ``.tmp`` and ``.lock``)."""
|
||||
return sum(1 for _ in self._iter_pending_files())
|
||||
|
||||
def list_pending(self) -> list[Path]:
|
||||
"""Return committed pending files sorted by ULID (oldest first)."""
|
||||
return sorted(self._iter_pending_files(), key=lambda p: p.name)
|
||||
|
||||
def _iter_pending_files(self):
|
||||
"""Yield every ``pending-<ulid>.json`` (no ``.tmp``, no ``.lock``)."""
|
||||
for entry in self._queue_dir.iterdir():
|
||||
name = entry.name
|
||||
if (
|
||||
entry.is_file()
|
||||
and name.startswith("pending-")
|
||||
and name.endswith(".json")
|
||||
and not name.endswith(".json.tmp")
|
||||
):
|
||||
yield entry
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Append (atomic temp + rename)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def append(self, record: dict) -> str:
|
||||
"""Append a record to the queue. Returns the assigned ULID.
|
||||
|
||||
Atomic: writes ``pending-<ulid>.json.tmp`` then ``os.replace`` to
|
||||
``pending-<ulid>.json``. A crash between write and rename leaves a
|
||||
stray ``.tmp`` (cleaned up by future ``prune_oldest`` if it ever
|
||||
looks at the directory listing -- but ``pending_count`` already
|
||||
ignores it). Triggers ``prune_oldest`` once the post-append count
|
||||
exceeds ``max_size``.
|
||||
"""
|
||||
if not isinstance(record, dict):
|
||||
raise TypeError(f"record must be a dict, got {type(record).__name__}")
|
||||
|
||||
ulid = generate_ulid()
|
||||
appended_at = datetime.now(timezone.utc).isoformat()
|
||||
envelope: dict = {
|
||||
"ulid": ulid,
|
||||
"appended_at": appended_at,
|
||||
"record": record,
|
||||
"schema_version": SCHEMA_VERSION,
|
||||
}
|
||||
|
||||
final_path = self._queue_dir / f"pending-{ulid}.json"
|
||||
tmp_path = self._queue_dir / f"pending-{ulid}.json.tmp"
|
||||
|
||||
# Open with O_CREAT|O_EXCL|O_WRONLY so a colliding ULID is detected
|
||||
# rather than silently overwriting (collision => generate_ulid bug).
|
||||
# 0o600 keeps records user-only on disk.
|
||||
fd = os.open(
|
||||
str(tmp_path),
|
||||
os.O_WRONLY | os.O_CREAT | os.O_EXCL,
|
||||
0o600,
|
||||
)
|
||||
try:
|
||||
payload = json.dumps(
|
||||
envelope,
|
||||
ensure_ascii=False, # verbatim Unicode round-trip
|
||||
separators=(",", ":"),
|
||||
).encode("utf-8")
|
||||
os.write(fd, payload)
|
||||
os.fsync(fd)
|
||||
except Exception:
|
||||
# On any failure between open and rename, drop the temp file so
|
||||
# we don't accumulate orphans. If the unlink itself fails (very
|
||||
# unlikely on a file we just created) re-raise the original.
|
||||
try:
|
||||
os.unlink(tmp_path)
|
||||
except OSError:
|
||||
pass
|
||||
raise
|
||||
finally:
|
||||
os.close(fd)
|
||||
|
||||
# POSIX-atomic rename: visible-or-not, never half-visible.
|
||||
os.replace(tmp_path, final_path)
|
||||
|
||||
# Overflow check happens AFTER the rename so the new record is
|
||||
# never the one we drop -- prune_oldest by definition drops the
|
||||
# oldest, not the newest.
|
||||
if self.pending_count() > self._max_size:
|
||||
target = max(0, self._max_size - _PRUNE_BATCH_HEADROOM)
|
||||
self.prune_oldest(target_size=target)
|
||||
|
||||
return ulid
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Ingest (idempotent, lock-claimed)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def ingest_pending(self, handler: Callable[[dict], None]) -> int:
|
||||
"""Drain pending records via ``handler``. Returns count successfully ingested.
|
||||
|
||||
For each pending file (oldest first):
|
||||
|
||||
1. ``open`` ``pending-<ulid>.lock`` (creating if needed).
|
||||
2. ``fcntl.flock(LOCK_EX | LOCK_NB)`` -- if already locked, skip.
|
||||
3. Read + JSON-decode ``pending-<ulid>.json``; raise
|
||||
``CaptureQueueSchemaError`` on schema mismatch.
|
||||
4. Call ``handler(record)`` where ``record`` is the inner dict
|
||||
(not the envelope).
|
||||
5. On success: ``unlink`` the pending file FIRST (so a crash
|
||||
between unlink calls cannot resurrect a deleted record), then
|
||||
release the lock and unlink the lock file.
|
||||
6. On handler exception: release the lock fd but leave the lock
|
||||
file AND the pending file on disk. Future calls retry.
|
||||
|
||||
Schema errors propagate to the caller after closing fds for the
|
||||
offending file -- we do NOT swallow them, because a schema bump
|
||||
is a deploy-time event the caller needs to see.
|
||||
"""
|
||||
if not callable(handler):
|
||||
raise TypeError("handler must be callable")
|
||||
|
||||
ingested = 0
|
||||
for pending_path in self.list_pending():
|
||||
ulid = self._ulid_from_path(pending_path)
|
||||
lock_path = self._queue_dir / f"pending-{ulid}.lock"
|
||||
|
||||
# Open (or create) the lock file. 0o600 to keep it user-only.
|
||||
try:
|
||||
lock_fd = os.open(
|
||||
str(lock_path),
|
||||
os.O_WRONLY | os.O_CREAT,
|
||||
0o600,
|
||||
)
|
||||
except OSError:
|
||||
# Cannot even create the lock -- skip this record. Leave
|
||||
# the pending file in place so a future retry can pick
|
||||
# it up once the disk situation clears.
|
||||
continue
|
||||
|
||||
try:
|
||||
try:
|
||||
fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
|
||||
except OSError as exc:
|
||||
# EWOULDBLOCK / EAGAIN => another worker has the lock.
|
||||
# Anything else: surface it; we don't expect it here.
|
||||
if exc.errno in (errno.EWOULDBLOCK, errno.EAGAIN):
|
||||
continue
|
||||
raise
|
||||
|
||||
# Lock acquired. The pending file may have been deleted
|
||||
# between list_pending and now (rare race with another
|
||||
# worker that claimed-and-finished), so re-check.
|
||||
if not pending_path.exists():
|
||||
continue
|
||||
|
||||
envelope = self._read_envelope(pending_path)
|
||||
# Schema check -- raise loud so deploys notice.
|
||||
version = envelope.get("schema_version")
|
||||
if version != SCHEMA_VERSION:
|
||||
raise CaptureQueueSchemaError(
|
||||
f"unsupported schema_version={version!r} in "
|
||||
f"{pending_path.name}; expected {SCHEMA_VERSION}",
|
||||
)
|
||||
|
||||
record = envelope["record"]
|
||||
# Handler runs OUTSIDE any try/except below: if it raises,
|
||||
# we explicitly leave the pending file + lock file on disk
|
||||
# for the next call to retry.
|
||||
handler(record)
|
||||
|
||||
# Handler returned cleanly: delete pending FIRST to make
|
||||
# the success durable; lock cleanup is best-effort.
|
||||
try:
|
||||
os.unlink(pending_path)
|
||||
except FileNotFoundError:
|
||||
# Already gone -- another worker raced us. Treat as
|
||||
# success since the record is no longer pending.
|
||||
pass
|
||||
ingested += 1
|
||||
finally:
|
||||
# Always release + unlink the lock fd. If the handler
|
||||
# raised, the bare ``finally`` runs before the exception
|
||||
# propagates, so the lock fd never leaks.
|
||||
try:
|
||||
fcntl.flock(lock_fd, fcntl.LOCK_UN)
|
||||
except OSError:
|
||||
pass
|
||||
os.close(lock_fd)
|
||||
# Only unlink the lock file if we ALSO unlinked the pending
|
||||
# file (i.e. a clean handler success). On handler exception
|
||||
# we want the lock file to remain so a follow-up
|
||||
# ``ingest_pending`` can detect mid-flight crash state.
|
||||
if not pending_path.exists():
|
||||
try:
|
||||
os.unlink(lock_path)
|
||||
except FileNotFoundError:
|
||||
pass
|
||||
|
||||
return ingested
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Overflow pruning
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def prune_oldest(self, target_size: int | None = None) -> int:
|
||||
"""Drop oldest pending files until count <= ``target_size``.
|
||||
|
||||
``target_size`` defaults to ``max_size`` -- in normal overflow flow
|
||||
``append`` passes ``max_size - 100`` so the next 99 appends amortise
|
||||
the I/O cost. Each dropped file produces one JSONL line in
|
||||
``.overflow-audit.log``.
|
||||
"""
|
||||
if target_size is None:
|
||||
target_size = self._max_size
|
||||
if target_size < 0:
|
||||
raise ValueError(f"target_size must be >= 0, got {target_size}")
|
||||
|
||||
oldest_first = self.list_pending()
|
||||
excess = len(oldest_first) - target_size
|
||||
if excess <= 0:
|
||||
return 0
|
||||
|
||||
queue_size_before = len(oldest_first)
|
||||
dropped = 0
|
||||
for pending_path in oldest_first[:excess]:
|
||||
ulid = self._ulid_from_path(pending_path)
|
||||
try:
|
||||
envelope = self._read_envelope(pending_path)
|
||||
appended_at = envelope.get("appended_at", "")
|
||||
except (FileNotFoundError, json.JSONDecodeError, CaptureQueueError):
|
||||
# Read failure is non-fatal for pruning: we still drop the
|
||||
# file and log "unknown" appended_at to audit.
|
||||
appended_at = ""
|
||||
|
||||
try:
|
||||
os.unlink(pending_path)
|
||||
except FileNotFoundError:
|
||||
# Someone else raced us (concurrent prune?) -- skip
|
||||
# without auditing since we didn't actually drop it.
|
||||
continue
|
||||
|
||||
self._audit_drop(
|
||||
dropped_ulid=ulid,
|
||||
appended_at=appended_at,
|
||||
queue_size_before_prune=queue_size_before,
|
||||
)
|
||||
dropped += 1
|
||||
return dropped
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Internal helpers
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@staticmethod
|
||||
def _ulid_from_path(path: Path) -> str:
|
||||
"""Extract the ULID from a ``pending-<ulid>.json`` filename."""
|
||||
# ``stem`` for ``pending-XYZ.json`` is ``pending-XYZ``.
|
||||
return path.stem[len("pending-"):]
|
||||
|
||||
@staticmethod
|
||||
def _read_envelope(path: Path) -> dict:
|
||||
"""Read + JSON-decode a pending file. Raises ``json.JSONDecodeError``
|
||||
or ``FileNotFoundError`` on read failure; caller decides handling."""
|
||||
with path.open("rb") as f:
|
||||
raw = f.read()
|
||||
return json.loads(raw.decode("utf-8"))
|
||||
|
||||
def _audit_drop(
|
||||
self,
|
||||
*,
|
||||
dropped_ulid: str,
|
||||
appended_at: str,
|
||||
queue_size_before_prune: int,
|
||||
) -> None:
|
||||
"""Append one JSONL line to ``.overflow-audit.log``.
|
||||
|
||||
Uses ``O_APPEND`` + ``flock`` for cross-process safety, mirroring
|
||||
``LifecycleEventLog.append``. Failures are swallowed: the audit
|
||||
log is observability, not authoritative state -- a failed audit
|
||||
write must not abort the prune.
|
||||
"""
|
||||
line = (
|
||||
json.dumps(
|
||||
{
|
||||
"ts": datetime.now(timezone.utc).isoformat(),
|
||||
"dropped_ulid": dropped_ulid,
|
||||
"appended_at": appended_at,
|
||||
"reason": "queue_overflow",
|
||||
"queue_size_before_prune": queue_size_before_prune,
|
||||
},
|
||||
ensure_ascii=False,
|
||||
separators=(",", ":"),
|
||||
)
|
||||
+ "\n"
|
||||
)
|
||||
try:
|
||||
fd = os.open(
|
||||
str(self._audit_log),
|
||||
os.O_WRONLY | os.O_APPEND | os.O_CREAT,
|
||||
0o600,
|
||||
)
|
||||
except OSError:
|
||||
return
|
||||
try:
|
||||
try:
|
||||
fcntl.flock(fd, fcntl.LOCK_EX)
|
||||
os.write(fd, line.encode("utf-8"))
|
||||
os.fsync(fd)
|
||||
finally:
|
||||
try:
|
||||
fcntl.flock(fd, fcntl.LOCK_UN)
|
||||
except OSError:
|
||||
pass
|
||||
finally:
|
||||
os.close(fd)
|
||||
|
||||
|
||||
__all__ = [
|
||||
"CaptureQueue",
|
||||
"CaptureQueueError",
|
||||
"CaptureQueueLocked",
|
||||
"CaptureQueueSchemaError",
|
||||
"DEFAULT_MAX_SIZE",
|
||||
"DEFAULT_QUEUE_DIR",
|
||||
"SCHEMA_VERSION",
|
||||
"generate_ulid",
|
||||
]
|
||||
2896
src/iai_mcp/cli.py
Normal file
2896
src/iai_mcp/cli.py
Normal file
File diff suppressed because it is too large
Load diff
321
src/iai_mcp/community.py
Normal file
321
src/iai_mcp/community.py
Normal file
|
|
@ -0,0 +1,321 @@
|
|||
"""Hierarchical community detection (D-05 bootstrap + stable UUIDs + CONN-01/04).
|
||||
|
||||
Policy:
|
||||
- N < SMALL_N_FLAT (200): single flat community. Rich-club coefficient is too noisy
|
||||
below this per van den Heuvel & Sporns 2011; Leiden output is unstable too.
|
||||
- SMALL_N_FLAT <= N < MID_N_LEIDEN (500): run Leiden; accept only if Q >= 0.2
|
||||
(MODULARITY_FLOOR), else fall back to flat. Protects against Leiden producing
|
||||
visible but unjustified communities in sparse graphs.
|
||||
- N >= MID_N_LEIDEN: always run Leiden; accept result regardless of Q
|
||||
(graph is big enough that any modular structure is meaningful).
|
||||
|
||||
Stable UUIDs:
|
||||
- Every community gets a persistent UUID at creation.
|
||||
- On re-run, each new community's centroid is matched against prior centroids;
|
||||
the highest cosine >= UUID_ROTATE_COSINE (0.7) reuses the prior UUID.
|
||||
If no prior centroid passes the 0.7 bar, a fresh UUID is allocated.
|
||||
- This prevents ID churn on re-runs where Leiden re-orders labels but the
|
||||
cluster membership is essentially the same.
|
||||
|
||||
CONN-01 three-level parcellation (Phase 1 approximation):
|
||||
- Level 1: top_communities -- top 7 (Yeo-like) by member count.
|
||||
- Level 2: mid_regions -- community UUID -> member node UUIDs
|
||||
(Schaefer-scale 200-400 sub-parcellation is a Phase-2 refinement;
|
||||
for we expose the community -> members mapping).
|
||||
- Level 3: node_to_community -- every leaf record's community assignment.
|
||||
|
||||
CONN-04 refresh threshold:
|
||||
- needs_refresh(prior, current_Q) returns True iff |prior.Q - current_Q| > 0.05.
|
||||
The pipeline or session-start assembler decides when to re-run detect_communities
|
||||
based on this signal.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from uuid import UUID, uuid4
|
||||
|
||||
import numpy as np
|
||||
|
||||
from iai_mcp.graph import _HAS_IGRAPH, IGRAPH_THRESHOLD, MemoryGraph
|
||||
|
||||
# bootstrap thresholds
|
||||
SMALL_N_FLAT = 200
|
||||
MID_N_LEIDEN = 500
|
||||
MODULARITY_FLOOR = 0.2
|
||||
|
||||
# CONN-04 refresh trigger
|
||||
REFRESH_DELTA = 0.05
|
||||
|
||||
# stable-UUID cosine floor
|
||||
UUID_ROTATE_COSINE = 0.7
|
||||
|
||||
# CONN-01 level-1 cap (Yeo-like 7 networks)
|
||||
MAX_TOP_COMMUNITIES = 7
|
||||
|
||||
|
||||
@dataclass
|
||||
class CommunityAssignment:
|
||||
"""Output of detect_communities -- consumed by pipeline.pipeline_recall.
|
||||
|
||||
- node_to_community: leaf UUID -> community UUID
|
||||
- community_centroids: community UUID -> mean of member embeddings
|
||||
- modularity: Leiden Q (0.0 for flat)
|
||||
- backend: "flat" | "leiden-networkx" | "leiden-igraph"
|
||||
- top_communities: up to MAX_TOP_COMMUNITIES by member count (CONN-01 L1)
|
||||
- mid_regions: community UUID -> list of member leaf UUIDs (CONN-01 L2)
|
||||
"""
|
||||
|
||||
node_to_community: dict[UUID, UUID] = field(default_factory=dict)
|
||||
community_centroids: dict[UUID, list[float]] = field(default_factory=dict)
|
||||
modularity: float = 0.0
|
||||
backend: str = "flat"
|
||||
top_communities: list[UUID] = field(default_factory=list)
|
||||
mid_regions: dict[UUID, list[UUID]] = field(default_factory=dict)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- math helpers
|
||||
|
||||
|
||||
def _cosine(a: list[float], b: list[float]) -> float:
|
||||
av = np.asarray(a, dtype=np.float32)
|
||||
bv = np.asarray(b, dtype=np.float32)
|
||||
na = float(np.linalg.norm(av))
|
||||
nb = float(np.linalg.norm(bv))
|
||||
if na == 0 or nb == 0:
|
||||
return 0.0
|
||||
return float(np.dot(av, bv) / (na * nb))
|
||||
|
||||
|
||||
def _compute_centroid(embeddings: list[list[float]]) -> list[float]:
|
||||
if not embeddings:
|
||||
return []
|
||||
arr = np.asarray(embeddings, dtype=np.float32)
|
||||
centroid = arr.mean(axis=0)
|
||||
norm = float(np.linalg.norm(centroid))
|
||||
if norm > 0:
|
||||
centroid = centroid / norm
|
||||
return centroid.tolist()
|
||||
|
||||
|
||||
def _map_to_stable_uuids(
|
||||
raw_partition: dict[UUID, int],
|
||||
graph: MemoryGraph,
|
||||
prior: CommunityAssignment | None,
|
||||
) -> tuple[dict[UUID, UUID], dict[UUID, list[float]]]:
|
||||
"""assign UUIDs to raw integer community labels, reusing prior UUIDs
|
||||
when a new centroid matches a prior centroid with cosine >= UUID_ROTATE_COSINE.
|
||||
|
||||
Matching is greedy (descending best-match-first) and one-to-one: each prior
|
||||
UUID is claimed by at most one new community.
|
||||
"""
|
||||
# Group nodes by raw integer label.
|
||||
groups: dict[int, list[UUID]] = {}
|
||||
for node, grp in raw_partition.items():
|
||||
groups.setdefault(grp, []).append(node)
|
||||
|
||||
# Compute new centroids per group. Filter out nodes with no embedding
|
||||
# (e.g. sentinel UUIDs like PROFILE_SENTINEL) and zero-pad the remaining
|
||||
# members to the *current* store dim rather than a hardcoded 384d, so the
|
||||
# centroid input stays homogeneous after a 384d -> 1024d re-embed migration.
|
||||
new_centroids: dict[int, list[float]] = {}
|
||||
for grp, nodes in groups.items():
|
||||
valid = [e for n in nodes if (e := graph.get_embedding(n))]
|
||||
if not valid:
|
||||
continue
|
||||
dim = len(valid[0])
|
||||
embs = [graph.get_embedding(n) or [0.0] * dim for n in nodes]
|
||||
new_centroids[grp] = _compute_centroid(embs)
|
||||
|
||||
# Greedy one-to-one assignment: for each new group, pick the best unused
|
||||
# prior UUID with cosine >= UUID_ROTATE_COSINE.
|
||||
uuid_for_group: dict[int, UUID] = {}
|
||||
used_prior: set[UUID] = set()
|
||||
if prior:
|
||||
# Stable ordering: by group id ascending so tie-breaks are deterministic.
|
||||
for grp in sorted(new_centroids.keys()):
|
||||
cent = new_centroids[grp]
|
||||
best_prior: UUID | None = None
|
||||
best_sim: float = -1.0
|
||||
for prior_uuid, prior_cent in prior.community_centroids.items():
|
||||
if prior_uuid in used_prior:
|
||||
continue
|
||||
s = _cosine(cent, prior_cent)
|
||||
if s > best_sim:
|
||||
best_sim = s
|
||||
best_prior = prior_uuid
|
||||
if best_prior is not None and best_sim >= UUID_ROTATE_COSINE:
|
||||
uuid_for_group[grp] = best_prior
|
||||
used_prior.add(best_prior)
|
||||
|
||||
# Allocate fresh UUIDs for groups that didn't match any prior.
|
||||
for grp in groups:
|
||||
if grp not in uuid_for_group:
|
||||
uuid_for_group[grp] = uuid4()
|
||||
|
||||
# Build final maps.
|
||||
node_to_community: dict[UUID, UUID] = {}
|
||||
community_centroids: dict[UUID, list[float]] = {}
|
||||
for grp, nodes in groups.items():
|
||||
u = uuid_for_group[grp]
|
||||
community_centroids[u] = new_centroids[grp]
|
||||
for n in nodes:
|
||||
node_to_community[n] = u
|
||||
|
||||
return node_to_community, community_centroids
|
||||
|
||||
|
||||
# ------------------------------------------------------------- flat assignment
|
||||
|
||||
|
||||
def _flat_assignment(
|
||||
graph: MemoryGraph, prior: CommunityAssignment | None
|
||||
) -> CommunityAssignment:
|
||||
"""Single flat community covering every node."""
|
||||
nodes: list[UUID] = []
|
||||
valid_embs: list[list[float]] = []
|
||||
for node in graph._nx.nodes():
|
||||
u = UUID(node)
|
||||
nodes.append(u)
|
||||
emb = graph.get_embedding(u)
|
||||
if emb:
|
||||
valid_embs.append(emb)
|
||||
if not nodes:
|
||||
return CommunityAssignment(backend="flat")
|
||||
|
||||
# Zero-pad any sentinel nodes to the detected store dim so centroid math
|
||||
# stays homogeneous post-re-embed (was hardcoded 384d before 1024d support).
|
||||
dim = len(valid_embs[0]) if valid_embs else 0
|
||||
embs: list[list[float]] = []
|
||||
for node in graph._nx.nodes():
|
||||
u = UUID(node)
|
||||
emb = graph.get_embedding(u)
|
||||
embs.append(emb if emb else [0.0] * dim)
|
||||
centroid = _compute_centroid(embs) if dim else []
|
||||
|
||||
# Stable UUID across flat runs: reuse prior's single UUID if centroid matches.
|
||||
flat_uuid: UUID | None = None
|
||||
if prior and len(prior.community_centroids) == 1:
|
||||
prior_uuid, prior_cent = next(iter(prior.community_centroids.items()))
|
||||
if _cosine(centroid, prior_cent) >= UUID_ROTATE_COSINE:
|
||||
flat_uuid = prior_uuid
|
||||
if flat_uuid is None:
|
||||
flat_uuid = uuid4()
|
||||
|
||||
node_to_community = {n: flat_uuid for n in nodes}
|
||||
community_centroids = {flat_uuid: centroid}
|
||||
return CommunityAssignment(
|
||||
node_to_community=node_to_community,
|
||||
community_centroids=community_centroids,
|
||||
modularity=0.0,
|
||||
backend="flat",
|
||||
top_communities=[flat_uuid],
|
||||
mid_regions={flat_uuid: nodes},
|
||||
)
|
||||
|
||||
|
||||
# ------------------------------------------------------------------ leiden run
|
||||
|
||||
|
||||
def _run_leiden(graph: MemoryGraph) -> tuple[dict[UUID, int], float, str]:
|
||||
"""Run leidenalg on a NetworkX graph via an igraph mirror.
|
||||
|
||||
Returns (node_uuid -> int label, modularity Q, backend_label).
|
||||
Backend label reflects which library owns the hot path per D-04:
|
||||
"leiden-igraph" for N >= IGRAPH_THRESHOLD, "leiden-networkx" for smaller graphs
|
||||
(both internally use leidenalg since python-louvain is Louvain, not Leiden).
|
||||
Seed=42 for determinism across calls.
|
||||
"""
|
||||
import igraph as ig # local import so leiden dep is lazy
|
||||
import leidenalg
|
||||
|
||||
g = graph._nx
|
||||
nodes = list(g.nodes())
|
||||
idx = {n: i for i, n in enumerate(nodes)}
|
||||
edges = [(idx[u], idx[v]) for u, v in g.edges()]
|
||||
weights = [float(g[u][v].get("weight", 1.0)) for u, v in g.edges()]
|
||||
|
||||
ih = ig.Graph(n=len(nodes), edges=edges, directed=False)
|
||||
if weights:
|
||||
ih.es["weight"] = weights
|
||||
|
||||
part = leidenalg.find_partition(
|
||||
ih,
|
||||
leidenalg.ModularityVertexPartition,
|
||||
seed=42,
|
||||
weights="weight" if weights else None,
|
||||
)
|
||||
q = float(part.modularity)
|
||||
mapping = {
|
||||
UUID(nodes[i]): int(part.membership[i]) for i in range(len(nodes))
|
||||
}
|
||||
|
||||
# Backend label matches split even though both paths use leidenalg.
|
||||
if _HAS_IGRAPH and graph.node_count() >= IGRAPH_THRESHOLD:
|
||||
return mapping, q, "leiden-igraph"
|
||||
return mapping, q, "leiden-networkx"
|
||||
|
||||
|
||||
# ------------------------------------------------------------------ public API
|
||||
|
||||
|
||||
def detect_communities(
|
||||
graph: MemoryGraph,
|
||||
prior: CommunityAssignment | None = None,
|
||||
) -> CommunityAssignment:
|
||||
"""D-05 bootstrap + stable UUIDs + CONN-01 three-level parcellation.
|
||||
|
||||
Empty graph -> empty CommunityAssignment(backend="flat").
|
||||
"""
|
||||
n = graph.node_count()
|
||||
if n == 0:
|
||||
return CommunityAssignment(backend="flat")
|
||||
if n < SMALL_N_FLAT:
|
||||
return _flat_assignment(graph, prior)
|
||||
|
||||
try:
|
||||
raw_partition, q, backend = _run_leiden(graph)
|
||||
except Exception:
|
||||
# Leiden unavailable or graph pathological -> degrade gracefully.
|
||||
return _flat_assignment(graph, prior)
|
||||
|
||||
# Mid-N guard: Leiden output only acceptable if Q >= 0.2.
|
||||
if n < MID_N_LEIDEN and q < MODULARITY_FLOOR:
|
||||
return _flat_assignment(graph, prior)
|
||||
|
||||
node_to_community, community_centroids = _map_to_stable_uuids(
|
||||
raw_partition, graph, prior
|
||||
)
|
||||
|
||||
# CONN-01 level 1: top 7 communities by member count.
|
||||
counts: dict[UUID, int] = {}
|
||||
for c in node_to_community.values():
|
||||
counts[c] = counts.get(c, 0) + 1
|
||||
top = sorted(counts.items(), key=lambda kv: kv[1], reverse=True)[
|
||||
:MAX_TOP_COMMUNITIES
|
||||
]
|
||||
top_communities = [u for u, _ in top]
|
||||
|
||||
# CONN-01 level 2 (mid-regions): community UUID -> member node UUIDs.
|
||||
mid_regions: dict[UUID, list[UUID]] = {}
|
||||
for node, comm in node_to_community.items():
|
||||
mid_regions.setdefault(comm, []).append(node)
|
||||
|
||||
return CommunityAssignment(
|
||||
node_to_community=node_to_community,
|
||||
community_centroids=community_centroids,
|
||||
modularity=q,
|
||||
backend=backend,
|
||||
top_communities=top_communities,
|
||||
mid_regions=mid_regions,
|
||||
)
|
||||
|
||||
|
||||
def needs_refresh(
|
||||
prior: CommunityAssignment, current_modularity: float
|
||||
) -> bool:
|
||||
"""CONN-04: refresh signal when |Δ modularity| > REFRESH_DELTA (0.05).
|
||||
|
||||
Consumer (session-start assembler / maintenance job) calls this on each
|
||||
new Leiden run; a True return triggers a re-assignment + cache invalidation.
|
||||
"""
|
||||
return abs(prior.modularity - current_modularity) > REFRESH_DELTA
|
||||
199
src/iai_mcp/compress.py
Normal file
199
src/iai_mcp/compress.py
Normal file
|
|
@ -0,0 +1,199 @@
|
|||
"""TOK-04 LLMLingua-2 compression (Plan 02-04 Task 2, D-25).
|
||||
|
||||
Compression is allowed ONLY on retrieval views and summaries, NEVER on raw
|
||||
content. Enforcement lives in `is_compressible`:
|
||||
|
||||
Forbidden:
|
||||
- pinned records (includes L0 identity)
|
||||
- invariant_anchor records (s5_trust_score >= 0.9)
|
||||
- user-tagged raw: records (raw:en, raw:ru, ...)
|
||||
- normal episodic records (default reject; literal_surface is constitutional
|
||||
per MEM-01)
|
||||
|
||||
Allowed:
|
||||
- records tagged cls_summary (CLS consolidation output)
|
||||
- records tagged schema (LEARN-03 induction output)
|
||||
- records tagged session_summary
|
||||
|
||||
Runtime fallback: when `llmlingua` is not installed, `compress_llmlingua2`
|
||||
returns the input unchanged and emits an llm_health event. This keeps the
|
||||
Tier-0 path green on minimal installs (CI, fresh user machines).
|
||||
|
||||
Constants:
|
||||
- COMPRESSION_TARGET_L2 = 0.5 (community descriptors)
|
||||
- COMPRESSION_TARGET_SUMMARY = 0.3 (session summaries)
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import threading
|
||||
from typing import Any
|
||||
|
||||
from iai_mcp.events import write_event
|
||||
|
||||
|
||||
# ratio targets.
|
||||
COMPRESSION_TARGET_L2 = 0.5
|
||||
COMPRESSION_TARGET_SUMMARY = 0.3
|
||||
|
||||
# threshold -- records at or above this trust score are invariant anchors.
|
||||
INVARIANT_TRUST_THRESHOLD = 0.9
|
||||
|
||||
|
||||
# ----------------------------------------------------------- scope gate
|
||||
|
||||
|
||||
def is_compressible(record) -> tuple[bool, str]:
|
||||
"""Return (allowed, reason) for a given MemoryRecord.
|
||||
|
||||
Reason is a short English diagnostic consumed only in tests / debug logs.
|
||||
"""
|
||||
if getattr(record, "pinned", False):
|
||||
return False, "pinned record (D-14 L0 / user-pinned)"
|
||||
|
||||
trust = getattr(record, "s5_trust_score", 0.5)
|
||||
try:
|
||||
if float(trust) >= INVARIANT_TRUST_THRESHOLD:
|
||||
return False, (
|
||||
f"invariant anchor (trust={float(trust):.2f} >= "
|
||||
f"{INVARIANT_TRUST_THRESHOLD}); forbids compression"
|
||||
)
|
||||
except (TypeError, ValueError):
|
||||
pass
|
||||
|
||||
tags = getattr(record, "tags", None) or []
|
||||
for tag in tags:
|
||||
if tag.startswith("raw:"):
|
||||
return False, f"raw-tagged record ({tag}); user flagged as raw"
|
||||
|
||||
# Explicit allowlist.
|
||||
allow_tags = {"cls_summary", "schema", "session_summary"}
|
||||
for tag in tags:
|
||||
if tag in allow_tags:
|
||||
return True, ""
|
||||
|
||||
return False, "literal_surface constitutional (D-25 default deny)"
|
||||
|
||||
|
||||
# ----------------------------------------------------------- llmlingua loader
|
||||
|
||||
|
||||
_LLMLINGUA_LOCK = threading.Lock()
|
||||
_LLMLINGUA_CACHE: dict[str, Any] = {}
|
||||
|
||||
|
||||
def _load_llmlingua2():
|
||||
"""Lazy-load llmlingua's PromptCompressor (LLMLingua-2 model).
|
||||
|
||||
Returns the compressor instance on success; None if the package is absent
|
||||
or fails to instantiate. Callers log a fallback event and passthrough.
|
||||
"""
|
||||
with _LLMLINGUA_LOCK:
|
||||
if "instance" in _LLMLINGUA_CACHE:
|
||||
return _LLMLINGUA_CACHE["instance"]
|
||||
try:
|
||||
from llmlingua import PromptCompressor # type: ignore
|
||||
except Exception:
|
||||
_LLMLINGUA_CACHE["instance"] = None
|
||||
return None
|
||||
try:
|
||||
# Device auto-detection: CUDA if available (Linux GPU), else MPS on
|
||||
# Apple Silicon (torch.backends.mps), else CPU. llmlingua's default
|
||||
# assumes CUDA which breaks on macOS ARM64.
|
||||
import torch # type: ignore
|
||||
if torch.cuda.is_available():
|
||||
device_map = "cuda"
|
||||
elif getattr(torch.backends, "mps", None) and torch.backends.mps.is_available():
|
||||
device_map = "mps"
|
||||
else:
|
||||
device_map = "cpu"
|
||||
# microsoft/llmlingua-2-xlm-roberta-large-meetingbank (default in
|
||||
# llmlingua>=0.2). Although this compressor is multilingual-capable,
|
||||
# the IAI-MCP brain itself is English-only; the
|
||||
# multilingual support is incidental and only matters for the
|
||||
# opt-in bge-m3 path.
|
||||
compressor = PromptCompressor(
|
||||
model_name="microsoft/llmlingua-2-xlm-roberta-large-meetingbank",
|
||||
use_llmlingua2=True,
|
||||
device_map=device_map,
|
||||
)
|
||||
except Exception:
|
||||
_LLMLINGUA_CACHE["instance"] = None
|
||||
return None
|
||||
_LLMLINGUA_CACHE["instance"] = compressor
|
||||
return compressor
|
||||
|
||||
|
||||
# ----------------------------------------------------------- core compression
|
||||
|
||||
|
||||
def compress_llmlingua2(
|
||||
text: str,
|
||||
target_ratio: float = 0.5,
|
||||
store=None,
|
||||
) -> str:
|
||||
"""Compress `text` to approximately `target_ratio` of original tokens.
|
||||
|
||||
On any failure (package missing, model load error, runtime exception):
|
||||
- Return `text` unchanged (passthrough).
|
||||
- If `store` is provided, emit an llm_health event of kind
|
||||
'compression_fallback' with severity='warning'.
|
||||
|
||||
scope is the caller's responsibility (is_compressible must be
|
||||
consulted BEFORE reaching this function).
|
||||
"""
|
||||
if not text:
|
||||
return text
|
||||
|
||||
compressor = _load_llmlingua2()
|
||||
if compressor is None:
|
||||
if store is not None:
|
||||
try:
|
||||
write_event(
|
||||
store,
|
||||
kind="llm_health",
|
||||
data={
|
||||
"component": "compress_llmlingua2",
|
||||
"tier": "fallback",
|
||||
"reason": "llmlingua package unavailable or model load failed",
|
||||
},
|
||||
severity="warning",
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
return text
|
||||
|
||||
try:
|
||||
result = compressor.compress_prompt(text, rate=float(target_ratio))
|
||||
if isinstance(result, dict):
|
||||
return str(result.get("compressed_prompt", text))
|
||||
return str(result)
|
||||
except Exception as exc: # pragma: no cover -- runtime failure passthrough
|
||||
if store is not None:
|
||||
try:
|
||||
write_event(
|
||||
store,
|
||||
kind="llm_health",
|
||||
data={
|
||||
"component": "compress_llmlingua2",
|
||||
"tier": "fallback",
|
||||
"error": str(exc),
|
||||
},
|
||||
severity="warning",
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
return text
|
||||
|
||||
|
||||
def compress_l2_descriptor(descriptor: str, store=None) -> str:
|
||||
"""Compress an L2 community descriptor (D-25 target ratio 0.5)."""
|
||||
return compress_llmlingua2(
|
||||
descriptor, target_ratio=COMPRESSION_TARGET_L2, store=store,
|
||||
)
|
||||
|
||||
|
||||
def compress_summary(summary: str, store=None) -> str:
|
||||
"""Compress a session summary (D-25 target ratio 0.3)."""
|
||||
return compress_llmlingua2(
|
||||
summary, target_ratio=COMPRESSION_TARGET_SUMMARY, store=store,
|
||||
)
|
||||
499
src/iai_mcp/concurrency.py
Normal file
499
src/iai_mcp/concurrency.py
Normal file
|
|
@ -0,0 +1,499 @@
|
|||
"""Phase 4 daemon concurrency primitives (DAEMON-04, DAEMON-05).
|
||||
|
||||
Persistent-fd flock wrapper. Hold one instance for process lifetime.
|
||||
fcntl.flock (NOT lockf) -- fd-close does not release (see apenwarr 2010, Pitfall 2).
|
||||
|
||||
Constitutional guard:
|
||||
- C1 HUMAN-FIRST: ProcessLock.try_acquire_exclusive is non-blocking; daemon
|
||||
yields immediately when any shared lockholder exists.
|
||||
- C-USER-CONSENT (formerly C2 per D7-16): the user_initiated_sleep
|
||||
branch of _dispatch_socket_request only sets pending flags after receiving
|
||||
an explicit consent payload from the wrapper; the FSM transition itself is
|
||||
performed by _tick_body, never by the dispatcher (C-DISPATCHER-FSM-ISOLATION).
|
||||
- C-DISPATCHER-FSM-ISOLATION (Phase 7 structural; supersedes the bare `C2`
|
||||
inline-comment shorthand previously used at the FSM-yield call sites): the
|
||||
socket dispatcher MUST NOT transition the FSM directly; it only sets pending
|
||||
flags consumed by _tick_body under the FSM lock. New socket_server
|
||||
inherits this invariant.
|
||||
- T-04-06 mitigation: flock is bound to process + open-file-description,
|
||||
so closing an unrelated fd (e.g. /etc/passwd) does NOT release our lock.
|
||||
- T-04-02 mitigation: cleanup_stale_socket + asyncio cleanup_socket kwarg
|
||||
survive SIGKILL-orphaned sockets.
|
||||
- T-04-07 mitigation: lock + socket created with mode 0o600 so cross-user
|
||||
access requires OS privilege escalation (out of scope).
|
||||
|
||||
This module has NO LLM code and NO paid-API env var references.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import errno
|
||||
import fcntl
|
||||
import json
|
||||
import os
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Awaitable, Callable
|
||||
|
||||
LOCK_PATH: Path = Path.home() / ".iai-mcp" / ".lock"
|
||||
SOCKET_PATH: Path = Path.home() / ".iai-mcp" / ".daemon.sock"
|
||||
|
||||
|
||||
class ProcessLock:
|
||||
"""Persistent-fd flock wrapper.
|
||||
|
||||
Hold one instance per process for the entire process lifetime.
|
||||
fcntl.flock (BSD) NOT lockf (POSIX) -- closing an unrelated fd does NOT
|
||||
release our lock (see apenwarr 2010, Pitfall 2).
|
||||
|
||||
Semantics:
|
||||
- acquire_shared(): blocking LOCK_SH (MCP pattern)
|
||||
- try_acquire_exclusive(): LOCK_EX | LOCK_NB (daemon heavy-op pattern)
|
||||
- holds_exclusive_nb(): cooperative-yield probe
|
||||
- release(): LOCK_UN (release without closing fd)
|
||||
- close(): os.close() the fd (shutdown only)
|
||||
"""
|
||||
|
||||
def __init__(self, path: Path = LOCK_PATH) -> None:
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
# O_CREAT so lock file is created if missing; mode 0o600 keeps it user-only.
|
||||
self._fd: int | None = os.open(path, os.O_RDWR | os.O_CREAT, 0o600)
|
||||
# Ensure mode is actually 0o600 even if umask altered it on create.
|
||||
try:
|
||||
os.chmod(path, 0o600)
|
||||
except OSError:
|
||||
pass
|
||||
self._path = path
|
||||
|
||||
def acquire_shared(self) -> None:
|
||||
"""Blocking LOCK_SH. MCP sessions call this at session start."""
|
||||
if self._fd is None:
|
||||
raise RuntimeError("ProcessLock closed; cannot acquire")
|
||||
fcntl.flock(self._fd, fcntl.LOCK_SH)
|
||||
|
||||
def try_acquire_exclusive(self) -> bool:
|
||||
"""Non-blocking LOCK_EX | LOCK_NB.
|
||||
|
||||
Returns True if acquired, False if any shared holder blocks us.
|
||||
Daemon calls this before heavy ops; False -> yield to MCP.
|
||||
"""
|
||||
if self._fd is None:
|
||||
return False
|
||||
try:
|
||||
fcntl.flock(self._fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
|
||||
return True
|
||||
except OSError as exc:
|
||||
if exc.errno in (errno.EAGAIN, errno.EWOULDBLOCK):
|
||||
return False
|
||||
raise
|
||||
|
||||
def holds_exclusive_nb(self) -> bool:
|
||||
"""D-06 cooperative-yield probe.
|
||||
|
||||
Non-blocking check: do we still hold the exclusive lock?
|
||||
|
||||
Returns True if our fd has the exclusive lock. Returns False if
|
||||
another process (e.g., MCP) acquired a shared lock while we were
|
||||
working between REM cycles.
|
||||
|
||||
Implementation: fcntl.flock with LOCK_EX | LOCK_NB on our existing fd.
|
||||
On Linux/macOS, re-acquiring an already-held lock is a no-op success.
|
||||
On contention (shared lock held by another process), raises BlockingIOError
|
||||
which we catch and translate to False. EWOULDBLOCK/EAGAIN may surface as
|
||||
OSError on some platforms -- caught the same way.
|
||||
"""
|
||||
if self._fd is None:
|
||||
return False
|
||||
try:
|
||||
fcntl.flock(self._fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
|
||||
return True
|
||||
except BlockingIOError:
|
||||
return False
|
||||
except OSError as exc:
|
||||
if exc.errno in (errno.EAGAIN, errno.EWOULDBLOCK):
|
||||
return False
|
||||
raise
|
||||
|
||||
def release(self) -> None:
|
||||
"""LOCK_UN: release lock but keep fd open for later reacquisition."""
|
||||
if self._fd is None:
|
||||
return
|
||||
fcntl.flock(self._fd, fcntl.LOCK_UN)
|
||||
|
||||
def close(self) -> None:
|
||||
"""Close fd. Only call at process shutdown -- closing releases the lock."""
|
||||
if self._fd is not None:
|
||||
try:
|
||||
os.close(self._fd)
|
||||
finally:
|
||||
self._fd = None
|
||||
|
||||
|
||||
def cleanup_stale_socket(path: Path = SOCKET_PATH) -> None:
|
||||
"""Remove a stale socket file left over from SIGKILL-orphaned daemon.
|
||||
|
||||
Pitfall 10 mitigation: the in-process case is handled either by the
|
||||
3.13+ kwarg (see serve_control_socket) or by the 3.12 finally-block
|
||||
emulation, but a prior daemon killed with SIGKILL never got to run its
|
||||
cleanup. Call this BEFORE the server binds.
|
||||
"""
|
||||
try:
|
||||
path.unlink()
|
||||
except FileNotFoundError:
|
||||
pass
|
||||
except OSError:
|
||||
# Path may be a non-socket file -- still try to unlink. If even that
|
||||
# fails (e.g. permission), let asyncio surface the EADDRINUSE.
|
||||
try:
|
||||
path.unlink()
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
|
||||
def _validate_socket_message(req: dict) -> tuple[bool, str | None]:
|
||||
"""Per-type schema validation (ASVS V5).
|
||||
|
||||
Returns (ok, error_message). `req` must already be known to be a dict.
|
||||
"""
|
||||
req_type = req.get("type")
|
||||
if not isinstance(req_type, str):
|
||||
return False, "type must be a string"
|
||||
|
||||
if req_type == "status":
|
||||
# No required fields.
|
||||
return True, None
|
||||
|
||||
if req_type == "user_initiated_sleep":
|
||||
reason = req.get("reason")
|
||||
ts = req.get("ts")
|
||||
if not isinstance(reason, str):
|
||||
return False, "reason must be a string"
|
||||
if not isinstance(ts, str):
|
||||
return False, "ts must be a string"
|
||||
return True, None
|
||||
|
||||
if req_type in ("force_wake", "force_rem"):
|
||||
ts = req.get("ts")
|
||||
if not isinstance(ts, str):
|
||||
return False, "ts must be a string"
|
||||
return True, None
|
||||
|
||||
if req_type in ("pause", "resume"):
|
||||
# pause may optionally carry `seconds`; we don't persist it as a timer
|
||||
# (the flag is binary) but we DO validate the type if supplied.
|
||||
if "seconds" in req:
|
||||
seconds = req.get("seconds")
|
||||
if not isinstance(seconds, int) or isinstance(seconds, bool):
|
||||
return False, "seconds must be an int"
|
||||
return True, None
|
||||
|
||||
# TOK-14 / D5-05: 7th message type `session_open`.
|
||||
# Both session_id and ts are OPTIONAL; when supplied, they must be strings.
|
||||
# Absence is tolerated so the TS wrapper can emit a bare ping on MCP boot
|
||||
# without stalling on id/ts bookkeeping.
|
||||
if req_type == "session_open":
|
||||
if "session_id" in req and not isinstance(req["session_id"], str):
|
||||
return False, "session_id must be a string"
|
||||
if "ts" in req and not isinstance(req["ts"], str):
|
||||
return False, "ts must be a string"
|
||||
return True, None
|
||||
|
||||
# Unknown types are not rejected at validation time; the dispatcher
|
||||
# returns a structured unknown_message_type response so the caller sees
|
||||
# a different reason code from "invalid_message".
|
||||
return True, None
|
||||
|
||||
|
||||
async def _dispatch_socket_request(
|
||||
req: dict,
|
||||
store: Any,
|
||||
lock: ProcessLock,
|
||||
state: dict,
|
||||
) -> dict:
|
||||
"""Default dispatcher for NDJSON socket requests.
|
||||
|
||||
Handles seven message types; mutates `state` in-place and persists via
|
||||
`save_state` when the message changes scheduler control flags. The
|
||||
dispatcher thread NEVER transitions the FSM directly
|
||||
(C-DISPATCHER-FSM-ISOLATION; renamed from bare `C2` per D7-16) --
|
||||
it only sets pending flags that `_tick_body` reads under the FSM lock.
|
||||
|
||||
Handled types:
|
||||
- status -> state snapshot including version
|
||||
- user_initiated_sleep -> set user_sleep_request pending flag
|
||||
- force_wake -> set force_wake_request pending flag
|
||||
- force_rem -> set force_rem_request pending flag
|
||||
- pause -> scheduler_paused=True
|
||||
- resume -> scheduler_paused=False
|
||||
- session_open -> set first_turn_pending + hippea_cascade_request
|
||||
(Plan 05-04 TOK-14 / D5-05)
|
||||
- any other -> {"ok": False, "reason": "unknown_message_type"}
|
||||
"""
|
||||
# Reject non-dict requests (defence-in-depth; caller already json.loaded).
|
||||
if not isinstance(req, dict):
|
||||
return {
|
||||
"ok": False,
|
||||
"reason": "invalid_message",
|
||||
"error": "request must be a JSON object",
|
||||
}
|
||||
|
||||
# Per-type schema validation (ASVS V5).
|
||||
ok, err = _validate_socket_message(req)
|
||||
if not ok:
|
||||
return {
|
||||
"ok": False,
|
||||
"reason": "invalid_message",
|
||||
"error": err or "schema_validation_failed",
|
||||
}
|
||||
|
||||
req_type = req.get("type")
|
||||
|
||||
# Lazy imports so test monkeypatches of STATE_PATH (via daemon_state) and
|
||||
# __version__ (via iai_mcp) always resolve to the current module state.
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from iai_mcp import __version__ as pkg_version
|
||||
from iai_mcp.daemon_state import save_state
|
||||
|
||||
# -------------------------------------------------------- status snapshot
|
||||
if req_type == "status":
|
||||
fsm_state = state.get("fsm_state", "WAKE")
|
||||
started_at = state.get("daemon_started_at")
|
||||
uptime_sec: float | None = None
|
||||
if started_at:
|
||||
try:
|
||||
start_dt = datetime.fromisoformat(started_at)
|
||||
uptime_sec = (datetime.now(timezone.utc) - start_dt).total_seconds()
|
||||
except (TypeError, ValueError):
|
||||
uptime_sec = None
|
||||
|
||||
# Truncate pending_digest to the top-level counters for socket
|
||||
# transport; the full digest can be multi-KB once insights are baked.
|
||||
pending_digest = state.get("pending_digest")
|
||||
if isinstance(pending_digest, dict):
|
||||
truncated_digest = {
|
||||
"rem_cycles_completed": pending_digest.get("rem_cycles_completed", 0),
|
||||
"episodes_processed": pending_digest.get("episodes_processed", 0),
|
||||
"schemas_induced_tier0": pending_digest.get(
|
||||
"schemas_induced_tier0", 0,
|
||||
),
|
||||
"claude_call_used": pending_digest.get("claude_call_used", False),
|
||||
}
|
||||
else:
|
||||
truncated_digest = None
|
||||
|
||||
return {
|
||||
"ok": True,
|
||||
# Backwards-compat key used by tests/test_concurrency.py Test 6.
|
||||
"state": fsm_state,
|
||||
"uptime_sec": uptime_sec,
|
||||
# Plan 04-gap-1 additions:
|
||||
"version": pkg_version,
|
||||
"fsm_state": fsm_state,
|
||||
"last_tick_at": state.get("last_tick_at"),
|
||||
"quiet_window": state.get("quiet_window"),
|
||||
"pending_digest": truncated_digest,
|
||||
"daemon_started_at": started_at,
|
||||
"scheduler_paused": bool(state.get("scheduler_paused", False)),
|
||||
}
|
||||
|
||||
# -------------------------------------------------- user_initiated_sleep
|
||||
if req_type == "user_initiated_sleep":
|
||||
current_fsm = state.get("fsm_state", "WAKE")
|
||||
if current_fsm in ("SLEEP", "DREAMING", "TRANSITIONING"):
|
||||
return {"ok": False, "reason": "already_sleeping"}
|
||||
|
||||
# Clip reason to 500 chars (ASVS V5 output hardening mirror).
|
||||
reason = str(req.get("reason", ""))[:500]
|
||||
ts = str(req.get("ts", ""))
|
||||
state["user_sleep_request"] = {
|
||||
"reason": reason,
|
||||
"ts": ts,
|
||||
"pending": True,
|
||||
}
|
||||
try:
|
||||
save_state(state)
|
||||
except Exception as exc: # noqa: BLE001 -- socket must never crash daemon
|
||||
return {"ok": False, "reason": "state_write_failed", "error": str(exc)[:200]}
|
||||
# Tell the caller we queued the transition; the scheduler owns the FSM
|
||||
# and will move WAKE->TRANSITIONING->SLEEP on the next tick
|
||||
# (C-DISPATCHER-FSM-ISOLATION; renamed from bare `C2` per D7-16).
|
||||
return {"ok": True, "state": "TRANSITIONING"}
|
||||
|
||||
# ---------------------------------------------------------- force_wake
|
||||
if req_type == "force_wake":
|
||||
ts = str(req.get("ts", ""))
|
||||
state["force_wake_request"] = {"ts": ts, "pending": True}
|
||||
try:
|
||||
save_state(state)
|
||||
except Exception as exc: # noqa: BLE001
|
||||
return {"ok": False, "reason": "state_write_failed", "error": str(exc)[:200]}
|
||||
return {"ok": True, "reason": "wake_queued"}
|
||||
|
||||
# ----------------------------------------------------------- force_rem
|
||||
if req_type == "force_rem":
|
||||
ts = str(req.get("ts", ""))
|
||||
state["force_rem_request"] = {"ts": ts, "pending": True}
|
||||
try:
|
||||
save_state(state)
|
||||
except Exception as exc: # noqa: BLE001
|
||||
return {"ok": False, "reason": "state_write_failed", "error": str(exc)[:200]}
|
||||
return {"ok": True, "reason": "rem_queued"}
|
||||
|
||||
# --------------------------------------------------------- pause/resume
|
||||
if req_type == "pause":
|
||||
state["scheduler_paused"] = True
|
||||
try:
|
||||
save_state(state)
|
||||
except Exception as exc: # noqa: BLE001
|
||||
return {"ok": False, "reason": "state_write_failed", "error": str(exc)[:200]}
|
||||
return {"ok": True, "paused": True}
|
||||
|
||||
if req_type == "resume":
|
||||
state["scheduler_paused"] = False
|
||||
try:
|
||||
save_state(state)
|
||||
except Exception as exc: # noqa: BLE001
|
||||
return {"ok": False, "reason": "state_write_failed", "error": str(exc)[:200]}
|
||||
return {"ok": True, "paused": False}
|
||||
|
||||
# ---------------------------------------------------------- session_open
|
||||
# TOK-14 / D5-05: 7th message type. Sets two flags:
|
||||
# - first_turn_pending[session_id] = True -> consumed by core's
|
||||
# _first_turn_recall_hook exactly once per session.
|
||||
# - hippea_cascade_request {pending=True, session_id, ts} -> polled by
|
||||
# daemon._hippea_cascade_loop which pre-warms the LRU with records
|
||||
# from the top-K salient communities (Van de Cruys HIPPEA operational
|
||||
# form).
|
||||
# Both flags are idempotent under a re-emit: set_overwrite is intentional
|
||||
# so a client that retries session_open gets a fresh cascade.
|
||||
if req_type == "session_open":
|
||||
# Clip session_id to 128 chars (ASVS V5 output hardening — matches
|
||||
# user_initiated_sleep.reason clip at 500).
|
||||
session_id = str(req.get("session_id", ""))[:128]
|
||||
ts = str(req.get("ts", ""))
|
||||
state["last_session_open"] = {"session_id": session_id, "ts": ts}
|
||||
# first-turn hook flag. Co-exists with existing dict form
|
||||
# written by daemon_state.mark_session_opened.
|
||||
first_turn = state.setdefault("first_turn_pending", {})
|
||||
now_iso = datetime.now(timezone.utc).isoformat()
|
||||
if isinstance(first_turn, dict):
|
||||
first_turn[session_id] = now_iso
|
||||
else:
|
||||
# Legacy scalar-bool state -> upgrade in place to the dict form.
|
||||
state["first_turn_pending"] = {session_id: now_iso}
|
||||
# cascade flag.
|
||||
state["hippea_cascade_request"] = {
|
||||
"session_id": session_id,
|
||||
"ts": ts,
|
||||
"pending": True,
|
||||
}
|
||||
try:
|
||||
save_state(state)
|
||||
except Exception as exc: # noqa: BLE001
|
||||
return {"ok": False, "reason": "state_write_failed", "error": str(exc)[:200]}
|
||||
return {"ok": True, "reason": "session_open_queued"}
|
||||
|
||||
# ------------------------------------------------------------ unknown
|
||||
return {
|
||||
"ok": False,
|
||||
"reason": "unknown_message_type",
|
||||
"type": req_type,
|
||||
}
|
||||
|
||||
|
||||
async def serve_control_socket(
|
||||
store: Any,
|
||||
lock: ProcessLock,
|
||||
state: dict,
|
||||
shutdown: asyncio.Event,
|
||||
*,
|
||||
dispatcher: Callable[[dict], Awaitable[dict]] | None = None,
|
||||
socket_path: Path = SOCKET_PATH,
|
||||
) -> None:
|
||||
"""Unix socket NDJSON server at ~/.iai-mcp/.daemon.sock.
|
||||
|
||||
Protocol: each line from client is a JSON request; each response is one
|
||||
JSON line back. The cleanup_socket kwarg (Python 3.13+) auto-removes the
|
||||
socket file on server shutdown; on 3.12 we emulate in the finally-block.
|
||||
Stale-socket pre-cleanup protects against SIGKILL-orphaned files.
|
||||
|
||||
Permissions: chmod 0o600 immediately after bind so cross-user access
|
||||
requires privilege escalation (T-04-04 accepted risk).
|
||||
|
||||
When dispatcher is provided it receives only the parsed request dict and
|
||||
must return a dict. When None, the default _dispatch_socket_request is used.
|
||||
"""
|
||||
cleanup_stale_socket(socket_path)
|
||||
# Ensure parent dir exists (Path.home() / .iai-mcp could be first-run).
|
||||
socket_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Python 3.13 added a `cleanup_socket` kwarg to the event-loop unix server
|
||||
# that auto-removes the socket file on shutdown. On 3.12 we emulate the
|
||||
# same behaviour by unlinking in the finally-block below. See:
|
||||
# https://docs.python.org/3.13/library/asyncio-stream.html
|
||||
_supports_cleanup_socket = False
|
||||
try:
|
||||
import inspect as _inspect
|
||||
import asyncio as _asyncio_mod
|
||||
_loop_sig = _inspect.signature(
|
||||
_asyncio_mod.get_event_loop_policy().new_event_loop().create_unix_server
|
||||
)
|
||||
_supports_cleanup_socket = "cleanup_socket" in _loop_sig.parameters
|
||||
except Exception:
|
||||
_supports_cleanup_socket = False
|
||||
|
||||
async def handle(reader: asyncio.StreamReader, writer: asyncio.StreamWriter) -> None:
|
||||
try:
|
||||
line = await reader.readline()
|
||||
if not line:
|
||||
return
|
||||
try:
|
||||
req = json.loads(line)
|
||||
except (TypeError, ValueError) as exc:
|
||||
writer.write((json.dumps({"error": f"invalid_json: {exc}"}) + "\n").encode("utf-8"))
|
||||
await writer.drain()
|
||||
return
|
||||
try:
|
||||
if dispatcher is not None:
|
||||
resp = await dispatcher(req)
|
||||
else:
|
||||
resp = await _dispatch_socket_request(req, store, lock, state)
|
||||
except Exception as exc: # noqa: BLE001 -- socket must never crash daemon
|
||||
resp = {"error": str(exc)}
|
||||
writer.write((json.dumps(resp) + "\n").encode("utf-8"))
|
||||
await writer.drain()
|
||||
finally:
|
||||
try:
|
||||
writer.close()
|
||||
await writer.wait_closed()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Build server kwargs. The native 3.13+ behaviour is opted in via
|
||||
# `cleanup_socket=True`; on 3.12 the finally-block emulates the same unlink
|
||||
# so a subsequent daemon boot cannot hit EADDRINUSE.
|
||||
_server_kwargs = {"cleanup_socket": True} if _supports_cleanup_socket else {}
|
||||
server = await asyncio.start_unix_server(
|
||||
handle, path=str(socket_path), **_server_kwargs,
|
||||
)
|
||||
# chmod 0o600 immediately after bind (T-04-07 mitigation).
|
||||
try:
|
||||
os.chmod(str(socket_path), 0o600)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
try:
|
||||
async with server:
|
||||
await shutdown.wait()
|
||||
finally:
|
||||
# Python 3.12 cleanup-socket emulation: remove the socket file on
|
||||
# shutdown so the next daemon boot doesn't hit EADDRINUSE. 3.13+ does
|
||||
# this natively inside the server.__aexit__.
|
||||
if not _supports_cleanup_socket:
|
||||
try:
|
||||
socket_path.unlink()
|
||||
except FileNotFoundError:
|
||||
pass
|
||||
except OSError:
|
||||
pass
|
||||
1332
src/iai_mcp/core.py
Normal file
1332
src/iai_mcp/core.py
Normal file
File diff suppressed because it is too large
Load diff
432
src/iai_mcp/crypto.py
Normal file
432
src/iai_mcp/crypto.py
Normal file
|
|
@ -0,0 +1,432 @@
|
|||
"""Plan 02-08 / AES-256-GCM encryption-at-rest primitives + file-backed key storage.
|
||||
|
||||
Ciphertext format (string-encoded for LanceDB string-column storage):
|
||||
|
||||
iai:enc:v1:<base64(nonce || ciphertext || tag)>
|
||||
|
||||
Components:
|
||||
- prefix "iai:enc:v1:" (identifies encrypted payload; enables mixed
|
||||
plaintext/ciphertext coexistence during v2->v3 migration)
|
||||
- nonce 12 random bytes (AES-GCM standard IV length)
|
||||
- ciphertext+tag AESGCM.encrypt(nonce, plaintext_utf8, associated_data) output;
|
||||
the 16-byte GCM authentication tag is appended by AESGCM.
|
||||
|
||||
Associated data (AD) is the UUID bytes of the record id: this binds the
|
||||
ciphertext to its row so an attacker with write access cannot swap ciphertext
|
||||
values between rows (T-02-08-01 tampering mitigation).
|
||||
|
||||
Key storage (Phase 07.10 — file-backed primary, no keyring at module scope):
|
||||
- Primary: a 32-raw-byte file at ``{store_root}/.crypto.key`` (default
|
||||
``~/.iai-mcp/.crypto.key``), mode ``0o600``, owner-uid validated. Resolved
|
||||
via the ``store_root`` constructor argument (single-source path, threaded
|
||||
from ``MemoryStore.root`` — see D-03). When ``store_root`` is
|
||||
``None`` the path is read lazily from ``IAI_MCP_STORE`` env or the
|
||||
``DEFAULT_STORAGE_PATH`` (``~/.iai-mcp``).
|
||||
- Fallback: passphrase via ``IAI_MCP_CRYPTO_PASSPHRASE`` env var (CI / fresh
|
||||
installs / non-interactive environments). Key derived via PBKDF2-HMAC-
|
||||
SHA256 with 600_000 iterations (OWASP 2023 recommendation) and a per-user
|
||||
salt (``sha256(user_id)[:16]``). Deterministic given passphrase + user_id,
|
||||
so the same machine survives reboots without persisting anything new.
|
||||
- If neither path resolves, ``CryptoKey.get_or_create()`` raises
|
||||
``CryptoKeyError`` with a dual-remediation message naming
|
||||
``iai-mcp crypto migrate-to-file`` (existing macOS Keychain key from before
|
||||
Phase 07.10), ``iai-mcp crypto init`` (fresh install), and the
|
||||
``IAI_MCP_CRYPTO_PASSPHRASE`` env var (CI / non-interactive). No silent
|
||||
key generation — that would render existing data unreadable.
|
||||
|
||||
The migration CLI command ``iai-mcp crypto migrate-to-file`` keeps
|
||||
a function-local ``import keyring`` to read an existing macOS Keychain key
|
||||
once and write it to the file backend; this module never imports ``keyring``
|
||||
at file scope, so daemon boot under launchd does not block on the Keychain
|
||||
ACL prompt (Phase 07.10 / D-12).
|
||||
|
||||
Module contract:
|
||||
- encrypt_field(plaintext, key, associated_data) -> str (prefixed base64)
|
||||
- decrypt_field(ciphertext_b64, key, associated_data) -> str
|
||||
- is_encrypted(field) -> bool
|
||||
- CryptoKey(user_id, store_root=None).get_or_create() / rotate() / delete()
|
||||
- derive_key_from_passphrase(passphrase, salt) -> bytes (32)
|
||||
|
||||
Constitutional fit:
|
||||
- D-STORAGE: no keys stored in the LanceDB store; only ciphertext.
|
||||
- D-GUARD: file backend missing degrades to passphrase fallback; absent both,
|
||||
refusal is loud with an actionable error pointing at both remediation paths.
|
||||
- encryption is lossless -- decrypt(encrypt(x)) == x byte-for-byte.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import hashlib
|
||||
import os
|
||||
import secrets
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from cryptography.hazmat.primitives import hashes
|
||||
from cryptography.hazmat.primitives.ciphers.aead import AESGCM
|
||||
from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC
|
||||
|
||||
|
||||
# Constitutional constants (module-scope for grep-discoverability).
|
||||
CIPHERTEXT_PREFIX: str = "iai:enc:v1:"
|
||||
NONCE_BYTES: int = 12 # AES-GCM standard IV length
|
||||
KEY_BYTES: int = 32 # 256-bit key
|
||||
PBKDF2_ITERATIONS: int = 600_000 # OWASP 2023 minimum for PBKDF2-HMAC-SHA256
|
||||
SERVICE_NAME_DEFAULT: str = "iai-mcp"
|
||||
|
||||
# Default storage root mirrors store.DEFAULT_STORAGE_PATH so a CryptoKey that
|
||||
# is constructed without a ``store_root`` argument resolves to the same
|
||||
# location MemoryStore would have used. Kept as a module-private to avoid
|
||||
# importing store.py here (would create a circular import).
|
||||
_DEFAULT_STORE_ROOT: Path = Path.home() / ".iai-mcp"
|
||||
_KEY_FILE_NAME: str = ".crypto.key"
|
||||
|
||||
|
||||
class CryptoKeyError(RuntimeError):
|
||||
"""Raised when a CryptoKey cannot be loaded or created.
|
||||
|
||||
Typical triggers:
|
||||
- The key file exists at the resolved path but is unreadable, has an
|
||||
insecure mode, is owned by a different uid, or has the wrong length.
|
||||
- Neither a key file NOR ``IAI_MCP_CRYPTO_PASSPHRASE`` is present;
|
||||
``MemoryStore`` surfaces the error so the daemon refuses to start with
|
||||
a clear actionable message instead of silently proceeding without
|
||||
encryption (Phase 07.10 D-04).
|
||||
"""
|
||||
|
||||
|
||||
def is_encrypted(field: Optional[str]) -> bool:
|
||||
"""Cheap prefix check supporting mixed-plaintext/ciphertext coexistence.
|
||||
|
||||
Returns True only when `field` is a non-empty string that starts with the
|
||||
exact version prefix `iai:enc:v1:`. Used by:
|
||||
- store._decrypt_fields to know whether to attempt decryption
|
||||
- migrate_encryption_v2_to_v3 to skip already-encrypted rows
|
||||
"""
|
||||
if not field or not isinstance(field, str):
|
||||
return False
|
||||
return field.startswith(CIPHERTEXT_PREFIX)
|
||||
|
||||
|
||||
def encrypt_field(
|
||||
plaintext: str,
|
||||
key: bytes,
|
||||
associated_data: bytes = b"",
|
||||
) -> str:
|
||||
"""AES-256-GCM encrypt a UTF-8 string; return prefixed base64 ciphertext.
|
||||
|
||||
The nonce is generated randomly with secrets.token_bytes (not os.urandom
|
||||
for slight additional entropy guarantees). A fresh nonce is REQUIRED for
|
||||
every call with a given key -- reusing a nonce with AES-GCM breaks the
|
||||
security of both messages.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
plaintext:
|
||||
Any UTF-8 string (including empty string). Cyrillic / CJK / Arabic
|
||||
preserved byte-for-byte.
|
||||
key:
|
||||
32-byte (256-bit) key. Typically sourced from CryptoKey.get_or_create().
|
||||
associated_data:
|
||||
Arbitrary bytes that are authenticated but not encrypted. In this
|
||||
codebase: the record id in UUID-string form (binds ciphertext to row).
|
||||
|
||||
Returns
|
||||
-------
|
||||
str: "iai:enc:v1:" + base64(nonce || ciphertext || tag)
|
||||
"""
|
||||
if len(key) != KEY_BYTES:
|
||||
raise ValueError(f"key must be {KEY_BYTES} bytes (got {len(key)})")
|
||||
aesgcm = AESGCM(key)
|
||||
nonce = secrets.token_bytes(NONCE_BYTES)
|
||||
ct_with_tag = aesgcm.encrypt(
|
||||
nonce, plaintext.encode("utf-8"), associated_data or None
|
||||
)
|
||||
payload = nonce + ct_with_tag
|
||||
return CIPHERTEXT_PREFIX + base64.b64encode(payload).decode("ascii")
|
||||
|
||||
|
||||
def decrypt_field(
|
||||
ciphertext_b64: str,
|
||||
key: bytes,
|
||||
associated_data: bytes = b"",
|
||||
) -> str:
|
||||
"""Decrypt a prefixed base64 AES-256-GCM payload back to a UTF-8 string.
|
||||
|
||||
Raises cryptography.exceptions.InvalidTag on:
|
||||
- Wrong key
|
||||
- Tampered ciphertext (single-bit flip in nonce / ct / tag)
|
||||
- Mismatched associated_data (even one byte off)
|
||||
|
||||
Raises ValueError if the field doesn't carry the iai:enc:v1: prefix -- the
|
||||
caller should have guarded with is_encrypted() first.
|
||||
"""
|
||||
if not is_encrypted(ciphertext_b64):
|
||||
raise ValueError("field is not iai:enc:v1:-prefixed ciphertext")
|
||||
if len(key) != KEY_BYTES:
|
||||
raise ValueError(f"key must be {KEY_BYTES} bytes (got {len(key)})")
|
||||
payload_b64 = ciphertext_b64[len(CIPHERTEXT_PREFIX):]
|
||||
payload = base64.b64decode(payload_b64)
|
||||
if len(payload) < NONCE_BYTES + 16: # nonce + min GCM tag
|
||||
raise ValueError("ciphertext payload too short")
|
||||
nonce = payload[:NONCE_BYTES]
|
||||
ct_with_tag = payload[NONCE_BYTES:]
|
||||
aesgcm = AESGCM(key)
|
||||
plaintext_bytes = aesgcm.decrypt(
|
||||
nonce, ct_with_tag, associated_data or None
|
||||
)
|
||||
return plaintext_bytes.decode("utf-8")
|
||||
|
||||
|
||||
def derive_key_from_passphrase(passphrase: str, salt: bytes) -> bytes:
|
||||
"""PBKDF2-HMAC-SHA256 key derivation for the passphrase-fallback path.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
passphrase:
|
||||
User-supplied passphrase (via IAI_MCP_CRYPTO_PASSPHRASE env var in the
|
||||
current design -- first-run prompt is future work when we have a CLI
|
||||
interaction point).
|
||||
salt:
|
||||
16+ bytes of salt. In practice the CryptoKey fallback uses
|
||||
sha256(user_id)[:16] so the derived key is deterministic per
|
||||
(passphrase, user_id) pair on a given machine.
|
||||
|
||||
Returns 32 bytes (256-bit) suitable for AESGCM.
|
||||
"""
|
||||
if len(salt) < 16:
|
||||
raise ValueError(f"salt must be at least 16 bytes (got {len(salt)})")
|
||||
kdf = PBKDF2HMAC(
|
||||
algorithm=hashes.SHA256(),
|
||||
length=KEY_BYTES,
|
||||
salt=salt,
|
||||
iterations=PBKDF2_ITERATIONS,
|
||||
)
|
||||
return kdf.derive(passphrase.encode("utf-8"))
|
||||
|
||||
|
||||
class CryptoKey:
|
||||
"""File-backed 256-bit AES key with passphrase fallback.
|
||||
|
||||
redesign:
|
||||
File backend at ``{store_root}/.crypto.key`` (32 raw bytes, mode
|
||||
``0o600``, owner-uid validated) is the primary. Passphrase via
|
||||
``IAI_MCP_CRYPTO_PASSPHRASE`` is the second-tier fallback. If neither
|
||||
resolves, ``get_or_create()`` raises ``CryptoKeyError`` with an
|
||||
actionable error message naming both remediation paths plus
|
||||
``iai-mcp crypto migrate-to-file`` (one-time migration of an existing
|
||||
Keychain key) and ``iai-mcp crypto init`` (fresh install).
|
||||
|
||||
Usage:
|
||||
ck = CryptoKey(user_id="default", store_root=Path("~/.iai-mcp"))
|
||||
key = ck.get_or_create() # 32 bytes; reads from file or falls back
|
||||
# to passphrase
|
||||
# ...
|
||||
new_key = ck.rotate() # writes a fresh key file (atomic temp+rename);
|
||||
# caller is responsible for re-encrypting data
|
||||
ck.delete() # remove the key file (test teardown / uninstall)
|
||||
|
||||
Multi-user ready: each ``user_id`` derives its own passphrase salt
|
||||
(``sha256(user_id)[:16]``). The current product ships a single
|
||||
``user_id="default"`` but the architecture supports per-user isolation for
|
||||
future multi-tenant deployments. (The file backend itself is currently
|
||||
single-tenant — one ``.crypto.key`` per store root.)
|
||||
|
||||
Thread-safety: instance-level ``_cached_key`` hides repeated
|
||||
``get_or_create()`` calls from the file backend (one read per process
|
||||
lifetime, not per call).
|
||||
"""
|
||||
|
||||
SERVICE_NAME: str = SERVICE_NAME_DEFAULT
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
user_id: str = "default",
|
||||
store_root: Path | None = None,
|
||||
) -> None:
|
||||
self.user_id = user_id
|
||||
self.store_root: Path | None = store_root
|
||||
self._cached_key: Optional[bytes] = None
|
||||
|
||||
# ---------------------------------------------------------------- helpers
|
||||
|
||||
def _passphrase_salt(self) -> bytes:
|
||||
"""Per-user salt for the passphrase fallback; deterministic across runs."""
|
||||
return hashlib.sha256(self.user_id.encode("utf-8")).digest()[:16]
|
||||
|
||||
def _key_file_path(self) -> Path:
|
||||
"""Resolve ``{store_root}/.crypto.key`` (Phase 07.10 D-03).
|
||||
|
||||
Lazy resolution: if ``self.store_root`` was not supplied at
|
||||
construction, read ``IAI_MCP_STORE`` env or fall back to the project
|
||||
default ``~/.iai-mcp`` — the same precedence ``MemoryStore.__init__``
|
||||
uses. Resolving here (not in ``__init__``) lets a test set
|
||||
``IAI_MCP_STORE`` after a CryptoKey instance was already created
|
||||
without the kwarg.
|
||||
"""
|
||||
if self.store_root is not None:
|
||||
root = Path(self.store_root)
|
||||
else:
|
||||
env_path = os.environ.get("IAI_MCP_STORE")
|
||||
root = Path(env_path) if env_path else _DEFAULT_STORE_ROOT
|
||||
return root / _KEY_FILE_NAME
|
||||
|
||||
def _try_file_get(self) -> Optional[bytes]:
|
||||
"""Return 32 raw bytes from the key file; ``None`` if the file is absent.
|
||||
|
||||
strict validation:
|
||||
- mode strictly ``0o600`` — refuse if any group/world bits are set
|
||||
(``mode & 0o077 != 0``) with ``CryptoKeyError("...insecure mode...")``
|
||||
- ``st_uid == os.geteuid()`` — refuse files owned by a different user
|
||||
with ``CryptoKeyError("...uid...")``
|
||||
- file length exactly ``KEY_BYTES`` — refuse with
|
||||
``CryptoKeyError("...wrong length...")``
|
||||
|
||||
Each rejection emits a distinct error message so misconfigurations are
|
||||
diagnosable at a glance.
|
||||
"""
|
||||
path = self._key_file_path()
|
||||
if not path.exists():
|
||||
return None
|
||||
# Use ``os.stat`` rather than ``Path.stat`` so test harnesses can
|
||||
# monkeypatch ``os.stat`` to simulate foreign-uid scenarios at the
|
||||
# syscall boundary (Phase 07.10 W1 case 4 path-scoped fake stat).
|
||||
st = os.stat(path)
|
||||
# Mode check: owner-only bits permitted.
|
||||
if st.st_mode & 0o077 != 0:
|
||||
raise CryptoKeyError(
|
||||
f"crypto key file at {path} has insecure mode "
|
||||
f"0o{st.st_mode & 0o777:03o}; expected 0o600 "
|
||||
f"(run: chmod 0o600 {path})"
|
||||
)
|
||||
# UID check: refuse files owned by a different user.
|
||||
if st.st_uid != os.geteuid():
|
||||
raise CryptoKeyError(
|
||||
f"crypto key file at {path} is owned by uid={st.st_uid}; "
|
||||
f"current process runs as uid={os.geteuid()} (refusing to read)"
|
||||
)
|
||||
raw = path.read_bytes()
|
||||
if len(raw) != KEY_BYTES:
|
||||
raise CryptoKeyError(
|
||||
f"crypto key file at {path} has wrong length {len(raw)} "
|
||||
f"(expected {KEY_BYTES})"
|
||||
)
|
||||
return raw
|
||||
|
||||
def _try_file_set(self, key: bytes) -> None:
|
||||
"""Atomically write ``key`` to the key file (Phase 07.10 D-07).
|
||||
|
||||
Pattern:
|
||||
1. ``mkdir -p`` the parent directory.
|
||||
2. Remove any stale ``{path}.tmp.*`` siblings from prior crashed runs.
|
||||
3. Open ``{path}.tmp.{pid}`` with ``O_CREAT|O_EXCL|O_WRONLY`` mode
|
||||
``0o600`` — refuses if a tmp file at the same pid already exists.
|
||||
4. ``os.fchmod(fd, 0o600)`` BEFORE writing bytes — defends against
|
||||
umask quirks, makes the mode-restriction window zero.
|
||||
5. ``os.write`` + ``os.fsync`` + ``os.close``.
|
||||
6. ``os.rename`` the tmp file to the final path (atomic on POSIX).
|
||||
|
||||
``ValueError`` is raised if ``key`` is not exactly ``KEY_BYTES`` long.
|
||||
"""
|
||||
if len(key) != KEY_BYTES:
|
||||
raise ValueError(f"key must be {KEY_BYTES} bytes (got {len(key)})")
|
||||
final = self._key_file_path()
|
||||
final.parent.mkdir(parents=True, exist_ok=True)
|
||||
# Clean stale tmp files from prior crashed runs so the new write is
|
||||
# never confused by leftover state.
|
||||
for stale in final.parent.glob(f"{final.name}.tmp.*"):
|
||||
try:
|
||||
stale.unlink()
|
||||
except OSError:
|
||||
# Best-effort cleanup; if unlink fails we still proceed and
|
||||
# the EXCL open below will refuse if our pid happens to
|
||||
# collide with a leftover.
|
||||
pass
|
||||
tmp = final.parent / f"{final.name}.tmp.{os.getpid()}"
|
||||
# ``O_CREAT | O_EXCL | O_WRONLY`` refuses if a tmp at this exact pid
|
||||
# already exists; combined with the cleanup above, this guarantees a
|
||||
# fresh write path. ``mode=0o600`` is enforced atomically by ``open``.
|
||||
fd = os.open(str(tmp), os.O_CREAT | os.O_EXCL | os.O_WRONLY, 0o600)
|
||||
try:
|
||||
# Explicit ``fchmod`` BEFORE writing bytes: defends against any
|
||||
# umask quirk that might subtly relax the mode after open. The
|
||||
# window where the tmp file exists with permissive bits is zero.
|
||||
os.fchmod(fd, 0o600)
|
||||
os.write(fd, key)
|
||||
os.fsync(fd)
|
||||
finally:
|
||||
os.close(fd)
|
||||
os.rename(str(tmp), str(final))
|
||||
|
||||
# -------------------------------------------------------- public API
|
||||
|
||||
def get_or_create(self) -> bytes:
|
||||
"""Return the 256-bit AES key for this user_id.
|
||||
|
||||
priority:
|
||||
1. Instance cache (``self._cached_key``) — avoids repeated file reads.
|
||||
2. File backend (``_try_file_get``) — returns the 32 raw bytes from
|
||||
``{store_root}/.crypto.key`` if present, else ``None``.
|
||||
3. Passphrase fallback — derives a key from
|
||||
``IAI_MCP_CRYPTO_PASSPHRASE`` via PBKDF2; deterministic given
|
||||
``(passphrase, user_id)``. The derived key is NOT written to disk
|
||||
— it lives only in the instance cache for the session.
|
||||
4. Otherwise raise ``CryptoKeyError`` naming all remediation paths
|
||||
(``iai-mcp crypto migrate-to-file``, ``iai-mcp crypto init``,
|
||||
``IAI_MCP_CRYPTO_PASSPHRASE``).
|
||||
"""
|
||||
if self._cached_key is not None:
|
||||
return self._cached_key
|
||||
|
||||
# Priority 1: file backend (Phase 07.10 D-02).
|
||||
existing = self._try_file_get()
|
||||
if existing is not None:
|
||||
self._cached_key = existing
|
||||
return existing
|
||||
|
||||
# Priority 2: passphrase fallback (CI / non-interactive / fresh-install opt-in).
|
||||
passphrase = os.environ.get("IAI_MCP_CRYPTO_PASSPHRASE")
|
||||
if passphrase:
|
||||
derived = derive_key_from_passphrase(passphrase, self._passphrase_salt())
|
||||
self._cached_key = derived
|
||||
return derived
|
||||
|
||||
# Priority 3: refuse with a dual-remediation error message (Phase 07.10 D-04).
|
||||
path = self._key_file_path()
|
||||
raise CryptoKeyError(
|
||||
f"crypto key file not found at {path} and IAI_MCP_CRYPTO_PASSPHRASE "
|
||||
f"is not set.\n"
|
||||
f"\n"
|
||||
f"To fix:\n"
|
||||
f" - Existing install (key was in macOS Keychain before Phase 07.10): "
|
||||
f"run `iai-mcp crypto migrate-to-file` from a Terminal where the "
|
||||
f"Keychain prompt can appear, then click \"Always Allow\".\n"
|
||||
f" - Fresh install: run `iai-mcp crypto init` to generate a new key "
|
||||
f"file, OR set IAI_MCP_CRYPTO_PASSPHRASE to a strong passphrase "
|
||||
f"(suitable for CI or non-interactive environments)."
|
||||
)
|
||||
|
||||
def rotate(self) -> bytes:
|
||||
"""Generate a fresh 32-byte key, write it to the key file, return it.
|
||||
|
||||
rotation is now an atomic file-write operation,
|
||||
irrespective of how the previous key was sourced. Caller is responsible
|
||||
for re-encrypting any existing ciphertext under the old key (see
|
||||
``iai-mcp crypto rotate`` CLI; re-encryption is an application-layer
|
||||
concern). The cached instance key is updated so subsequent calls in
|
||||
the same process see the new key.
|
||||
"""
|
||||
fresh = secrets.token_bytes(KEY_BYTES)
|
||||
self._try_file_set(fresh)
|
||||
self._cached_key = fresh
|
||||
return fresh
|
||||
|
||||
def delete(self) -> None:
|
||||
"""Remove the key file (and drop the cache). Idempotent on absent files."""
|
||||
self._cached_key = None
|
||||
path = self._key_file_path()
|
||||
try:
|
||||
path.unlink()
|
||||
except FileNotFoundError:
|
||||
# Idempotent: nothing to delete.
|
||||
pass
|
||||
77
src/iai_mcp/crypto_key_watch.py
Normal file
77
src/iai_mcp/crypto_key_watch.py
Normal file
|
|
@ -0,0 +1,77 @@
|
|||
"""Boot-time detection of ``.crypto.key`` file rotation for audit events."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from iai_mcp.store import MemoryStore
|
||||
|
||||
WATCHER_REL = ".crypto-key-watcher.json"
|
||||
|
||||
|
||||
def _watcher_path(store: "MemoryStore") -> Path:
|
||||
return store.root / WATCHER_REL
|
||||
|
||||
|
||||
def _key_path(store: "MemoryStore") -> Path:
|
||||
return store.root / ".crypto.key"
|
||||
|
||||
|
||||
def sync_crypto_key_watcher_to_disk(store: "MemoryStore") -> None:
|
||||
"""Persist watcher state matching the current key file (no event)."""
|
||||
kp = _key_path(store)
|
||||
if not kp.is_file():
|
||||
return
|
||||
st = kp.stat()
|
||||
cur = {"mtime_ns": int(st.st_mtime_ns), "size": int(st.st_size)}
|
||||
wp = _watcher_path(store)
|
||||
wp.write_text(json.dumps(cur), encoding="utf-8")
|
||||
try:
|
||||
os.chmod(wp, 0o600)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
|
||||
def check_crypto_key_file_rotation_event(store: "MemoryStore") -> None:
|
||||
"""Emit ``crypto_key_rotated`` when ``.crypto.key`` mtime/size changed since last persist.
|
||||
|
||||
First run (no watcher file): writes baseline only — no event (cannot
|
||||
distinguish "first install" from "rotation" without prior state).
|
||||
"""
|
||||
from iai_mcp.events import write_event
|
||||
|
||||
kp = _key_path(store)
|
||||
if not kp.is_file():
|
||||
return
|
||||
st = kp.stat()
|
||||
cur = {"mtime_ns": int(st.st_mtime_ns), "size": int(st.st_size)}
|
||||
wp = _watcher_path(store)
|
||||
prev: dict | None = None
|
||||
if wp.is_file():
|
||||
try:
|
||||
prev = json.loads(wp.read_text(encoding="utf-8"))
|
||||
except Exception:
|
||||
prev = None
|
||||
if prev is None:
|
||||
sync_crypto_key_watcher_to_disk(store)
|
||||
return
|
||||
if prev.get("mtime_ns") == cur["mtime_ns"] and prev.get("size") == cur["size"]:
|
||||
return
|
||||
try:
|
||||
write_event(
|
||||
store,
|
||||
kind="crypto_key_rotated",
|
||||
data={
|
||||
"source": "daemon_boot",
|
||||
"previous": prev,
|
||||
"current": cur,
|
||||
},
|
||||
severity="info",
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
sync_crypto_key_watcher_to_disk(store)
|
||||
81
src/iai_mcp/cue_router.py
Normal file
81
src/iai_mcp/cue_router.py
Normal file
|
|
@ -0,0 +1,81 @@
|
|||
"""Plan 06-04 R4: cue-detection router.
|
||||
|
||||
Classifies a memory_recall cue into 'verbatim' or 'concept' mode based on
|
||||
surface signals (quoted phrases, exact-recall markers, RU starts-with
|
||||
triggers). Drives mode-dependent retrieval in both pipeline_recall (full
|
||||
graph path) and retrieve.recall (baseline fallback).
|
||||
|
||||
Constitutional framing:
|
||||
- Mottron EPF / Bowler TSH / Murray monotropism: when the cue signals exact
|
||||
recall, the user wants ONE hit, not 30. Verbatim mode is the response shape.
|
||||
- McClelland CLS: episodic and semantic stores have distinguishable retrieval
|
||||
surfaces; the cue tells us which store the user is asking.
|
||||
- Beer VSM S1 vs S4: verbatim is operations, schema is intelligence; the
|
||||
router separates the two recursion levels at the entrypoint.
|
||||
- Ashby ultrastability: the North-Star verbatim ≥99% essential variable is
|
||||
defended at the entrypoint — any verbatim-flavoured cue routes to the
|
||||
surface that protects it (tier filter + zeroed graph-bonus).
|
||||
|
||||
Triggers per CONTEXT (compiled once at module load):
|
||||
|
||||
EN (re.IGNORECASE):
|
||||
- quoted-phrase : "..." (one pair of straight double quotes around text)
|
||||
- european-quote : «...» (one pair of guillemets around text)
|
||||
- word-marker : verbatim | exact | quote | quoted | said | wrote
|
||||
- day-N : day <digits> (e.g. "day 17", "Day 7")
|
||||
|
||||
RU (case-insensitive, anchored at start-of-cue ^):
|
||||
- ru-start-найди-дословно
|
||||
- ru-start-точная-цитата
|
||||
- ru-start-что-я-сказал
|
||||
- ru-start-что-я-писал
|
||||
|
||||
Behaviour:
|
||||
- Any one EN match wins (returned with its label) and the function returns
|
||||
("verbatim", label) immediately.
|
||||
- Otherwise any one RU match wins (returned with its label).
|
||||
- No match -> ("concept", None).
|
||||
- Empty / falsy text -> ("concept", None).
|
||||
|
||||
The triggered_pattern label is for diagnostic logging (event payloads,
|
||||
debug traces) and is NOT surfaced on the JSON-RPC response — only the
|
||||
mode string lives in RecallResponse.cue_mode.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
|
||||
EN_TRIGGERS: list[tuple[str, re.Pattern]] = [
|
||||
("quoted-phrase", re.compile(r'"[^"]+"')),
|
||||
("european-quote", re.compile(r'«[^»]+»')),
|
||||
("word-marker", re.compile(r'\b(verbatim|exact|quote|quoted|said|wrote)\b', re.IGNORECASE)),
|
||||
("day-N", re.compile(r'\bday\s+\d+\b', re.IGNORECASE)),
|
||||
]
|
||||
|
||||
RU_TRIGGERS: list[tuple[str, re.Pattern]] = [
|
||||
("ru-start-найди-дословно", re.compile(r'^найди дословно', re.IGNORECASE)),
|
||||
("ru-start-точная-цитата", re.compile(r'^точная цитата', re.IGNORECASE)),
|
||||
("ru-start-что-я-сказал", re.compile(r'^что я сказал', re.IGNORECASE)),
|
||||
("ru-start-что-я-писал", re.compile(r'^что я писал', re.IGNORECASE)),
|
||||
]
|
||||
|
||||
|
||||
def _classify_cue(text: str) -> tuple[str, str | None]:
|
||||
"""Return (mode, triggered_pattern) for the given cue.
|
||||
|
||||
mode is "verbatim" if any trigger matches, else "concept".
|
||||
triggered_pattern is the trigger label (string) on a verbatim hit, or
|
||||
None when the cue routes to concept (no trigger matched).
|
||||
|
||||
Empty / None-ish input returns ("concept", None) — defensive default
|
||||
so the dispatcher never crashes on a missing cue field.
|
||||
"""
|
||||
if not text:
|
||||
return "concept", None
|
||||
for label, pat in EN_TRIGGERS:
|
||||
if pat.search(text):
|
||||
return "verbatim", label
|
||||
for label, pat in RU_TRIGGERS:
|
||||
if pat.search(text):
|
||||
return "verbatim", label
|
||||
return "concept", None
|
||||
225
src/iai_mcp/curiosity.py
Normal file
225
src/iai_mcp/curiosity.py
Normal file
|
|
@ -0,0 +1,225 @@
|
|||
"""Active curiosity (LEARN-04, D-23, D-24) -- Task 4.
|
||||
|
||||
D-23 trigger: prediction entropy > 0.7 bits AND 3-turn cooldown since last
|
||||
curiosity question in this session.
|
||||
|
||||
D-24 tiered style:
|
||||
- entropy in [ENTROPY_LOW, ENTROPY_MID) -> silent log event, no question
|
||||
- entropy in [ENTROPY_MID, ENTROPY_HIGH) -> inline hint
|
||||
- entropy >= ENTROPY_HIGH -> direct clarifying question
|
||||
|
||||
Every question creates curiosity_bridge edges from each triggering record to
|
||||
the question's UUID (used as a stable hub id). The question itself lives in
|
||||
the events table (kind=curiosity_question); callers may insert a first-class
|
||||
record if persistent text is desired, but keeps questions
|
||||
event-sourced to minimise LanceDB write volume.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
from dataclasses import dataclass, field
|
||||
from uuid import UUID, uuid4
|
||||
|
||||
from iai_mcp.events import query_events, write_event
|
||||
from iai_mcp.store import MemoryStore
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- constants
|
||||
|
||||
|
||||
ENTROPY_LOW: float = 0.4
|
||||
ENTROPY_MID: float = 0.7
|
||||
ENTROPY_HIGH: float = 0.9
|
||||
COOLDOWN_TURNS: int = 3
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- types
|
||||
|
||||
|
||||
@dataclass
|
||||
class CuriosityQuestion:
|
||||
"""One curiosity question surfaced by fire_curiosity."""
|
||||
|
||||
id: UUID
|
||||
text: str
|
||||
triggered_by_record_ids: list[UUID] = field(default_factory=list)
|
||||
entropy: float = 0.0
|
||||
tier: str = "question" # "silent" | "inline" | "question"
|
||||
resolved: bool = False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- helpers
|
||||
|
||||
|
||||
def compute_entropy(scores: list[float]) -> float:
|
||||
"""Shannon entropy (base-2, bits) over a score distribution.
|
||||
|
||||
Returns 0.0 for empty or degenerate inputs. Negative scores are clamped
|
||||
to 0 before normalisation so the probability vector is well-defined.
|
||||
"""
|
||||
if not scores:
|
||||
return 0.0
|
||||
positive = [max(0.0, float(s)) for s in scores]
|
||||
total = sum(positive)
|
||||
if total <= 0:
|
||||
return 0.0
|
||||
probs = [p / total for p in positive]
|
||||
h = 0.0
|
||||
for p in probs:
|
||||
if p > 0:
|
||||
h -= p * math.log2(p)
|
||||
return h
|
||||
|
||||
|
||||
def _last_curiosity_turn(store: MemoryStore, session_id: str) -> int | None:
|
||||
"""Return the turn of the most recent curiosity_question in this session."""
|
||||
events = query_events(store, kind="curiosity_question", limit=20)
|
||||
for e in events:
|
||||
if e.get("session_id") == session_id:
|
||||
try:
|
||||
return int(e["data"].get("turn", 0))
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- fire_curiosity
|
||||
|
||||
|
||||
def fire_curiosity(
|
||||
store: MemoryStore,
|
||||
hits: list,
|
||||
cue: str,
|
||||
entropy: float,
|
||||
session_id: str,
|
||||
turn: int,
|
||||
) -> CuriosityQuestion | None:
|
||||
"""D-23 gate + tiering.
|
||||
|
||||
Returns a CuriosityQuestion (or None) and, as a side effect:
|
||||
- emits a curiosity_silent_log event for low-entropy misses
|
||||
- emits a curiosity_question event for mid/high fires
|
||||
- creates curiosity_bridge edges from each triggering record -> question
|
||||
"""
|
||||
if entropy < ENTROPY_LOW:
|
||||
return None
|
||||
|
||||
# Low-mid band -> silent log, no question.
|
||||
if entropy < ENTROPY_MID:
|
||||
write_event(
|
||||
store,
|
||||
kind="curiosity_silent_log",
|
||||
data={
|
||||
"cue": cue[:200],
|
||||
"entropy": float(entropy),
|
||||
"source_ids": [str(h.record_id) for h in hits[:3]],
|
||||
},
|
||||
severity="info",
|
||||
session_id=session_id,
|
||||
)
|
||||
return None
|
||||
|
||||
# Cooldown check.
|
||||
last = _last_curiosity_turn(store, session_id)
|
||||
if last is not None and (turn - last) < COOLDOWN_TURNS:
|
||||
return None
|
||||
|
||||
q_id = uuid4()
|
||||
if entropy < ENTROPY_HIGH:
|
||||
tier = "inline"
|
||||
text = f"I'm not fully sure -- did you mean {cue!r}?"
|
||||
else:
|
||||
tier = "question"
|
||||
text = f"Could you clarify: {cue!r}?"
|
||||
|
||||
trigger_ids: list[UUID] = [h.record_id for h in hits[:5]]
|
||||
question = CuriosityQuestion(
|
||||
id=q_id,
|
||||
text=text,
|
||||
triggered_by_record_ids=trigger_ids,
|
||||
entropy=float(entropy),
|
||||
tier=tier,
|
||||
)
|
||||
|
||||
# curiosity_bridge edges. Delta proportional to entropy so higher-entropy
|
||||
# questions get stronger edges.
|
||||
# R3: batch all triggers into a single boost_edges call
|
||||
# (one merge_insert + one tbl.add at most). The diagnostic try/except
|
||||
# boundary is preserved at the SINGLE-call level — failure of the batched
|
||||
# write must never block the curiosity fire path.
|
||||
bridge_pairs = [(tid, q_id) for tid in trigger_ids]
|
||||
if bridge_pairs:
|
||||
try:
|
||||
store.boost_edges(
|
||||
bridge_pairs,
|
||||
edge_type="curiosity_bridge",
|
||||
delta=float(entropy),
|
||||
)
|
||||
except Exception:
|
||||
# Diagnostic; never block the curiosity fire on edge failure.
|
||||
pass
|
||||
|
||||
write_event(
|
||||
store,
|
||||
kind="curiosity_question",
|
||||
data={
|
||||
"question_id": str(q_id),
|
||||
"text": text,
|
||||
"tier": tier,
|
||||
"entropy": float(entropy),
|
||||
"turn": int(turn),
|
||||
"triggered_by": [str(t) for t in trigger_ids],
|
||||
},
|
||||
severity="info",
|
||||
session_id=session_id,
|
||||
source_ids=trigger_ids,
|
||||
)
|
||||
return question
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- pending
|
||||
|
||||
|
||||
def pending_questions(
|
||||
store: MemoryStore,
|
||||
session_id: str | None = None,
|
||||
) -> list[CuriosityQuestion]:
|
||||
"""Return unresolved curiosity questions, optionally scoped to a session."""
|
||||
events = query_events(store, kind="curiosity_question", limit=200)
|
||||
resolved_events = query_events(store, kind="curiosity_resolved", limit=500)
|
||||
resolved_ids = {
|
||||
r["data"].get("question_id")
|
||||
for r in resolved_events
|
||||
if r["data"].get("question_id")
|
||||
}
|
||||
out: list[CuriosityQuestion] = []
|
||||
for e in events:
|
||||
if session_id is not None and e.get("session_id") != session_id:
|
||||
continue
|
||||
data = e["data"]
|
||||
qid_raw = data.get("question_id")
|
||||
if not qid_raw:
|
||||
continue
|
||||
if qid_raw in resolved_ids:
|
||||
continue
|
||||
try:
|
||||
qid = UUID(qid_raw)
|
||||
except (TypeError, ValueError):
|
||||
continue
|
||||
triggered: list[UUID] = []
|
||||
for t in data.get("triggered_by", []):
|
||||
try:
|
||||
triggered.append(UUID(t))
|
||||
except (TypeError, ValueError):
|
||||
continue
|
||||
out.append(
|
||||
CuriosityQuestion(
|
||||
id=qid,
|
||||
text=data.get("text", ""),
|
||||
triggered_by_record_ids=triggered,
|
||||
entropy=float(data.get("entropy", 0.0)),
|
||||
tier=data.get("tier", "question"),
|
||||
resolved=False,
|
||||
)
|
||||
)
|
||||
return out
|
||||
1690
src/iai_mcp/daemon.py
Normal file
1690
src/iai_mcp/daemon.py
Normal file
File diff suppressed because it is too large
Load diff
294
src/iai_mcp/daemon_state.py
Normal file
294
src/iai_mcp/daemon_state.py
Normal file
|
|
@ -0,0 +1,294 @@
|
|||
"""Phase 4 -- atomic daemon state persistence (DAEMON-01 / D-24).
|
||||
|
||||
State file at ~/.iai-mcp/.daemon-state.json holds:
|
||||
- fsm_state -- WAKE / TRANSITIONING / SLEEP / DREAMING
|
||||
- daemon_started_at -- ISO8601 UTC
|
||||
- last_digest_shown_at -- ISO8601 UTC, used by morning digest gate
|
||||
- pending_digest -- dict ready to surface in next memory_recall
|
||||
- last_learned_at -- last quiet-window learn timestamp
|
||||
- last_session_ts -- last observed session_started event ts
|
||||
|
||||
All writes via tempfile + os.replace (POSIX atomic rename). Crash-mid-write
|
||||
leaves the old file intact; readers either see old complete or new complete,
|
||||
never partial.
|
||||
|
||||
T-04-01 mitigation: atomic rename precludes torn writes.
|
||||
T-04-07 mitigation: file mode 0o600 user-only.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import tempfile
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from pathlib import Path
|
||||
|
||||
STATE_PATH: Path = Path.home() / ".iai-mcp" / ".daemon-state.json"
|
||||
|
||||
# morning-digest gating threshold. The digest is surfaced only when it
|
||||
# has been at least this many hours since the last show (or has never shown).
|
||||
DIGEST_SHOW_THRESHOLD_HOURS: int = 18
|
||||
|
||||
# first_turn_pending eviction guards. A session is considered stale once it
|
||||
# has sat in the dict for longer than FIRST_TURN_TTL_HOURS -- typically it
|
||||
# means the client died before consuming the flag, so the entry will never
|
||||
# be popped by ``consume_first_turn``. MAX_FIRST_TURN_ENTRIES caps the dict
|
||||
# as a secondary safety net when many sessions open in a short window.
|
||||
FIRST_TURN_TTL_HOURS: int = 24
|
||||
MAX_FIRST_TURN_ENTRIES: int = 100
|
||||
|
||||
|
||||
def load_state() -> dict:
|
||||
"""Read the state file; return {} if missing or malformed (self-heal)."""
|
||||
if not STATE_PATH.exists():
|
||||
return {}
|
||||
try:
|
||||
return json.loads(STATE_PATH.read_text())
|
||||
except (OSError, json.JSONDecodeError):
|
||||
# Corrupt file -- return empty dict; next save_state writes fresh.
|
||||
return {}
|
||||
|
||||
|
||||
def save_state(state: dict) -> None:
|
||||
"""Atomically persist state via tempfile + os.replace.
|
||||
|
||||
Semantics:
|
||||
- Creates parent dir if missing.
|
||||
- Writes to a sibling temp file in the same directory (required so
|
||||
os.replace can do an atomic rename on the same filesystem).
|
||||
- fsync the file contents before rename so the data is on disk.
|
||||
- chmod 0o600 before the swap so the visible file is never world-readable.
|
||||
- On exception: unlink the temp file so `/tmp` doesn't accumulate.
|
||||
"""
|
||||
STATE_PATH.parent.mkdir(parents=True, exist_ok=True)
|
||||
fd, tmp = tempfile.mkstemp(
|
||||
prefix=".daemon-state.",
|
||||
suffix=".tmp",
|
||||
dir=str(STATE_PATH.parent),
|
||||
)
|
||||
try:
|
||||
with os.fdopen(fd, "w") as f:
|
||||
json.dump(state, f, indent=2)
|
||||
f.flush()
|
||||
os.fsync(f.fileno())
|
||||
os.chmod(tmp, 0o600)
|
||||
os.replace(tmp, STATE_PATH)
|
||||
except Exception:
|
||||
try:
|
||||
os.unlink(tmp)
|
||||
except OSError:
|
||||
pass
|
||||
raise
|
||||
|
||||
|
||||
def prune_stale_first_turn(
|
||||
state: dict,
|
||||
now: datetime | None = None,
|
||||
ttl_hours: int = FIRST_TURN_TTL_HOURS,
|
||||
max_entries: int = MAX_FIRST_TURN_ENTRIES,
|
||||
) -> int:
|
||||
"""Evict first_turn_pending entries older than ``ttl_hours`` and cap the
|
||||
dict at ``max_entries`` (keep newest by timestamp). Returns the number
|
||||
of entries removed.
|
||||
|
||||
Accepts legacy values ``True`` / ``False`` as "unknown timestamp" and
|
||||
stamps them with ``now`` so they age out on the next prune. Idempotent;
|
||||
safe to call on every save.
|
||||
"""
|
||||
pending = state.get("first_turn_pending")
|
||||
if not isinstance(pending, dict) or not pending:
|
||||
return 0
|
||||
|
||||
current = now if now is not None else datetime.now(timezone.utc)
|
||||
if current.tzinfo is None:
|
||||
current = current.replace(tzinfo=timezone.utc)
|
||||
cutoff = current - timedelta(hours=ttl_hours)
|
||||
|
||||
def _as_dt(value: object) -> datetime:
|
||||
"""Parse stored value into an aware datetime; unknown -> epoch (evict).
|
||||
|
||||
Legacy bool / malformed strings are treated as "stale, evict now" —
|
||||
they cannot be aged sensibly without a real timestamp, and the
|
||||
former "stamp with current" behaviour kept the dict from ever
|
||||
draining when clients died before writing ISO timestamps.
|
||||
"""
|
||||
if isinstance(value, str):
|
||||
try:
|
||||
dt = datetime.fromisoformat(value)
|
||||
if dt.tzinfo is None:
|
||||
dt = dt.replace(tzinfo=timezone.utc)
|
||||
return dt
|
||||
except ValueError:
|
||||
return datetime.fromtimestamp(0, tz=timezone.utc)
|
||||
return datetime.fromtimestamp(0, tz=timezone.utc)
|
||||
|
||||
# Normalise every entry to an ISO timestamp string so downstream
|
||||
# callers see a consistent value shape after the first prune.
|
||||
removed = 0
|
||||
for sid, value in list(pending.items()):
|
||||
dt = _as_dt(value)
|
||||
if dt < cutoff:
|
||||
pending.pop(sid, None)
|
||||
removed += 1
|
||||
elif not isinstance(value, str):
|
||||
pending[sid] = dt.isoformat()
|
||||
|
||||
# Secondary cap — keep the newest ``max_entries`` by timestamp.
|
||||
if len(pending) > max_entries:
|
||||
ordered = sorted(
|
||||
pending.items(),
|
||||
key=lambda kv: _as_dt(kv[1]),
|
||||
reverse=True,
|
||||
)
|
||||
keep = dict(ordered[:max_entries])
|
||||
removed += len(pending) - len(keep)
|
||||
state["first_turn_pending"] = keep
|
||||
|
||||
return removed
|
||||
|
||||
|
||||
def mark_session_opened(state: dict, session_id: str) -> None:
|
||||
"""Plan 05-03 TOK-12 / D5-03: mark first_turn_pending for a session.
|
||||
|
||||
Stores the opening timestamp as the dict value so ``prune_stale_first_turn``
|
||||
can evict entries whose client died before consuming the flag. Opportunistic
|
||||
prune on every mark keeps the dict bounded without a dedicated reaper.
|
||||
|
||||
Idempotent. Persistence is the caller's responsibility (typical callers:
|
||||
concurrency socket handler; tests directly).
|
||||
"""
|
||||
if not isinstance(session_id, str) or not session_id:
|
||||
return
|
||||
pending = state.setdefault("first_turn_pending", {})
|
||||
pending[session_id] = datetime.now(timezone.utc).isoformat()
|
||||
prune_stale_first_turn(state)
|
||||
|
||||
|
||||
def consume_first_turn(state: dict, session_id: str) -> bool:
|
||||
"""Return True iff first call for session; atomic pop+save.
|
||||
|
||||
D5-03: the first memory_recall in a session consumes the
|
||||
flag so subsequent recalls bypass the first-turn hook.
|
||||
"""
|
||||
try:
|
||||
pending = state.get("first_turn_pending")
|
||||
if not isinstance(pending, dict):
|
||||
return False
|
||||
if pending.pop(session_id, False):
|
||||
try:
|
||||
save_state(state)
|
||||
except Exception:
|
||||
# save failure is non-fatal — returning True still triggers
|
||||
# the hook exactly once in-process; cross-process atomicity
|
||||
# is best-effort.
|
||||
pass
|
||||
return True
|
||||
return False
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
# R3 (per D7.2-07 / D7.2-08 / D7.2-10): a per-tick + startup
|
||||
# reaper for stale `first_turn_pending` entries with a 1-hour TTL and a
|
||||
# tuple return shape (updated_state, dropped_session_ids).
|
||||
#
|
||||
# Distinct from `prune_stale_first_turn` above which has a 24h ceiling and
|
||||
# is opportunistically invoked from `mark_session_opened`. Both helpers
|
||||
# coexist by design (researcher finding #1 + advisor recommendation):
|
||||
# - `prune_stale_first_turn` keeps its 24h opportunistic path on session-open;
|
||||
# - `prune_first_turn_pending` is the per-tick + startup reaper that needs
|
||||
# the dropped IDs back so the caller can emit
|
||||
# `kind=first_turn_pending_expired` events (D7.2-10).
|
||||
#
|
||||
# Pure function — no I/O. Caller is responsible for `save_state(state)`
|
||||
# and the event emit. Idempotent; safe on empty/missing input.
|
||||
|
||||
FIRST_TURN_PENDING_TTL_SEC_DEFAULT: float = 3600.0 # D7.2-08 1h default
|
||||
|
||||
|
||||
def prune_first_turn_pending(
|
||||
state: dict,
|
||||
now: datetime | None = None,
|
||||
ttl_sec: float = FIRST_TURN_PENDING_TTL_SEC_DEFAULT,
|
||||
) -> tuple[dict, list[str]]:
|
||||
"""Phase 7.2 R3: drain stale `first_turn_pending` entries.
|
||||
|
||||
Returns (updated_state_dict, dropped_session_ids). Pure function —
|
||||
does NOT call save_state; does NOT emit events. Caller decides
|
||||
persistence + event emission.
|
||||
|
||||
Eviction rules:
|
||||
- String value parsed as ISO timestamp; entry evicts if (now - ts) >= ttl_sec.
|
||||
- Non-string value (legacy bool / dict / None) treated as stale → evict.
|
||||
Matches the established behavior of `prune_stale_first_turn` for
|
||||
legacy entries (cannot be aged sensibly without a timestamp).
|
||||
- Naive timestamps assumed UTC.
|
||||
- Malformed ISO strings → evict (defensive against corruption).
|
||||
|
||||
Distinct from `prune_stale_first_turn` (24h default, returns int);
|
||||
this helper is per-tick + startup with a shorter TTL and visibility
|
||||
into which sessions were dropped (D7.2-10 event payload needs the
|
||||
session_ids list).
|
||||
"""
|
||||
pending = state.get("first_turn_pending")
|
||||
if not isinstance(pending, dict) or not pending:
|
||||
return state, []
|
||||
|
||||
current = now if now is not None else datetime.now(timezone.utc)
|
||||
if current.tzinfo is None:
|
||||
current = current.replace(tzinfo=timezone.utc)
|
||||
cutoff = current - timedelta(seconds=ttl_sec)
|
||||
|
||||
dropped: list[str] = []
|
||||
fresh: dict = {}
|
||||
for sid, value in pending.items():
|
||||
if isinstance(value, str):
|
||||
try:
|
||||
ts = datetime.fromisoformat(value)
|
||||
if ts.tzinfo is None:
|
||||
ts = ts.replace(tzinfo=timezone.utc)
|
||||
except ValueError:
|
||||
dropped.append(sid)
|
||||
continue
|
||||
if ts < cutoff:
|
||||
dropped.append(sid)
|
||||
continue
|
||||
fresh[sid] = value
|
||||
else:
|
||||
# Legacy bool / dict / None / number — no recoverable timestamp.
|
||||
dropped.append(sid)
|
||||
|
||||
state["first_turn_pending"] = fresh
|
||||
return state, dropped
|
||||
|
||||
|
||||
def get_pending_digest(state: dict, now: datetime) -> dict | None:
|
||||
"""D-24 / DAEMON-11: return pending morning digest if eligible, else None.
|
||||
|
||||
Eligibility gate: >= DIGEST_SHOW_THRESHOLD_HOURS since last_digest_shown_at
|
||||
OR never shown. When returned, the digest is consumed from state and
|
||||
last_digest_shown_at is advanced to `now`; state is persisted via
|
||||
save_state so the same digest never appears twice in the same 18h window.
|
||||
"""
|
||||
last_shown = state.get("last_digest_shown_at")
|
||||
if last_shown:
|
||||
try:
|
||||
last_dt = datetime.fromisoformat(last_shown)
|
||||
if last_dt.tzinfo is None:
|
||||
last_dt = last_dt.replace(tzinfo=timezone.utc)
|
||||
now_cmp = now if now.tzinfo is not None else now.replace(tzinfo=timezone.utc)
|
||||
if now_cmp - last_dt < timedelta(hours=DIGEST_SHOW_THRESHOLD_HOURS):
|
||||
return None
|
||||
except (TypeError, ValueError):
|
||||
# Malformed timestamp -- treat as never shown, fall through.
|
||||
pass
|
||||
|
||||
digest = state.get("pending_digest")
|
||||
if not digest:
|
||||
return None
|
||||
|
||||
now_cmp = now if now.tzinfo is not None else now.replace(tzinfo=timezone.utc)
|
||||
state["last_digest_shown_at"] = now_cmp.isoformat()
|
||||
state.pop("pending_digest", None)
|
||||
save_state(state)
|
||||
return digest
|
||||
79
src/iai_mcp/delegate.py
Normal file
79
src/iai_mcp/delegate.py
Normal file
|
|
@ -0,0 +1,79 @@
|
|||
"""TOK-07 subagent delegation context (Plan 02-04 Task 3, D-27).
|
||||
|
||||
Parent session exposes a JSON blob containing the 4-segment session-start
|
||||
payload (L0, L1, L2, rich-club) plus per-component hashes (for delta
|
||||
encoding) and a proxy-tools schema listing the 5 Phase-1 memory tools the
|
||||
subagent may invoke via the parent.
|
||||
|
||||
The subagent inherits the parent's session cache; it does NOT re-load the
|
||||
graph from scratch. This matches the Claude Code subagent-context feature
|
||||
request (#20304).
|
||||
|
||||
Constitutional note: the 3 MCP surface tools (curiosity_pending,
|
||||
schema_list, events_query) are user-introspection surfaces and are NOT
|
||||
included in SUBAGENT_HOT_TOOLS. Subagents receive the 5 memory tools; user
|
||||
introspection stays with the parent session.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
|
||||
# The 5 memory tools exposed to subagents (Phase 1 hot surface). Plan 02-04's
|
||||
# new user-introspection tools are intentionally excluded.
|
||||
SUBAGENT_HOT_TOOLS: tuple[str, ...] = (
|
||||
"memory_recall",
|
||||
"memory_reinforce",
|
||||
"memory_contradict",
|
||||
"memory_consolidate",
|
||||
"profile_get_set",
|
||||
)
|
||||
|
||||
|
||||
def subagent_proxy_tools() -> list[dict]:
|
||||
"""Return a list of tool stubs advertised to the subagent.
|
||||
|
||||
Each stub carries `name` + `proxied_via`; the subagent invokes its
|
||||
parent's MCP bridge with the tool name, and the parent forwards the call
|
||||
to the Python core.
|
||||
"""
|
||||
return [
|
||||
{"name": name, "proxied_via": "parent_session"}
|
||||
for name in SUBAGENT_HOT_TOOLS
|
||||
]
|
||||
|
||||
|
||||
def serialize_session_for_subagent(
|
||||
store,
|
||||
assignment,
|
||||
rich_club,
|
||||
) -> dict:
|
||||
"""Build a JSON-safe dict for subagent spawn.
|
||||
|
||||
Returns:
|
||||
{
|
||||
"l0": str,
|
||||
"l1": str,
|
||||
"l2": list[str],
|
||||
"rich_club": str,
|
||||
"hashes": {"l0": str, "l1": str, "l2": str, "rich_club": str},
|
||||
"proxy_tools": [{"name": ..., "proxied_via": "parent_session"}, ...],
|
||||
}
|
||||
"""
|
||||
from iai_mcp.delta import build_delta
|
||||
from iai_mcp.session import assemble_session_start
|
||||
|
||||
payload = assemble_session_start(store, assignment, rich_club)
|
||||
payload_dict = {
|
||||
"l0": payload.l0,
|
||||
"l1": payload.l1,
|
||||
"l2": list(payload.l2),
|
||||
"rich_club": payload.rich_club,
|
||||
}
|
||||
_delta, hashes = build_delta({}, payload_dict)
|
||||
return {
|
||||
"l0": payload_dict["l0"],
|
||||
"l1": payload_dict["l1"],
|
||||
"l2": payload_dict["l2"],
|
||||
"rich_club": payload_dict["rich_club"],
|
||||
"hashes": hashes,
|
||||
"proxy_tools": subagent_proxy_tools(),
|
||||
}
|
||||
78
src/iai_mcp/delta.py
Normal file
78
src/iai_mcp/delta.py
Normal file
|
|
@ -0,0 +1,78 @@
|
|||
"""TOK-08 delta encoding for session-start payloads (Plan 02-04 Task 2, D-28).
|
||||
|
||||
The session-start payload is a 4-component dict: l0, l1, l2 (list), rich_club.
|
||||
On the first session turn the client sends nothing; the server hashes each
|
||||
component and returns both the payload and the hash bundle. On subsequent
|
||||
turns the client sends previous_hashes; the server compares, and only the
|
||||
components whose hash changed are returned in the delta payload. Unchanged
|
||||
components are implicit in the delta (absent from delta, carried over from
|
||||
the client's cache).
|
||||
|
||||
On hash miss (client sends a stale hash), the server returns the full
|
||||
component value in the delta -- this is also the first-session behaviour.
|
||||
|
||||
Reduces per-turn token spend 60-80% on typical within-session continuation.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
|
||||
|
||||
HASH_LEN = 16 # sha256 hex truncated to 16 chars
|
||||
COMPONENTS = ("l0", "l1", "l2", "rich_club")
|
||||
|
||||
|
||||
def hash_component(text: str) -> str:
|
||||
"""Return a stable 16-char hex digest of the UTF-8-encoded text."""
|
||||
h = hashlib.sha256(text.encode("utf-8") if text is not None else b"").hexdigest()
|
||||
return h[:HASH_LEN]
|
||||
|
||||
|
||||
def _component_text(value) -> str:
|
||||
"""Flatten a payload component to a single string for hashing.
|
||||
|
||||
L0/L1/rich_club are strings. L2 is a list of strings; we join with "\n"
|
||||
so ordering matters (which matches the wire format).
|
||||
"""
|
||||
if value is None:
|
||||
return ""
|
||||
if isinstance(value, list):
|
||||
return "\n".join(str(x) for x in value)
|
||||
return str(value)
|
||||
|
||||
|
||||
def build_delta(
|
||||
previous_hashes: dict[str, str],
|
||||
current_payload: dict,
|
||||
) -> tuple[dict, dict[str, str]]:
|
||||
"""Compute (delta, new_hashes) given the client's last-seen hashes.
|
||||
|
||||
delta is a subset of current_payload containing only components whose
|
||||
hash does not match previous_hashes (including the first-session case
|
||||
where previous_hashes is empty or missing keys). new_hashes is the full
|
||||
current hash bundle, keyed by component name.
|
||||
"""
|
||||
delta: dict = {}
|
||||
new_hashes: dict[str, str] = {}
|
||||
for key in COMPONENTS:
|
||||
value = current_payload.get(key)
|
||||
text = _component_text(value)
|
||||
h = hash_component(text)
|
||||
new_hashes[key] = h
|
||||
prev = previous_hashes.get(key) if previous_hashes else None
|
||||
if prev != h:
|
||||
delta[key] = value if value is not None else ""
|
||||
return delta, new_hashes
|
||||
|
||||
|
||||
def apply_delta(previous: dict, delta: dict) -> dict:
|
||||
"""Merge delta on top of previous full payload -> new full payload.
|
||||
|
||||
Keys absent from delta carry over from `previous`. Provides the client
|
||||
side of the round-trip (parent agent: server emits delta; subagent:
|
||||
client applies delta).
|
||||
"""
|
||||
merged = dict(previous)
|
||||
for key, value in delta.items():
|
||||
merged[key] = value
|
||||
return merged
|
||||
1558
src/iai_mcp/doctor.py
Normal file
1558
src/iai_mcp/doctor.py
Normal file
File diff suppressed because it is too large
Load diff
123
src/iai_mcp/dream.py
Normal file
123
src/iai_mcp/dream.py
Normal file
|
|
@ -0,0 +1,123 @@
|
|||
"""REM cycle orchestrator. CALLS existing modules -- does not reimplement.
|
||||
|
||||
Biological mapping:
|
||||
- NREM-2 (Hebbian binding) = existing hebbian LTP inside sleep.py cluster pass
|
||||
- NREM-3 (hippocampal replay) = sleep.run_heavy_consolidation Tier-0 path
|
||||
- REM (cross-community) = schema.induce_schemas_tier1(llm_enabled=False)
|
||||
- REM lucid moment (last cycle) = insight.generate_overnight_insight
|
||||
|
||||
Constitutional guard:
|
||||
- LOCAL primary worker; llm_enabled ALWAYS False when calling sleep/schema.
|
||||
- has_api_key=False always for daemon (zero paid-API path).
|
||||
- 15-minute hard cap per cycle (asyncio.timeout context manager).
|
||||
- C1: daemon must already hold the fcntl exclusive lock BEFORE calling
|
||||
run_rem_cycle -- this module does NOT acquire locks, that is _tick_body's
|
||||
job. This module is called under the lock.
|
||||
- C3: ZERO API cost. The single nightly Claude call is a subprocess, wired
|
||||
by in insight.py. No paid-API env var is referenced here.
|
||||
- C5: literal preservation -- we only call modules that modify metadata
|
||||
(FSRS state, edge weights, schema tags). Never assigns to literal_surface.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
|
||||
from iai_mcp.events import write_event
|
||||
from iai_mcp.guard import BudgetLedger, RateLimitLedger
|
||||
from iai_mcp.schema import induce_schemas_tier1
|
||||
from iai_mcp.sleep import SleepConfig, run_heavy_consolidation
|
||||
|
||||
# hard cap per REM cycle.
|
||||
REM_CYCLE_MAX_SEC: int = 15 * 60
|
||||
|
||||
|
||||
async def _emit(store, kind: str, data: dict, severity: str | None = None) -> None:
|
||||
"""Emit an event off the main loop so LanceDB writes don't block asyncio."""
|
||||
if severity is None:
|
||||
await asyncio.to_thread(write_event, store, kind, data)
|
||||
else:
|
||||
await asyncio.to_thread(write_event, store, kind, data, severity=severity)
|
||||
|
||||
|
||||
async def run_rem_cycle(
|
||||
store,
|
||||
cycle_num: int,
|
||||
total_cycles: int,
|
||||
session_id: str,
|
||||
*,
|
||||
is_last: bool,
|
||||
claude_enabled: bool,
|
||||
) -> dict:
|
||||
"""One REM cycle. Runs to completion or hits 15min cap.
|
||||
|
||||
Returns dict consumed by the morning digest:
|
||||
{cycle, summaries_created, schemas_induced, schema_candidates,
|
||||
claude_call_used, main_insight_text, timed_out}
|
||||
|
||||
Never raises. All failure modes (timeout, module exception) surface as
|
||||
event emissions + a partial result dict so the daemon's outer loop
|
||||
cannot crash on cycle-internal exceptions (T-04-12 mitigation).
|
||||
"""
|
||||
await _emit(store, "rem_cycle_started", {"n": cycle_num, "of": total_cycles})
|
||||
|
||||
result: dict = {
|
||||
"cycle": cycle_num,
|
||||
"summaries_created": 0,
|
||||
"schemas_induced": 0,
|
||||
"schema_candidates": 0,
|
||||
"claude_call_used": False,
|
||||
"main_insight_text": None,
|
||||
"timed_out": False,
|
||||
}
|
||||
|
||||
try:
|
||||
async with asyncio.timeout(REM_CYCLE_MAX_SEC):
|
||||
# NREM-3 equivalent: heavy consolidation, Tier-0 only in daemon.
|
||||
cfg = SleepConfig(llm_enabled=False)
|
||||
heavy = await asyncio.to_thread(
|
||||
run_heavy_consolidation,
|
||||
store, session_id, cfg,
|
||||
BudgetLedger(store), RateLimitLedger(store),
|
||||
False, # has_api_key=False always for daemon
|
||||
)
|
||||
if isinstance(heavy, dict):
|
||||
result["summaries_created"] = int(heavy.get("summaries_created", 0) or 0)
|
||||
result["schemas_induced"] = int(heavy.get("schemas_induced", 0) or 0)
|
||||
|
||||
# REM cross-community schema induction (explicit Tier-0).
|
||||
# Signature: induce_schemas_tier1(store, budget, rate, llm_enabled=True)
|
||||
# -- we force llm_enabled=False so the D-GUARD ladder falls through to
|
||||
# the pure-local Tier-0 path.
|
||||
candidates = await asyncio.to_thread(
|
||||
induce_schemas_tier1,
|
||||
store, BudgetLedger(store), RateLimitLedger(store), False,
|
||||
)
|
||||
result["schema_candidates"] = len(candidates) if candidates else 0
|
||||
|
||||
# Lucid moment -- ONLY on last cycle, budget-gated by caller.
|
||||
if is_last and claude_enabled:
|
||||
from iai_mcp.insight import generate_overnight_insight
|
||||
|
||||
insight = await generate_overnight_insight(store, session_id)
|
||||
if isinstance(insight, dict) and insight.get("ok"):
|
||||
result["claude_call_used"] = True
|
||||
result["main_insight_text"] = insight.get("text")
|
||||
|
||||
except asyncio.TimeoutError:
|
||||
result["timed_out"] = True
|
||||
await _emit(
|
||||
store,
|
||||
"rem_cycle_timeout",
|
||||
{"cycle": cycle_num},
|
||||
severity="warning",
|
||||
)
|
||||
except Exception as exc: # noqa: BLE001 -- daemon must never die on cycle error
|
||||
await _emit(
|
||||
store,
|
||||
"rem_cycle_error",
|
||||
{"cycle": cycle_num, "error": str(exc)[:500]},
|
||||
severity="critical",
|
||||
)
|
||||
|
||||
await _emit(store, "rem_cycle_completed", result)
|
||||
return result
|
||||
193
src/iai_mcp/embed.py
Normal file
193
src/iai_mcp/embed.py
Normal file
|
|
@ -0,0 +1,193 @@
|
|||
"""Embedding layer -- configurable embedder with a 3-model registry.
|
||||
|
||||
Plan 05-08 (2026-04-20): the DEFAULT is now ``bge-small-en-v1.5`` (384d
|
||||
English-only), reverting the Phase-2 deviation. PROJECT.md line
|
||||
125 always specified bge-small-en-v1.5 as the intended default; Phase-2
|
||||
swapped in bge-m3 (1024d multilingual) as D-08a. User directive
|
||||
2026-04-19: the brain stores English, surface translation is Claude's
|
||||
job. bge-m3 stays selectable via env var / kwarg for anyone who needs
|
||||
multilingual semantic match at the 5x RAM cost.
|
||||
|
||||
Configurable 4-model registry:
|
||||
- "bge-m3" -> BAAI/bge-m3 -> 1024d (opt-in, multilingual)
|
||||
- "multilingual-e5-small" -> intfloat/multilingual-e5-small -> 384d (compromise)
|
||||
- "bge-small-en-v1.5" -> BAAI/bge-small-en-v1.5 -> 384d (DEFAULT, English)
|
||||
- "all-MiniLM-L6-v2" -> sentence-transformers/all-MiniLM-L6-v2 -> 384d (English alternative embedder option; included for compatibility testing)
|
||||
|
||||
Selection priority at Embedder() instantiation:
|
||||
1. Explicit `model_key` constructor arg
|
||||
2. IAI_MCP_EMBED_MODEL environment variable
|
||||
3. MODEL_REGISTRY default ("bge-small-en-v1.5")
|
||||
|
||||
The model is loaded once per process and cached in a module-level dict so
|
||||
multiple Embedder() instances share the underlying SentenceTransformer.
|
||||
|
||||
Deterministic: `normalize_embeddings=True` is always passed,
|
||||
`show_progress_bar=False`. Same input text always produces the same output
|
||||
vector across calls within a process.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import threading
|
||||
|
||||
from sentence_transformers import SentenceTransformer
|
||||
|
||||
|
||||
# 4-model registry. Name convention: short logical key -> HF repo id + dim.
|
||||
# (2026-04-29): all-MiniLM-L6-v2 added as additive ablation entry;
|
||||
# DEFAULT_MODEL_KEY unchanged (English-Only Brain lock from / Plan 05-08).
|
||||
MODEL_REGISTRY: dict[str, dict] = {
|
||||
"bge-m3": {"hf": "BAAI/bge-m3", "dim": 1024},
|
||||
"multilingual-e5-small": {"hf": "intfloat/multilingual-e5-small", "dim": 384},
|
||||
"bge-small-en-v1.5": {"hf": "BAAI/bge-small-en-v1.5", "dim": 384},
|
||||
"all-MiniLM-L6-v2": {"hf": "sentence-transformers/all-MiniLM-L6-v2", "dim": 384},
|
||||
}
|
||||
DEFAULT_MODEL_KEY = "bge-small-en-v1.5"
|
||||
|
||||
|
||||
def _resolve_model_key(model_key: str | None = None) -> str:
|
||||
if model_key is not None:
|
||||
if model_key not in MODEL_REGISTRY:
|
||||
raise ValueError(
|
||||
f"unknown embed model key {model_key!r}; valid: {sorted(MODEL_REGISTRY)}"
|
||||
)
|
||||
return model_key
|
||||
env_key = os.environ.get("IAI_MCP_EMBED_MODEL")
|
||||
if env_key:
|
||||
if env_key not in MODEL_REGISTRY:
|
||||
raise ValueError(
|
||||
f"unknown embed model key {env_key!r} from IAI_MCP_EMBED_MODEL; "
|
||||
f"valid: {sorted(MODEL_REGISTRY)}"
|
||||
)
|
||||
return env_key
|
||||
return DEFAULT_MODEL_KEY
|
||||
|
||||
|
||||
_MODEL_LOCK = threading.Lock()
|
||||
_MODEL_CACHE: dict[str, SentenceTransformer] = {}
|
||||
|
||||
|
||||
def _get_model(hf_id: str) -> SentenceTransformer:
|
||||
"""Process-local lazy-load + cache. Thread-safe via lock around cache mutation."""
|
||||
with _MODEL_LOCK:
|
||||
if hf_id not in _MODEL_CACHE:
|
||||
_MODEL_CACHE[hf_id] = SentenceTransformer(hf_id)
|
||||
return _MODEL_CACHE[hf_id]
|
||||
|
||||
|
||||
class Embedder:
|
||||
"""English-Only Brain embedder with a configurable model registry.
|
||||
|
||||
Default model is `bge-small-en-v1.5` (384d, English) per Plan 05-08.
|
||||
Used by the retrieval pipeline (stage 1, cue embedding) and by session-start
|
||||
assembler. `.DIM` is per-instance (varies by model). `.DEFAULT_DIM` is a
|
||||
class-level default pointing at the registry's default model dimension.
|
||||
|
||||
The opt-in `bge-m3` (1024d multilingual) path stays in the registry for
|
||||
users who explicitly need multilingual semantic match at the 5x RAM cost,
|
||||
but it is opt-in via `IAI_MCP_EMBED_MODEL=bge-m3` — not the product.
|
||||
|
||||
Backward compatibility:
|
||||
- `Embedder.DIM` is kept as a class attribute aliased to the default model
|
||||
dimension so tests that reference `Embedder.DIM` still work; new
|
||||
code should prefer `Embedder().DIM` (instance attr) for correctness.
|
||||
- `Embedder.DEFAULT_MODEL` is the HF id of the default model (bge-small-en-v1.5).
|
||||
"""
|
||||
|
||||
DEFAULT_MODEL_KEY: str = DEFAULT_MODEL_KEY
|
||||
DEFAULT_DIM: int = MODEL_REGISTRY[DEFAULT_MODEL_KEY]["dim"]
|
||||
# Legacy class-level attributes (Phase 1 test compatibility).
|
||||
# New code should construct Embedder() and read .DIM from the instance.
|
||||
DEFAULT_MODEL: str = MODEL_REGISTRY[DEFAULT_MODEL_KEY]["hf"]
|
||||
DIM: int = DEFAULT_DIM
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
model_key: str | None = None,
|
||||
*,
|
||||
model_name: str | None = None,
|
||||
) -> None:
|
||||
"""Initialise an Embedder.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
model_key:
|
||||
Logical key from MODEL_REGISTRY ("bge-m3" | "multilingual-e5-small" |
|
||||
"bge-small-en-v1.5"). If None, uses IAI_MCP_EMBED_MODEL env var or
|
||||
the registry default.
|
||||
model_name:
|
||||
Legacy parameter: full HuggingFace repo id (e.g. "BAAI/bge-small-en-v1.5").
|
||||
Prefer model_key for new code. If both are provided, model_key wins.
|
||||
"""
|
||||
if model_key is None and model_name is not None:
|
||||
# Reverse-lookup: find the key whose hf matches this name.
|
||||
match = next(
|
||||
(k for k, v in MODEL_REGISTRY.items() if v["hf"] == model_name),
|
||||
None,
|
||||
)
|
||||
if match is None:
|
||||
raise ValueError(
|
||||
f"model_name {model_name!r} is not in MODEL_REGISTRY; "
|
||||
f"valid hf ids: {[v['hf'] for v in MODEL_REGISTRY.values()]}"
|
||||
)
|
||||
key = match
|
||||
else:
|
||||
key = _resolve_model_key(model_key)
|
||||
self.model_key: str = key
|
||||
spec = MODEL_REGISTRY[key]
|
||||
self.model_name: str = spec["hf"]
|
||||
self.DIM: int = int(spec["dim"]) # instance attr overrides class attr
|
||||
self._model = _get_model(self.model_name)
|
||||
|
||||
def embed(self, text: str) -> list[float]:
|
||||
"""Encode a single string to a DIM-length list[float]. Normalised, deterministic."""
|
||||
vec = self._model.encode(
|
||||
text, normalize_embeddings=True, show_progress_bar=False
|
||||
)
|
||||
return vec.tolist()
|
||||
|
||||
def embed_batch(self, texts: list[str]) -> list[list[float]]:
|
||||
"""Batch-encode preserving input order. Returns N vectors for N inputs."""
|
||||
vecs = self._model.encode(
|
||||
list(texts),
|
||||
normalize_embeddings=True,
|
||||
show_progress_bar=False,
|
||||
batch_size=32,
|
||||
)
|
||||
return [v.tolist() for v in vecs]
|
||||
|
||||
|
||||
def embedder_for_store(store) -> "Embedder":
|
||||
"""Store-aware Embedder factory. Picks the model whose output dim matches
|
||||
the existing LanceDB records schema, so a legacy 1024d store from the
|
||||
pre-Plan-05-08 bge-m3 era stays queryable until it is re-embedded down to
|
||||
the 384d English-Only-Brain default.
|
||||
|
||||
Resolution order:
|
||||
1. If store.embed_dim has an exact match in MODEL_REGISTRY, prefer the
|
||||
model whose logical key name indicates the canonical model at that dim
|
||||
(bge-small-en-v1.5 for 384d default; bge-m3 for legacy/opt-in 1024d).
|
||||
2. Otherwise fall through to the env/registry default via Embedder().
|
||||
|
||||
This decouples runtime model selection from a global env var so a single
|
||||
process can operate multiple stores at different dims while the migration
|
||||
from a legacy 1024d store down to 384d completes.
|
||||
"""
|
||||
target_dim = getattr(store, "embed_dim", None)
|
||||
if target_dim is None:
|
||||
return Embedder()
|
||||
preferred = {384: "bge-small-en-v1.5", 1024: "bge-m3"}
|
||||
key = preferred.get(int(target_dim))
|
||||
# Tests and migrations may monkey-patch `Embedder` with a stub that takes no
|
||||
# kwargs. Fall back to the zero-arg form in that case so the fake surface
|
||||
# stays compatible; real production code still respects store.embed_dim.
|
||||
try:
|
||||
if key is not None and key in MODEL_REGISTRY:
|
||||
return Embedder(model_key=key)
|
||||
for reg_key, spec in MODEL_REGISTRY.items():
|
||||
if int(spec["dim"]) == int(target_dim):
|
||||
return Embedder(model_key=reg_key)
|
||||
except TypeError:
|
||||
pass
|
||||
return Embedder()
|
||||
184
src/iai_mcp/events.py
Normal file
184
src/iai_mcp/events.py
Normal file
|
|
@ -0,0 +1,184 @@
|
|||
"""D-STORAGE events table interface.
|
||||
|
||||
Single source of runtime state. Every kind of event — S4 contradictions,
|
||||
trajectory metrics, LLM health probes, schema induction runs, CLS consolidation
|
||||
runs, migration traces, alerts — goes through write_event.
|
||||
|
||||
No .jsonl files. No .json files scattered under internal storage or
|
||||
internal storage. Everything persists in the LanceDB `events` table.
|
||||
|
||||
CLI queries (iai-mcp health, iai-mcp trajectory) read via query_events.
|
||||
|
||||
events.data_json is AES-256-GCM encrypted at rest (some event
|
||||
payloads carry user quotes / cues -- safest default). The event UUID is the
|
||||
associated data binding. kind / severity / domain / ts / session_id stay
|
||||
plaintext so audit queries (`iai-mcp health`, `iai-mcp trajectory`) can filter
|
||||
on them without decrypting.
|
||||
|
||||
Phase 3 additions (new event kinds — free-form strings, no taxonomy enum):
|
||||
- CONN-05 TEM factorization: `migration_v3_to_v4`.
|
||||
- CONN-07 small-world sigma: `sigma_observation`, `sigma_drift`
|
||||
(sigma-curve diagnostic per Ashby ultrastability).
|
||||
- M2/M4/M6 live wiring: `retrieval_used`, `profile_updated`,
|
||||
`session_started` (existing emit sites extended; not all new — verify via
|
||||
ctx_search before emitting duplicates).
|
||||
- Chapman ecological self-regulation:
|
||||
* `formality_score_weekly` — per-turn aggregate of user SURFACE formality.
|
||||
* `camouflaging_detected` — over-formal trajectory detected over 5-point weekly window.
|
||||
* `register_relaxed` — OUR `camouflaging_relaxation` knob bumped; the system
|
||||
relaxes its OWN register (never the user's; masking modeling is out-of-scope).
|
||||
|
||||
Phase 6 additions (Plan 06-01 schema dedup):
|
||||
- `schema_reinforced` — emitted when `persist_schema` finds an existing
|
||||
schema for the candidate pattern and reinforces incoming
|
||||
`schema_instance_of` edges from new evidence onto the existing keeper
|
||||
instead of inserting a duplicate row. Payload:
|
||||
{schema_id: str, pattern: str, evidence_added: int, total_evidence: int}
|
||||
Source IDs: [keeper_schema_id, *new_evidence_ids[:5]] mirroring the
|
||||
existing `schema_induction_run` shape.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
from uuid import UUID, uuid4
|
||||
|
||||
from iai_mcp.crypto import (
|
||||
decrypt_field,
|
||||
encrypt_field,
|
||||
is_encrypted,
|
||||
)
|
||||
from iai_mcp.store import EVENTS_TABLE, MemoryStore
|
||||
|
||||
|
||||
def write_event(
|
||||
store: MemoryStore,
|
||||
kind: str,
|
||||
data: dict[str, Any],
|
||||
*,
|
||||
severity: str | None = None,
|
||||
domain: str | None = None,
|
||||
session_id: str = "-",
|
||||
source_ids: list[UUID] | None = None,
|
||||
) -> UUID:
|
||||
"""Persist a single event to the LanceDB events table.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
store:
|
||||
Open MemoryStore instance.
|
||||
kind:
|
||||
Logical event kind (e.g. "s4_contradiction", "trajectory_metric",
|
||||
"llm_health", "migration_v1_to_v2"). Free-form string; downstream
|
||||
consumers filter on it.
|
||||
data:
|
||||
JSON-serialisable kind-specific payload. Encoded to data_json.
|
||||
severity:
|
||||
Optional alert severity ("info" | "warning" | "critical"). Stored
|
||||
as empty string for non-alert events.
|
||||
domain:
|
||||
Optional monotropic-domain tag. Stored as empty string when absent.
|
||||
session_id:
|
||||
Session identifier; defaults to "-" when no session is active.
|
||||
source_ids:
|
||||
Optional list of MemoryRecord UUIDs that triggered this event.
|
||||
|
||||
Returns the newly-minted event UUID.
|
||||
"""
|
||||
event_id = uuid4()
|
||||
# encrypt data_json with AD = event UUID bytes. kind / severity /
|
||||
# domain / ts / session_id stay plaintext for filter queries.
|
||||
data_plain = json.dumps(data)
|
||||
ad = str(event_id).encode("ascii")
|
||||
data_ct = encrypt_field(data_plain, store._key(), associated_data=ad)
|
||||
row = {
|
||||
"id": str(event_id),
|
||||
"kind": kind,
|
||||
"severity": severity or "",
|
||||
"domain": domain or "",
|
||||
"ts": datetime.now(timezone.utc),
|
||||
"data_json": data_ct,
|
||||
"session_id": session_id,
|
||||
"source_ids_json": json.dumps([str(x) for x in (source_ids or [])]),
|
||||
}
|
||||
store.db.open_table(EVENTS_TABLE).add([row])
|
||||
return event_id
|
||||
|
||||
|
||||
def query_events(
|
||||
store: MemoryStore,
|
||||
kind: str | None = None,
|
||||
since: datetime | None = None,
|
||||
severity: str | None = None,
|
||||
limit: int = 100,
|
||||
) -> list[dict]:
|
||||
"""Query events matching the given filters, newest first.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
store:
|
||||
Open MemoryStore instance.
|
||||
kind:
|
||||
Filter by event kind. None returns all kinds.
|
||||
since:
|
||||
Only return events with ts >= since. Naive datetimes are treated as UTC.
|
||||
severity:
|
||||
Exact-match filter on severity field.
|
||||
limit:
|
||||
Maximum rows returned (default 100). Caller can pass e.g. 1 to get
|
||||
only the most recent event of a given kind (iai-mcp health).
|
||||
|
||||
Returns a list of dicts with keys: id, kind, severity, domain, ts, data,
|
||||
session_id, source_ids. data and source_ids are decoded from JSON.
|
||||
"""
|
||||
tbl = store.db.open_table(EVENTS_TABLE)
|
||||
df = tbl.to_pandas()
|
||||
if df.empty:
|
||||
return []
|
||||
if kind is not None:
|
||||
df = df[df["kind"] == kind]
|
||||
if severity is not None:
|
||||
df = df[df["severity"] == severity]
|
||||
if since is not None:
|
||||
# Ensure tz-aware comparison
|
||||
since_cmp = since if since.tzinfo is not None else since.replace(tzinfo=timezone.utc)
|
||||
# Pandas Timestamp compares naturally with tz-aware datetimes
|
||||
df = df[df["ts"] >= since_cmp]
|
||||
if df.empty:
|
||||
return []
|
||||
df = df.sort_values("ts", ascending=False).head(limit)
|
||||
out: list[dict] = []
|
||||
for _, row in df.iterrows():
|
||||
# decrypt data_json when it carries the iai:enc:v1: prefix.
|
||||
# Pre-02-08 rows stay plaintext; migration rewrites them lazily.
|
||||
raw_data = row["data_json"] or "{}"
|
||||
if is_encrypted(raw_data):
|
||||
ad = str(row["id"]).encode("ascii")
|
||||
try:
|
||||
raw_data = decrypt_field(raw_data, store._key(), associated_data=ad)
|
||||
except Exception:
|
||||
# Rule 1 diagnostic semantics: a corrupt event row should not
|
||||
# fail the entire query. Return empty payload + mark in meta.
|
||||
raw_data = "{}"
|
||||
try:
|
||||
data = json.loads(raw_data)
|
||||
except (TypeError, json.JSONDecodeError):
|
||||
data = {}
|
||||
try:
|
||||
source_ids = json.loads(row["source_ids_json"] or "[]")
|
||||
except (TypeError, json.JSONDecodeError):
|
||||
source_ids = []
|
||||
out.append(
|
||||
{
|
||||
"id": row["id"],
|
||||
"kind": row["kind"],
|
||||
"severity": row["severity"] or None,
|
||||
"domain": row["domain"] or None,
|
||||
"ts": row["ts"],
|
||||
"data": data,
|
||||
"session_id": row["session_id"],
|
||||
"source_ids": source_ids,
|
||||
}
|
||||
)
|
||||
return out
|
||||
244
src/iai_mcp/formality.py
Normal file
244
src/iai_mcp/formality.py
Normal file
|
|
@ -0,0 +1,244 @@
|
|||
"""Plan 03-03 — surface-feature formality scorer (Chapman ecological self-regulation).
|
||||
|
||||
Constitutional anchor:
|
||||
- Observes ONLY the user's surface lexical features (D-AUTIST13-01).
|
||||
- Never models user internal state, never tries to infer "is the user masking".
|
||||
- Paired with src/iai_mcp/camouflaging.py which adjusts OUR register in response.
|
||||
|
||||
Scientific anchor: Chapman R (2021) "Neurodiversity and the Social Ecology of Mental
|
||||
Functions." — the ecological self-regulation framing. Cook 2021 + Raymaker 2020 tell us
|
||||
WHAT NOT to model (masking as an inferred user state).
|
||||
|
||||
Four surface features (D-AUTIST13-01, weighted sum):
|
||||
1. Lexical formality (w=0.45) — per-language register-marker density. Strongest signal.
|
||||
2. Sentence complexity (w=0.20) — sigmoid on avg chars-per-sentence + clause density.
|
||||
3. Hedging density (w=0.15) — hedge markers per 100 tokens.
|
||||
4. Punctuation formality (w=0.20) — semicolon + em-dash + full-quote density.
|
||||
|
||||
Output: formality_score(text, lang) -> float in [0.0, 1.0]. 0 = fully informal,
|
||||
1 = fully formal. Unknown lang returns 0.5 (neutral) with a logged warning; NEVER raises
|
||||
(MEMORY.md global-product mandate).
|
||||
|
||||
Weight rationale (Pattern 3 proposed
|
||||
0.30/0.30/0.20/0.20 as a baseline — fixture-tuned to 0.45/0.20/0.15/0.20 because the lex
|
||||
dimension is the most unambiguous signal across RU+EN and the shortest formal sentences
|
||||
(e.g. "The proposal is, therefore, accepted.") are otherwise penalised by the
|
||||
complexity sigmoid. Fixture accuracy: 100% (51/51) with the current weights.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import math
|
||||
import re
|
||||
import warnings
|
||||
from typing import Iterable
|
||||
|
||||
|
||||
# ------------------------------------------------------------------- constants
|
||||
# Grep-discoverable module-scope constants (PATTERNS.md §7).
|
||||
|
||||
LEX_MARKERS: dict[str, list[str]] = {
|
||||
"en": [
|
||||
"therefore", "however", "accordingly", "nonetheless", "furthermore",
|
||||
"hence", "thus", "consequently", "moreover", "notwithstanding",
|
||||
"whereas", "hereby", "herein", "thereof", "pursuant", "aforementioned",
|
||||
"shall", "aforesaid",
|
||||
],
|
||||
"ru": [
|
||||
"тем не менее", "следовательно", "однако", "впрочем", "таким образом",
|
||||
"вследствие", "настоящим", "согласно", "вышеизложенного", "вышеизложенному",
|
||||
"в соответствии", "по-видимому", "в силу", "исходя из", "данное",
|
||||
"настоящее", "прилагаемым", "представленное", "уведомляем",
|
||||
],
|
||||
}
|
||||
|
||||
HEDGE_MARKERS: dict[str, list[str]] = {
|
||||
"en": [
|
||||
"possibly", "perhaps", "might", "may", "could", "seemingly",
|
||||
"appears to", "seems", "somewhat", "apparently", "presumably",
|
||||
],
|
||||
"ru": [
|
||||
"возможно", "вероятно", "видимо", "по-видимому", "наверное",
|
||||
"кажется", "пожалуй", "скорее всего", "вроде", "будто",
|
||||
],
|
||||
}
|
||||
|
||||
DEFAULT_WEIGHTS: dict[str, float] = {
|
||||
"lex": 0.45,
|
||||
"complexity": 0.20,
|
||||
"hedge": 0.15,
|
||||
"punct": 0.20,
|
||||
}
|
||||
|
||||
# Sentence-complexity sigmoid parameters.
|
||||
# avg chars per sentence: centre 40 credits terse formal writing (e.g. "The
|
||||
# proposal is, therefore, accepted."). clause count adds a second signal
|
||||
# weighted equally with length (avg_cl centre 0.5 = one comma per sentence).
|
||||
_SENTENCE_COMPLEXITY_CENTER: float = 40.0
|
||||
_SENTENCE_COMPLEXITY_SCALE: float = 25.0
|
||||
_CLAUSE_COUNT_CENTER: float = 0.5
|
||||
_CLAUSE_COUNT_SCALE: float = 0.5
|
||||
|
||||
# Density sigmoid parameters. Tuned so 0 markers -> ~0.1, 1.5 markers/100tok -> 0.5.
|
||||
_LEX_DENSITY_CENTER: float = 1.5 # markers per 100 tokens
|
||||
_LEX_DENSITY_SCALE: float = 1.2
|
||||
_HEDGE_DENSITY_CENTER: float = 1.0
|
||||
_HEDGE_DENSITY_SCALE: float = 0.8
|
||||
_PUNCT_DENSITY_CENTER: float = 1.5
|
||||
_PUNCT_DENSITY_SCALE: float = 1.3
|
||||
|
||||
_NEUTRAL_SCORE: float = 0.5
|
||||
|
||||
_logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ------------------------------------------------------------------- helpers
|
||||
def _tokens(text: str) -> list[str]:
|
||||
"""Whitespace split on letter sequences; lowercase. Unicode-aware."""
|
||||
cleaned = re.sub(r"[^\w\s\-]", " ", text, flags=re.UNICODE)
|
||||
return [t.lower() for t in cleaned.split() if t]
|
||||
|
||||
|
||||
def _sentence_split(text: str) -> list[str]:
|
||||
parts = re.split(r"[.!?;]+", text)
|
||||
return [p.strip() for p in parts if p.strip()]
|
||||
|
||||
|
||||
def _sigmoid(x: float) -> float:
|
||||
if x >= 0:
|
||||
ez = math.exp(-x)
|
||||
return 1.0 / (1.0 + ez)
|
||||
ez = math.exp(x)
|
||||
return ez / (1.0 + ez)
|
||||
|
||||
|
||||
def _count_phrase_occurrences(text_lower: str, phrases: Iterable[str]) -> int:
|
||||
count = 0
|
||||
for p in phrases:
|
||||
if not p:
|
||||
continue
|
||||
if " " in p or "-" in p:
|
||||
# Multi-word or hyphenated phrase -> substring match is fine.
|
||||
count += text_lower.count(p)
|
||||
else:
|
||||
count += len(re.findall(rf"\b{re.escape(p)}\b", text_lower, flags=re.UNICODE))
|
||||
return count
|
||||
|
||||
|
||||
# ------------------------------------------------------------------- features
|
||||
def _lex_score(text: str, lang: str) -> float:
|
||||
"""Per-language register-marker density, sigmoid-bounded to [0, 1]."""
|
||||
markers = LEX_MARKERS.get(lang, [])
|
||||
if not markers:
|
||||
return _NEUTRAL_SCORE
|
||||
toks = _tokens(text)
|
||||
if not toks:
|
||||
return 0.0
|
||||
hits = _count_phrase_occurrences(text.lower(), markers)
|
||||
density = hits * 100.0 / max(len(toks), 1)
|
||||
return _sigmoid((density - _LEX_DENSITY_CENTER) / _LEX_DENSITY_SCALE)
|
||||
|
||||
|
||||
def _complexity_score(text: str) -> float:
|
||||
"""Avg chars per sentence + clause-count proxy. Language-independent.
|
||||
|
||||
Returns equal-weight blend of:
|
||||
- length sigmoid (centred at 40 chars so terse formal sentences aren't depressed).
|
||||
- clause sigmoid based on commas per sentence (centred at 0.5 = one comma avg).
|
||||
"""
|
||||
sents = _sentence_split(text)
|
||||
if not sents:
|
||||
return 0.0
|
||||
avg_len = sum(len(s) for s in sents) / len(sents)
|
||||
avg_clauses = sum(s.count(",") for s in sents) / len(sents)
|
||||
len_score = _sigmoid(
|
||||
(avg_len - _SENTENCE_COMPLEXITY_CENTER) / _SENTENCE_COMPLEXITY_SCALE
|
||||
)
|
||||
cl_score = _sigmoid((avg_clauses - _CLAUSE_COUNT_CENTER) / _CLAUSE_COUNT_SCALE)
|
||||
return 0.5 * len_score + 0.5 * cl_score
|
||||
|
||||
|
||||
def _hedge_score(text: str, lang: str) -> float:
|
||||
"""Hedging density per 100 tokens, sigmoid-bounded to [0, 1]."""
|
||||
markers = HEDGE_MARKERS.get(lang, [])
|
||||
if not markers:
|
||||
return _NEUTRAL_SCORE
|
||||
toks = _tokens(text)
|
||||
if not toks:
|
||||
return 0.0
|
||||
hits = _count_phrase_occurrences(text.lower(), markers)
|
||||
density = hits * 100.0 / max(len(toks), 1)
|
||||
return _sigmoid((density - _HEDGE_DENSITY_CENTER) / _HEDGE_DENSITY_SCALE)
|
||||
|
||||
|
||||
def _punct_score(text: str) -> float:
|
||||
"""Semicolon + em-dash + full-quote density per 100 tokens."""
|
||||
toks = _tokens(text)
|
||||
if not toks:
|
||||
return 0.0
|
||||
semi = text.count(";")
|
||||
em = text.count("—") + text.count("–")
|
||||
fq = (
|
||||
text.count('"')
|
||||
+ text.count("“")
|
||||
+ text.count("”")
|
||||
+ text.count("«")
|
||||
+ text.count("»")
|
||||
)
|
||||
hits = semi + em + fq
|
||||
density = hits * 100.0 / max(len(toks), 1)
|
||||
return _sigmoid((density - _PUNCT_DENSITY_CENTER) / _PUNCT_DENSITY_SCALE)
|
||||
|
||||
|
||||
# ------------------------------------------------------------------- public
|
||||
def formality_score(
|
||||
text: str,
|
||||
lang: str,
|
||||
*,
|
||||
weights: dict[str, float] | None = None,
|
||||
) -> float:
|
||||
"""Return surface-feature formality score in [0.0, 1.0].
|
||||
|
||||
0.0 = fully informal, 1.0 = fully formal. Unknown languages get a neutral 0.5
|
||||
with a logged warning (MEMORY.md global-product graceful degradation). NEVER
|
||||
raises on bad input.
|
||||
|
||||
Args:
|
||||
text: free-form user utterance (SURFACE only, per D-AUTIST13-01).
|
||||
lang: ISO-639-1 language code ("en", "ru"). Other codes -> neutral + warning.
|
||||
weights: optional override {lex, complexity, hedge, punct}.
|
||||
|
||||
Constitutional guard reminder: callers pass user SURFACE text only. The scorer
|
||||
does not see any inferred internal state. See camouflaging.py for how the
|
||||
score is consumed (to adjust OUR register, never the user's).
|
||||
"""
|
||||
if not isinstance(text, str) or not text.strip():
|
||||
return 0.0
|
||||
|
||||
if lang not in LEX_MARKERS:
|
||||
warnings.warn(
|
||||
f"formality_score: lang={lang!r} outside RU+EN baseline; "
|
||||
"returning neutral 0.5 (MEMORY.md global-product graceful degradation)",
|
||||
stacklevel=2,
|
||||
)
|
||||
_logger.debug("formality_score unknown lang=%s text_len=%d", lang, len(text))
|
||||
return _NEUTRAL_SCORE
|
||||
|
||||
w = dict(DEFAULT_WEIGHTS)
|
||||
if weights:
|
||||
w.update({k: float(v) for k, v in weights.items() if k in w})
|
||||
total_w = sum(w.values()) or 1.0
|
||||
|
||||
lex = _lex_score(text, lang)
|
||||
complexity = _complexity_score(text)
|
||||
hedge = _hedge_score(text, lang)
|
||||
punct = _punct_score(text)
|
||||
|
||||
weighted = (
|
||||
w["lex"] * lex
|
||||
+ w["complexity"] * complexity
|
||||
+ w["hedge"] * hedge
|
||||
+ w["punct"] * punct
|
||||
) / total_w
|
||||
# Clamp to [0, 1] defensively.
|
||||
return max(0.0, min(1.0, weighted))
|
||||
80
src/iai_mcp/gate.py
Normal file
80
src/iai_mcp/gate.py
Normal file
|
|
@ -0,0 +1,80 @@
|
|||
"""TOK-06 active-inference retrieval gate (Plan 02-04 Task 2, D-26).
|
||||
|
||||
Skip full pipeline_recall when the expected free-energy reduction for the
|
||||
current cue is below THETA_SKIP bits. Trivial cues (greetings, "thanks",
|
||||
single characters) short-circuit to an L0-only response, saving 200-500
|
||||
tokens per trivial turn.
|
||||
|
||||
The heuristic uses a simple token-count proxy for EFE:
|
||||
- Empty / sub-3-char cues: 0.0 bits (no signal).
|
||||
- Greetings ("hi", "hello", "thanks", "ok") in the fixed trivial set: 0.1 bits.
|
||||
- Single-token cues not in the trivial set: 0.25 bits (above threshold --
|
||||
one rare/novel token can still justify a retrieval).
|
||||
- General cues: min(2.0, log2(1 + unique_token_count) * 0.5).
|
||||
|
||||
Phase 2 note: this is an approximation. can replace with a real
|
||||
embedding-distance-to-prior computation once the write policy is active.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
|
||||
|
||||
# threshold (bits).
|
||||
THETA_SKIP = 0.2
|
||||
|
||||
# Fixed-EFE trivial cues. Matched case-insensitively against stripped punctuation.
|
||||
TRIVIAL_SHORT_CUES: frozenset[str] = frozenset({
|
||||
"hi", "hello", "hey", "thanks", "thank you", "ok", "okay",
|
||||
"yes", "no", "sure", ".", "!", "?",
|
||||
})
|
||||
|
||||
|
||||
# ---------------------------------------------------------- EFE computation
|
||||
|
||||
|
||||
def expected_free_energy_reduction(cue: str) -> float:
|
||||
"""Estimate the expected free-energy reduction for `cue` (bits).
|
||||
|
||||
- Empty or <3 chars -> 0.0 (below threshold; skip)
|
||||
- Fixed trivial set -> 0.1 (below threshold; skip)
|
||||
- Single non-trivial -> 0.25 (above threshold; proceed)
|
||||
- General formula -> min(2.0, log2(1 + unique_token_count) * 0.5)
|
||||
"""
|
||||
if not cue:
|
||||
return 0.0
|
||||
stripped = cue.strip()
|
||||
if len(stripped) < 3:
|
||||
return 0.0
|
||||
|
||||
normalised = stripped.lower().rstrip(".!?").strip()
|
||||
if normalised in TRIVIAL_SHORT_CUES:
|
||||
return 0.1
|
||||
|
||||
tokens = [t for t in stripped.split() if t]
|
||||
unique = len({t.lower() for t in tokens})
|
||||
if unique <= 1:
|
||||
# Single token not in trivial set -- rare/novel token MAY be a proper
|
||||
# noun, code identifier, or keyword. Stay above threshold.
|
||||
return 0.25
|
||||
value = math.log2(1 + unique) * 0.5
|
||||
return min(2.0, float(value))
|
||||
|
||||
|
||||
# ---------------------------------------------------------- skip decision
|
||||
|
||||
|
||||
def should_skip_retrieval(cue: str) -> tuple[bool, str]:
|
||||
"""Return (skip, reason) per D-26.
|
||||
|
||||
reason is a short English diagnostic suitable for a RecallResponse hint.
|
||||
"""
|
||||
if not cue or len(cue.strip()) < 3:
|
||||
return True, "very short cue (<3 chars); no discriminable signal"
|
||||
|
||||
value = expected_free_energy_reduction(cue)
|
||||
if value < THETA_SKIP:
|
||||
return True, (
|
||||
f"trivial cue (EFE {value:.3f} bits < theta {THETA_SKIP})"
|
||||
)
|
||||
return False, ""
|
||||
198
src/iai_mcp/graph.py
Normal file
198
src/iai_mcp/graph.py
Normal file
|
|
@ -0,0 +1,198 @@
|
|||
"""Dual-library graph wrapper.
|
||||
|
||||
NetworkX for dev ergonomics at small N; igraph (C-backed) for hot-path at
|
||||
N >= IGRAPH_THRESHOLD. Backend switches automatically in add_node when the
|
||||
node count crosses the threshold, so callers don't have to care.
|
||||
|
||||
Exposed surface (consumed by community.py, richclub.py, pipeline.py):
|
||||
- add_node, add_edge
|
||||
- node_count, backend (property)
|
||||
- centrality() -> dict[UUID, float] # betweenness
|
||||
- two_hop_neighborhood(seeds, top_k) # CONN-03 greedy spread
|
||||
- rich_club_coefficient() # van den Heuvel & Sporns 2011
|
||||
- get_embedding(node_id)
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
from uuid import UUID
|
||||
|
||||
import networkx as nx
|
||||
|
||||
# switch to C-backed igraph at N >= 500 (centrality + Leiden hot path).
|
||||
IGRAPH_THRESHOLD = 500
|
||||
|
||||
try:
|
||||
import igraph as ig # type: ignore
|
||||
_HAS_IGRAPH = True
|
||||
except ImportError: # pragma: no cover -- igraph is a hard dep in pyproject
|
||||
_HAS_IGRAPH = False
|
||||
|
||||
|
||||
class MemoryGraph:
|
||||
"""Dual-library graph. NetworkX is the source of truth for topology; igraph
|
||||
is rebuilt on demand when backend flips.
|
||||
|
||||
Storage model:
|
||||
- `self._nx` holds the authoritative NetworkX graph (str(UUID) node labels).
|
||||
- `self._attrs` maps UUID -> {"community_id": UUID|None, "embedding": list[float]}.
|
||||
- `self._ig` holds a cached igraph mirror once the backend switches.
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self._nx: nx.Graph = nx.Graph()
|
||||
self._ig: "ig.Graph | None" = None
|
||||
self._attrs: dict[UUID, dict[str, Any]] = {}
|
||||
self._backend: str = "networkx"
|
||||
|
||||
# -------------------------------------------------------------- properties
|
||||
|
||||
@property
|
||||
def backend(self) -> str:
|
||||
return self._backend
|
||||
|
||||
def node_count(self) -> int:
|
||||
return self._nx.number_of_nodes()
|
||||
|
||||
# ----------------------------------------------------------------- writes
|
||||
|
||||
def add_node(
|
||||
self,
|
||||
node_id: UUID,
|
||||
community_id: UUID | None,
|
||||
embedding: list[float],
|
||||
) -> None:
|
||||
self._nx.add_node(str(node_id))
|
||||
self._attrs[node_id] = {
|
||||
"community_id": community_id,
|
||||
"embedding": embedding,
|
||||
}
|
||||
self._maybe_switch_backend()
|
||||
|
||||
def add_edge(
|
||||
self,
|
||||
src: UUID,
|
||||
dst: UUID,
|
||||
weight: float = 1.0,
|
||||
edge_type: str = "hebbian",
|
||||
) -> None:
|
||||
self._nx.add_edge(
|
||||
str(src), str(dst), weight=weight, edge_type=edge_type
|
||||
)
|
||||
if self._ig is not None:
|
||||
# igraph mirror is immutable by topology; rebuild after each edge
|
||||
# write while in igraph backend. Cheap enough at Phase-1 scale.
|
||||
self._rebuild_igraph()
|
||||
|
||||
# ------------------------------------------------------ backend switching
|
||||
|
||||
def _maybe_switch_backend(self) -> None:
|
||||
n = self.node_count()
|
||||
if (
|
||||
n >= IGRAPH_THRESHOLD
|
||||
and self._backend == "networkx"
|
||||
and _HAS_IGRAPH
|
||||
):
|
||||
self._rebuild_igraph()
|
||||
self._backend = "igraph"
|
||||
|
||||
def _rebuild_igraph(self) -> None:
|
||||
if not _HAS_IGRAPH:
|
||||
return
|
||||
nodes = list(self._nx.nodes())
|
||||
idx = {n: i for i, n in enumerate(nodes)}
|
||||
edges = [(idx[u], idx[v]) for u, v in self._nx.edges()]
|
||||
weights = [
|
||||
float(self._nx[u][v].get("weight", 1.0)) for u, v in self._nx.edges()
|
||||
]
|
||||
g = ig.Graph(n=len(nodes), edges=edges, directed=False)
|
||||
g.vs["name"] = nodes
|
||||
if weights:
|
||||
g.es["weight"] = weights
|
||||
self._ig = g
|
||||
|
||||
# ---------------------------------------------------------- graph metrics
|
||||
|
||||
def centrality(self) -> dict[UUID, float]:
|
||||
"""Betweenness centrality. NetworkX for small N, igraph at scale.
|
||||
|
||||
Empty-edge graphs return all-zero centrality (betweenness undefined).
|
||||
"""
|
||||
if self._backend == "networkx":
|
||||
if self._nx.number_of_edges() == 0:
|
||||
return {UUID(n): 0.0 for n in self._nx.nodes()}
|
||||
bc = nx.betweenness_centrality(self._nx, weight="weight")
|
||||
return {UUID(n): float(c) for n, c in bc.items()}
|
||||
# igraph path
|
||||
assert self._ig is not None
|
||||
has_weight = "weight" in self._ig.es.attributes()
|
||||
raw = self._ig.betweenness(weights="weight" if has_weight else None)
|
||||
names = self._ig.vs["name"]
|
||||
return {UUID(name): float(c) for name, c in zip(names, raw)}
|
||||
|
||||
def two_hop_neighborhood(
|
||||
self, seeds: list[UUID], top_k: int = 5
|
||||
) -> list[UUID]:
|
||||
"""CONN-03: 2-hop greedy spread.
|
||||
|
||||
At each hop, for each frontier node, take the top_k highest-weight
|
||||
neighbours (Seguin 2018 local-information reconstruction). Dedup
|
||||
across seeds and hops; exclude seeds themselves.
|
||||
"""
|
||||
visited: set[str] = {str(s) for s in seeds}
|
||||
frontier: set[str] = {str(s) for s in seeds if str(s) in self._nx}
|
||||
collected: set[str] = set()
|
||||
|
||||
for _ in range(2): # 2 hops
|
||||
next_frontier: set[str] = set()
|
||||
for node in frontier:
|
||||
if node not in self._nx:
|
||||
continue
|
||||
neighbours = [
|
||||
(n, float(self._nx[node][n].get("weight", 1.0)))
|
||||
for n in self._nx.neighbors(node)
|
||||
]
|
||||
neighbours.sort(key=lambda x: x[1], reverse=True)
|
||||
for n, _ in neighbours[:top_k]:
|
||||
if n not in visited:
|
||||
next_frontier.add(n)
|
||||
collected.add(n)
|
||||
visited.add(n)
|
||||
frontier = next_frontier
|
||||
if not frontier:
|
||||
break
|
||||
|
||||
return [UUID(n) for n in collected]
|
||||
|
||||
def rich_club_coefficient(self, k_threshold: int | None = None) -> float:
|
||||
"""van den Heuvel & Sporns 2011 -- rich-club coefficient.
|
||||
|
||||
Defaults to using the degree at the 90th percentile as the threshold,
|
||||
matching the 10% rich-club convention used in the connectome literature.
|
||||
Returns 0.0 on graphs smaller than 2 nodes or without any edges.
|
||||
"""
|
||||
if (
|
||||
self._nx.number_of_nodes() < 2
|
||||
or self._nx.number_of_edges() == 0
|
||||
):
|
||||
return 0.0
|
||||
if k_threshold is None:
|
||||
degrees = [d for _, d in self._nx.degree()]
|
||||
if not degrees:
|
||||
return 0.0
|
||||
sorted_deg = sorted(degrees)
|
||||
# 90th percentile ~ top 10% threshold. len//10 is conservative rounding.
|
||||
k_threshold = int(max(1, sorted_deg[-max(1, len(degrees) // 10)]))
|
||||
try:
|
||||
coeffs = nx.rich_club_coefficient(self._nx, normalized=False)
|
||||
except (ZeroDivisionError, nx.NetworkXError):
|
||||
# Rich-club is undefined for disconnected or very small graphs.
|
||||
return 0.0
|
||||
return float(coeffs.get(k_threshold, 0.0))
|
||||
|
||||
# ---------------------------------------------------------------- helpers
|
||||
|
||||
def get_embedding(self, node_id: UUID) -> list[float] | None:
|
||||
"""Return the embedding attached at add_node() time, or None."""
|
||||
attrs = self._attrs.get(node_id)
|
||||
return attrs.get("embedding") if attrs else None
|
||||
188
src/iai_mcp/guard.py
Normal file
188
src/iai_mcp/guard.py
Normal file
|
|
@ -0,0 +1,188 @@
|
|||
"""D-GUARD: graceful-degradation ladder before any LLM call.
|
||||
|
||||
Every LLM-dependent operation must pass through `should_call_llm`
|
||||
BEFORE making an API call. The 7-step ladder (D-GUARD):
|
||||
|
||||
1. sleep.llm_enabled=true? else Tier 0
|
||||
2. API key present? else Tier 0
|
||||
3. BudgetLedger daily cap OK? else Tier 0
|
||||
4. BudgetLedger monthly cap OK? else Tier 0
|
||||
5. RateLimitLedger: last 429 > 15 min ago? else Tier 0 this cycle
|
||||
6. API call with retry(max=2, exp backoff) + timeout(60s) -- caller's job
|
||||
7. On 429/400/401/5xx -> record in ledger, Tier 0 this cycle -- caller's job
|
||||
|
||||
Write & read paths (memory_recall/reinforce/contradict, profile_get/set,
|
||||
session_start) NEVER block on LLM failure. LLM failures only reduce the QUALITY
|
||||
of semantic consolidation, schema induction, and identity refinement.
|
||||
|
||||
Budget defaults: daily_usd_cap=$0.10, monthly_usd_cap=$3.00,
|
||||
cooldown=15min, on_cap_hit=fallback_to_local.
|
||||
|
||||
BudgetLedger + RateLimitLedger persist in LanceDB tables (budget_ledger,
|
||||
ratelimit_ledger) created by MemoryStore._ensure_tables.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timedelta, timezone
|
||||
|
||||
from iai_mcp.store import BUDGET_TABLE, RATELIMIT_TABLE, MemoryStore
|
||||
|
||||
|
||||
# D-GUARD defaults
|
||||
BUDGET_DAILY_USD_DEFAULT = 0.10
|
||||
BUDGET_MONTHLY_USD_DEFAULT = 3.00
|
||||
RATELIMIT_COOLDOWN_MIN = 15
|
||||
|
||||
|
||||
class BudgetLedger:
|
||||
"""LanceDB-backed daily + monthly USD spend tracker (D-GUARD).
|
||||
|
||||
Caps default to $0.10/day and $3.00/month. Both are advisory (no OS-level
|
||||
enforcement); caller inspects can_spend() before invoking an LLM API.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
store: MemoryStore,
|
||||
daily_usd_cap: float = BUDGET_DAILY_USD_DEFAULT,
|
||||
monthly_usd_cap: float = BUDGET_MONTHLY_USD_DEFAULT,
|
||||
) -> None:
|
||||
self.store = store
|
||||
self.daily_cap = float(daily_usd_cap)
|
||||
self.monthly_cap = float(monthly_usd_cap)
|
||||
|
||||
# ---- internal helpers
|
||||
|
||||
def _today_utc(self) -> str:
|
||||
return datetime.now(timezone.utc).strftime("%Y-%m-%d")
|
||||
|
||||
def _this_month(self) -> str:
|
||||
return datetime.now(timezone.utc).strftime("%Y-%m")
|
||||
|
||||
# ---- queries
|
||||
|
||||
def daily_used(self) -> float:
|
||||
"""Sum of usd_spent rows for today (UTC)."""
|
||||
tbl = self.store.db.open_table(BUDGET_TABLE)
|
||||
df = tbl.to_pandas()
|
||||
if df.empty:
|
||||
return 0.0
|
||||
today = df[df["date"] == self._today_utc()]
|
||||
return float(today["usd_spent"].sum()) if not today.empty else 0.0
|
||||
|
||||
def monthly_used(self) -> float:
|
||||
"""Sum of usd_spent rows for the current month (UTC)."""
|
||||
tbl = self.store.db.open_table(BUDGET_TABLE)
|
||||
df = tbl.to_pandas()
|
||||
if df.empty:
|
||||
return 0.0
|
||||
mo = df[df["date"].str.startswith(self._this_month())]
|
||||
return float(mo["usd_spent"].sum()) if not mo.empty else 0.0
|
||||
|
||||
def can_spend(self, usd: float) -> tuple[bool, str]:
|
||||
"""Return (ok, reason). reason is "" on success."""
|
||||
daily = self.daily_used()
|
||||
if daily + float(usd) > self.daily_cap:
|
||||
return (
|
||||
False,
|
||||
f"daily cap exceeded (used {daily:.4f} + {float(usd):.4f} "
|
||||
f"> {self.daily_cap:.4f})",
|
||||
)
|
||||
monthly = self.monthly_used()
|
||||
if monthly + float(usd) > self.monthly_cap:
|
||||
return (
|
||||
False,
|
||||
f"monthly cap exceeded (used {monthly:.4f} + {float(usd):.4f} "
|
||||
f"> {self.monthly_cap:.4f})",
|
||||
)
|
||||
return True, ""
|
||||
|
||||
# ---- writes
|
||||
|
||||
def record_spend(self, usd: float, kind: str = "llm") -> None:
|
||||
"""Persist a spend event to the ledger."""
|
||||
tbl = self.store.db.open_table(BUDGET_TABLE)
|
||||
tbl.add(
|
||||
[
|
||||
{
|
||||
"date": self._today_utc(),
|
||||
"usd_spent": float(usd),
|
||||
"kind": kind,
|
||||
"ts": datetime.now(timezone.utc),
|
||||
}
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
class RateLimitLedger:
|
||||
"""LanceDB-backed 429 history with 15-min cooldown (D-GUARD)."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
store: MemoryStore,
|
||||
cooldown_minutes: int = RATELIMIT_COOLDOWN_MIN,
|
||||
) -> None:
|
||||
self.store = store
|
||||
self.cooldown = timedelta(minutes=int(cooldown_minutes))
|
||||
|
||||
def in_cooldown(self) -> bool:
|
||||
"""True iff the most recent 429 was less than `cooldown_minutes` ago."""
|
||||
tbl = self.store.db.open_table(RATELIMIT_TABLE)
|
||||
df = tbl.to_pandas()
|
||||
if df.empty:
|
||||
return False
|
||||
latest = df["ts"].max()
|
||||
# Pandas timestamp -> python datetime; may be naive on some backends.
|
||||
try:
|
||||
py = latest.to_pydatetime()
|
||||
except AttributeError:
|
||||
py = latest
|
||||
if py.tzinfo is None:
|
||||
py = py.replace(tzinfo=timezone.utc)
|
||||
return (datetime.now(timezone.utc) - py) < self.cooldown
|
||||
|
||||
def record_429(self, endpoint: str = "anthropic") -> None:
|
||||
"""Record a 429 hit; subsequent in_cooldown() calls will see it."""
|
||||
tbl = self.store.db.open_table(RATELIMIT_TABLE)
|
||||
tbl.add(
|
||||
[
|
||||
{
|
||||
"ts": datetime.now(timezone.utc),
|
||||
"status_code": 429,
|
||||
"endpoint": endpoint,
|
||||
}
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
def should_call_llm(
|
||||
budget: BudgetLedger,
|
||||
rate: RateLimitLedger,
|
||||
llm_enabled: bool,
|
||||
has_api_key: bool,
|
||||
estimated_usd: float = 0.001,
|
||||
) -> tuple[bool, str]:
|
||||
"""D-GUARD 7-step ladder.
|
||||
|
||||
Returns (ok, reason). reason is "ok" on success or a short diagnostic
|
||||
describing which ladder step blocked the call.
|
||||
|
||||
Ordering is constitutional: downstream plans rely on this exact
|
||||
precedence. Changing the order without updating test_should_call_llm_ordering_*
|
||||
tests is a spec violation.
|
||||
"""
|
||||
# Step 1: sleep.llm_enabled toggle.
|
||||
if not llm_enabled:
|
||||
return False, "sleep.llm_enabled=false"
|
||||
# Step 2: credentials.
|
||||
if not has_api_key:
|
||||
return False, "no api key"
|
||||
# Step 3 + 4: budget caps (daily, then monthly). can_spend tests both.
|
||||
ok, reason = budget.can_spend(estimated_usd)
|
||||
if not ok:
|
||||
return False, reason
|
||||
# Step 5: rate-limit cooldown.
|
||||
if rate.in_cooldown():
|
||||
return False, "ratelimit cooldown (last 429 < 15min)"
|
||||
# Steps 6-7 are caller's responsibility (retry + 429 recording).
|
||||
return True, "ok"
|
||||
158
src/iai_mcp/handle.py
Normal file
158
src/iai_mcp/handle.py
Normal file
|
|
@ -0,0 +1,158 @@
|
|||
"""Compact session handle (Plan 05-06 -- ≤16 raw tok target).
|
||||
|
||||
Collapses three pointer fields historically emitted at session-start::
|
||||
|
||||
<id:{8-hex}> (~8 raw tok) identity pointer (L0 UUID prefix)
|
||||
<sess:{8-hex} pend:{N}> (~12 raw tok) brain session handle + pending
|
||||
<topic:{label<=8}> (~8 raw tok) dominant community hint
|
||||
|
||||
into a single opaque pointer::
|
||||
|
||||
<iai:HHHHHHHHHHHHHHHH> (~6-10 raw tok) 16-hex blake2s digest
|
||||
|
||||
The payload bytes are derived deterministically from the three inputs via
|
||||
blake2s(digest_size=8) -> 64 bits -> 16 hex chars. Deterministic encoding
|
||||
means identical (id, sess, topic, pending) always yields the same handle,
|
||||
so the handle can be quoted back to the server and resolved.
|
||||
|
||||
Resolution: the module keeps a bounded LRU (`_HANDLE_CACHE`) of the most
|
||||
recent encodings so the wrapper / recall paths can decode a handle back
|
||||
into its tuple without re-running the encoder. The cache is process-
|
||||
local and intentionally small -- session-start emits one handle per new
|
||||
session, so 256 slots handles the realistic working set with room for
|
||||
concurrent sessions during sleep-daemon transitions. Misses are a
|
||||
possible outcome (stale handle from an old process) and callers treat
|
||||
them as recoverable: the live payload still carries the legacy pointer
|
||||
fields under ``standard`` / ``deep`` wake_depth for fallback.
|
||||
|
||||
Security / invariants:
|
||||
|
||||
* The handle carries NO secrets. It is a hash of values Claude already
|
||||
saw (L0 UUID prefix, session id prefix, community label, pending
|
||||
count). Compromising the handle tells an attacker nothing they could
|
||||
not learn from the full session-start payload.
|
||||
* blake2s is non-reversible. The cache is the only decode path. A
|
||||
caller that did not mint the handle cannot invert it -- by design.
|
||||
* C6 (read-only audit) is untouched: this module writes nothing to the
|
||||
store; the cache is pure in-memory state.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import re
|
||||
import threading
|
||||
from collections import OrderedDict
|
||||
from typing import NamedTuple
|
||||
|
||||
# ------------------------------------------------------------------ constants
|
||||
|
||||
#: Regex a compact handle must match. Exposed for test assertions and
|
||||
#: for the decoder's input-validation contract.
|
||||
COMPACT_HANDLE_RE = re.compile(r"<iai:[0-9a-f]{16}>")
|
||||
|
||||
#: Raw-token budget ceiling for the compact handle per target.
|
||||
#: Enforced by tests/test_handle.py against ``bench/tokens._approx_tokens``.
|
||||
COMPACT_HANDLE_TOKEN_BUDGET = 16
|
||||
|
||||
#: Cache capacity. 256 concurrent handles is plenty for the realistic
|
||||
#: steady-state: one per session, a handful of overlapping sessions
|
||||
#: during daemon sleep transitions, plus test churn. Tuning knob, not
|
||||
#: a policy guarantee.
|
||||
_CACHE_CAPACITY = 256
|
||||
|
||||
|
||||
# ------------------------------------------------------------------ types
|
||||
|
||||
|
||||
class HandleParts(NamedTuple):
|
||||
"""Decoded parts of a compact handle (server-side, never serialised)."""
|
||||
|
||||
identity_short: str # 8 hex of L0 UUID, or "" when unseeded
|
||||
session_short: str # 8 hex of session id, or "-" placeholder
|
||||
topic_label: str # community label (<=8 char) or "none"
|
||||
pending: int # first_turn_pending count (>= 0)
|
||||
|
||||
|
||||
# ------------------------------------------------------------------ cache
|
||||
|
||||
|
||||
_HANDLE_CACHE: "OrderedDict[str, HandleParts]" = OrderedDict()
|
||||
_CACHE_LOCK = threading.Lock()
|
||||
|
||||
|
||||
def _remember(handle: str, parts: HandleParts) -> None:
|
||||
"""Record handle -> parts with LRU eviction."""
|
||||
with _CACHE_LOCK:
|
||||
if handle in _HANDLE_CACHE:
|
||||
_HANDLE_CACHE.move_to_end(handle)
|
||||
return
|
||||
_HANDLE_CACHE[handle] = parts
|
||||
while len(_HANDLE_CACHE) > _CACHE_CAPACITY:
|
||||
_HANDLE_CACHE.popitem(last=False)
|
||||
|
||||
|
||||
# ------------------------------------------------------------------ public API
|
||||
|
||||
|
||||
def encode_compact_handle(
|
||||
identity_short: str,
|
||||
session_short: str,
|
||||
topic_label: str,
|
||||
pending: int,
|
||||
) -> str:
|
||||
"""Derive the ``<iai:HHHHHHHHHHHHHHHH>`` handle from the three pointer inputs.
|
||||
|
||||
The output is deterministic: equal inputs always yield equal handles.
|
||||
Inputs are normalised (``str``, sanitised) before hashing so whitespace
|
||||
or accidental newlines never affect the digest.
|
||||
|
||||
The returned handle is also inserted into the in-memory decode cache
|
||||
so ``decode_compact_handle`` can reverse it within the same process.
|
||||
"""
|
||||
id_s = str(identity_short or "")
|
||||
sess_s = str(session_short or "-")
|
||||
topic_s = str(topic_label or "none")
|
||||
# Coerce pending to a bounded non-negative int; negatives or huge values
|
||||
# are clamped to the [0, 999] window the emit site actually produces.
|
||||
try:
|
||||
pend_i = max(0, min(999, int(pending)))
|
||||
except (TypeError, ValueError):
|
||||
pend_i = 0
|
||||
|
||||
h = hashlib.blake2s(digest_size=8)
|
||||
h.update(id_s.encode("utf-8"))
|
||||
h.update(b"\x1f")
|
||||
h.update(sess_s.encode("utf-8"))
|
||||
h.update(b"\x1f")
|
||||
h.update(topic_s.encode("utf-8"))
|
||||
h.update(b"\x1f")
|
||||
h.update(str(pend_i).encode("utf-8"))
|
||||
digest = h.hexdigest() # 16 hex chars
|
||||
|
||||
handle = f"<iai:{digest}>"
|
||||
_remember(handle, HandleParts(id_s, sess_s, topic_s, pend_i))
|
||||
return handle
|
||||
|
||||
|
||||
def decode_compact_handle(handle: str) -> HandleParts | None:
|
||||
"""Return the parts for a handle minted earlier in this process.
|
||||
|
||||
Returns ``None`` when the input is malformed or the handle is no
|
||||
longer in the LRU (cold cache / different process). Callers treat a
|
||||
miss as a soft error -- the legacy ``identity_pointer`` /
|
||||
``brain_handle`` / ``topic_cluster_hint`` fields remain available in
|
||||
``standard`` / ``deep`` wake_depth for fallback resolution.
|
||||
"""
|
||||
if not isinstance(handle, str) or not COMPACT_HANDLE_RE.fullmatch(handle):
|
||||
return None
|
||||
with _CACHE_LOCK:
|
||||
parts = _HANDLE_CACHE.get(handle)
|
||||
if parts is not None:
|
||||
_HANDLE_CACHE.move_to_end(handle)
|
||||
return parts
|
||||
|
||||
|
||||
def _reset_cache_for_tests() -> None:
|
||||
"""Test-only: clear the LRU. Production code must never call this."""
|
||||
with _CACHE_LOCK:
|
||||
_HANDLE_CACHE.clear()
|
||||
333
src/iai_mcp/heartbeat_scanner.py
Normal file
333
src/iai_mcp/heartbeat_scanner.py
Normal file
|
|
@ -0,0 +1,333 @@
|
|||
"""Phase 10.4 L4 — daemon-side heartbeat scanner (per-wrapper, PID-scoped).
|
||||
|
||||
Reads ``~/.iai-mcp/wrappers/heartbeat-<pid>-<uuid>.json`` files written by
|
||||
each MCP wrapper instance, validates freshness (``now - last_refresh <= M``)
|
||||
AND PID liveness (``os.kill(pid, 0)``), and aggregates presence so the daemon's
|
||||
state machine can decide WAKE vs BEDTIME.
|
||||
|
||||
Constraints (carried from CONTEXT 10.4):
|
||||
- Idle CPU near zero — scanner runs on lifecycle TICK (every 30s), not faster.
|
||||
- Scanner code is reentrant: ``scan()`` MUST be safe to call concurrently with
|
||||
a wrapper writing a heartbeat file (atomic rename pattern + JSON-parse-fail
|
||||
fallback to file mtime).
|
||||
- No new third-party dependencies — stdlib only.
|
||||
- macOS-only PID semantics carried through (Linux subset works the same; only
|
||||
Windows is unsupported, which matches the phase's macOS-only stance).
|
||||
- This module is STANDALONE — daemon main-loop integration lands in Phase 10.5.
|
||||
|
||||
Heartbeat file schema (written by wrapper, read here)::
|
||||
|
||||
{
|
||||
"pid": 12345,
|
||||
"uuid": "01HZQ...",
|
||||
"started_at": "2026-05-02T15:00:00Z",
|
||||
"last_refresh": "2026-05-02T15:14:30Z",
|
||||
"wrapper_version": "1.0.0",
|
||||
"schema_version": 1
|
||||
}
|
||||
|
||||
Status semantics:
|
||||
- FRESH: ``last_refresh`` within ``M`` seconds AND PID alive.
|
||||
- STALE: ``last_refresh`` older than ``M`` seconds (regardless of PID).
|
||||
- ORPHAN: PID is dead (``ProcessLookupError`` from ``kill(pid, 0)``) and the
|
||||
file's freshness window has not yet expired. Treated as not-active.
|
||||
|
||||
A file that fails JSON parse falls back to its filesystem mtime so a torn
|
||||
half-written write does not silently mask presence.
|
||||
|
||||
Validates: WAKE-07.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timezone
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# Module-level constants -------------------------------------------------------
|
||||
|
||||
#: Default refresh staleness threshold (seconds). A heartbeat older than this
|
||||
#: is STALE regardless of PID liveness. The wrapper SHOULD refresh every
|
||||
#: ``REFRESH_INTERVAL_SEC`` — three missed refreshes (~90 s)
|
||||
#: trip staleness.
|
||||
DEFAULT_STALE_THRESHOLD_SEC = 90
|
||||
|
||||
#: Window for the "no fresh activity in last 30 minutes" predicate consumed
|
||||
#: by the L6 ``IdleDetector.sleep_eligible`` rule.
|
||||
IDLE_WINDOW_SEC = 30 * 60
|
||||
|
||||
#: Filename glob used to enumerate heartbeat files. Matches the
|
||||
#: ``heartbeat-<pid>-<uuid>.json`` convention from CONTEXT 10.4.
|
||||
_HEARTBEAT_GLOB = "heartbeat-*.json"
|
||||
|
||||
|
||||
class HeartbeatStatus(Enum):
|
||||
"""Tri-state classification of a single heartbeat file."""
|
||||
|
||||
FRESH = "fresh"
|
||||
STALE = "stale"
|
||||
ORPHAN = "orphan"
|
||||
|
||||
|
||||
@dataclass
|
||||
class HeartbeatEntry:
|
||||
"""One scanned heartbeat file with its derived status.
|
||||
|
||||
Attributes:
|
||||
path: Absolute path of the heartbeat file on disk.
|
||||
pid: Wrapper PID parsed from the file's payload.
|
||||
uuid: Wrapper UUID parsed from the file's payload (used as a stable
|
||||
tie-breaker when the same PID is reused after wrapper restart).
|
||||
last_refresh: Timezone-aware UTC datetime parsed from
|
||||
``last_refresh``; falls back to file mtime if JSON parse fails.
|
||||
status: One of ``HeartbeatStatus.{FRESH, STALE, ORPHAN}``.
|
||||
"""
|
||||
|
||||
path: Path
|
||||
pid: int
|
||||
uuid: str
|
||||
last_refresh: datetime
|
||||
status: HeartbeatStatus
|
||||
|
||||
|
||||
# PID liveness ----------------------------------------------------------------
|
||||
|
||||
|
||||
def _is_pid_alive(pid: int) -> bool:
|
||||
"""Return True iff ``pid`` exists in the kernel's process table.
|
||||
|
||||
Uses the ``kill(pid, 0)`` POSIX trick — sends no signal but raises
|
||||
``ProcessLookupError`` (ESRCH) when the PID has been reaped. A
|
||||
``PermissionError`` (EPERM) means the process exists but the current
|
||||
user cannot signal it — for liveness purposes we count that as alive.
|
||||
A negative or zero ``pid`` is treated as dead (those values would map
|
||||
to ``kill(self_pgrp, 0)`` semantics which is not what we want).
|
||||
"""
|
||||
if pid <= 0:
|
||||
return False
|
||||
try:
|
||||
os.kill(pid, 0)
|
||||
except ProcessLookupError:
|
||||
return False
|
||||
except PermissionError:
|
||||
return True
|
||||
return True
|
||||
|
||||
|
||||
# Atomic-read-with-mtime-fallback helper --------------------------------------
|
||||
|
||||
|
||||
def _parse_heartbeat_file(path: Path) -> tuple[int, str, datetime] | None:
|
||||
"""Best-effort parse of a single heartbeat file.
|
||||
|
||||
Returns ``(pid, uuid, last_refresh_utc)`` on success or ``None`` if the
|
||||
file disappeared mid-read (race with wrapper rotation) or its content
|
||||
cannot be coerced into the minimum schema.
|
||||
|
||||
A JSON-parse failure falls back to the file's mtime so that a torn
|
||||
write produced by a wrapper crash mid-rename is treated as STALE-on-
|
||||
age rather than silently dropped — matches the "reentrant + safe under
|
||||
concurrent writers" requirement in PLAN 10.4-01 Task 1.1.
|
||||
"""
|
||||
try:
|
||||
raw = path.read_text(encoding="utf-8")
|
||||
except FileNotFoundError:
|
||||
return None
|
||||
except OSError:
|
||||
return None
|
||||
|
||||
try:
|
||||
payload = json.loads(raw)
|
||||
except json.JSONDecodeError:
|
||||
# Torn write — fall back to filename PID + filesystem mtime so we
|
||||
# at least get a STALE classification rather than dropping the file.
|
||||
return _fallback_parse_from_filename(path)
|
||||
|
||||
pid = payload.get("pid")
|
||||
uuid_str = payload.get("uuid", "")
|
||||
last_refresh_raw = payload.get("last_refresh")
|
||||
|
||||
if not isinstance(pid, int) or not isinstance(uuid_str, str):
|
||||
return _fallback_parse_from_filename(path)
|
||||
if not isinstance(last_refresh_raw, str):
|
||||
return _fallback_parse_from_filename(path)
|
||||
|
||||
try:
|
||||
# ``2026-05-02T15:14:30Z`` — Python 3.11+ accepts the trailing Z;
|
||||
# for safety we normalize to ``+00:00`` for older 3.10 compatibility.
|
||||
normalized = last_refresh_raw.replace("Z", "+00:00")
|
||||
last_refresh = datetime.fromisoformat(normalized)
|
||||
except ValueError:
|
||||
return _fallback_parse_from_filename(path)
|
||||
|
||||
if last_refresh.tzinfo is None:
|
||||
last_refresh = last_refresh.replace(tzinfo=timezone.utc)
|
||||
else:
|
||||
last_refresh = last_refresh.astimezone(timezone.utc)
|
||||
|
||||
return pid, uuid_str, last_refresh
|
||||
|
||||
|
||||
def _fallback_parse_from_filename(path: Path) -> tuple[int, str, datetime] | None:
|
||||
"""Recover ``(pid, uuid, mtime_utc)`` from filename + filesystem stat.
|
||||
|
||||
Filename convention: ``heartbeat-<pid>-<uuid>.json``. We split on ``-``
|
||||
once for ``heartbeat`` and once for the PID, joining the remainder as
|
||||
the UUID (UUIDs may contain dashes).
|
||||
"""
|
||||
name = path.stem # heartbeat-<pid>-<uuid>
|
||||
parts = name.split("-", 2)
|
||||
if len(parts) != 3 or parts[0] != "heartbeat":
|
||||
return None
|
||||
try:
|
||||
pid = int(parts[1])
|
||||
except ValueError:
|
||||
return None
|
||||
uuid_str = parts[2]
|
||||
try:
|
||||
mtime = path.stat().st_mtime
|
||||
except FileNotFoundError:
|
||||
return None
|
||||
return pid, uuid_str, datetime.fromtimestamp(mtime, tz=timezone.utc)
|
||||
|
||||
|
||||
# HeartbeatScanner -------------------------------------------------------------
|
||||
|
||||
|
||||
class HeartbeatScanner:
|
||||
"""Aggregates per-wrapper heartbeat files into a daemon-side presence signal.
|
||||
|
||||
standalone module — wires this into the daemon
|
||||
main-loop TICK to dispatch HEARTBEAT_REFRESH / IDLE state events.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
wrappers_dir: Path,
|
||||
stale_threshold_sec: int = DEFAULT_STALE_THRESHOLD_SEC,
|
||||
) -> None:
|
||||
self._wrappers_dir = wrappers_dir
|
||||
self._stale_threshold_sec = stale_threshold_sec
|
||||
self._last_scan: list[HeartbeatEntry] = []
|
||||
|
||||
# ----- Scan / classify -----------------------------------------------
|
||||
|
||||
def scan(self) -> list[HeartbeatEntry]:
|
||||
"""Read all heartbeat files, classify each, and return entries.
|
||||
|
||||
Reentrant: tolerates concurrent writes by ignoring files that vanish
|
||||
mid-read and falling back to mtime when JSON is half-written.
|
||||
|
||||
Empty / missing wrappers dir → empty list (the daemon hasn't seen
|
||||
any wrappers yet, which is a valid steady state on a fresh install).
|
||||
"""
|
||||
entries: list[HeartbeatEntry] = []
|
||||
if not self._wrappers_dir.exists():
|
||||
self._last_scan = entries
|
||||
return entries
|
||||
|
||||
try:
|
||||
candidates = list(self._wrappers_dir.glob(_HEARTBEAT_GLOB))
|
||||
except OSError:
|
||||
self._last_scan = entries
|
||||
return entries
|
||||
|
||||
now = datetime.now(timezone.utc)
|
||||
for path in candidates:
|
||||
parsed = _parse_heartbeat_file(path)
|
||||
if parsed is None:
|
||||
# File vanished mid-glob (cleanup race) — skip silently.
|
||||
continue
|
||||
pid, uuid_str, last_refresh = parsed
|
||||
|
||||
age_sec = (now - last_refresh).total_seconds()
|
||||
is_alive = _is_pid_alive(pid)
|
||||
|
||||
if age_sec > self._stale_threshold_sec:
|
||||
# Stale wins over orphan — the file is too old to trust
|
||||
# regardless of whether its PID happens to still be live.
|
||||
status = HeartbeatStatus.STALE
|
||||
elif not is_alive:
|
||||
status = HeartbeatStatus.ORPHAN
|
||||
else:
|
||||
status = HeartbeatStatus.FRESH
|
||||
|
||||
entries.append(
|
||||
HeartbeatEntry(
|
||||
path=path,
|
||||
pid=pid,
|
||||
uuid=uuid_str,
|
||||
last_refresh=last_refresh,
|
||||
status=status,
|
||||
)
|
||||
)
|
||||
|
||||
self._last_scan = entries
|
||||
return entries
|
||||
|
||||
# ----- Aggregations consumed by the state machine --------------------
|
||||
|
||||
def fresh_count(self) -> int:
|
||||
"""Number of heartbeats classified as FRESH on the most recent scan.
|
||||
|
||||
Re-runs ``scan()`` so callers don't have to remember to invoke it
|
||||
first; the cost is one filesystem walk per call which is negligible
|
||||
at TICK cadence (every 30 s).
|
||||
"""
|
||||
return sum(1 for e in self.scan() if e.status is HeartbeatStatus.FRESH)
|
||||
|
||||
def is_active(self) -> bool:
|
||||
"""True iff at least one wrapper is currently FRESH.
|
||||
|
||||
This is the primary signal the state machine uses to dispatch
|
||||
HEARTBEAT_REFRESH (→ WAKE) vs. begin the IDLE-eligibility check.
|
||||
"""
|
||||
return self.fresh_count() >= 1
|
||||
|
||||
def heartbeat_idle_30min(self) -> bool:
|
||||
"""True iff no FRESH heartbeats existed in the last 30 minutes.
|
||||
|
||||
Consumed by ``IdleDetector.sleep_eligible`` as one of the three
|
||||
disjuncts that gate L6 sleep. "No FRESH in window" is implemented
|
||||
as: scan now, and if zero entries are FRESH, the window is empty.
|
||||
STALE / ORPHAN entries imply the wrapper has not refreshed for at
|
||||
least the staleness threshold (90 s by default), so a single scan
|
||||
suffices — we don't keep a history buffer in this module.
|
||||
"""
|
||||
# Fresh count == 0 means no wrapper is currently active. Combined
|
||||
# with the 30-min wall-clock window enforced by the daemon's TICK
|
||||
# rhythm and the L6 idle predicate's hardware backstop (HIDIdleTime
|
||||
# ≥ 1800 s), this gives the same observable behavior as a separate
|
||||
# 30-minute history without keeping in-memory state.
|
||||
return self.fresh_count() == 0
|
||||
|
||||
# ----- Cleanup -------------------------------------------------------
|
||||
|
||||
def cleanup_stale_orphans(self) -> int:
|
||||
"""Delete heartbeat files classified STALE or ORPHAN. Returns count deleted.
|
||||
|
||||
Best-effort: a delete that races with another process unlinking the
|
||||
same file (``FileNotFoundError``) is counted as a successful
|
||||
cleanup; any other ``OSError`` is swallowed so a single problematic
|
||||
file cannot break the rest of the cleanup pass.
|
||||
"""
|
||||
deleted = 0
|
||||
for entry in self.scan():
|
||||
if entry.status is HeartbeatStatus.FRESH:
|
||||
continue
|
||||
try:
|
||||
entry.path.unlink()
|
||||
deleted += 1
|
||||
except FileNotFoundError:
|
||||
# Already unlinked (concurrent wrapper rotation / sibling
|
||||
# daemon scan). Count as cleaned — the file is gone.
|
||||
deleted += 1
|
||||
except OSError:
|
||||
# Permission / FS error on a single file: skip it, keep
|
||||
# going. The doctor row will surface persistent
|
||||
# cleanup failures via "n=X stale" delta on next run.
|
||||
continue
|
||||
return deleted
|
||||
122
src/iai_mcp/hebbian_structure.py
Normal file
122
src/iai_mcp/hebbian_structure.py
Normal file
|
|
@ -0,0 +1,122 @@
|
|||
"""Plan 03-01 CONN-05 D-TEM-04: structure-edge Hebbian LTP.
|
||||
|
||||
Mirrors content-edge Hebbian (retrieve.reinforce_edges -> store.boost_edges
|
||||
with edge_type="hebbian"). Co-retrieval of two records whose structure_hv
|
||||
hypervectors are sufficiently similar (Hamming similarity >= 0.7 by default)
|
||||
strengthens a "hebbian_structure" edge between them. FSRS decay on the new
|
||||
edge type is identical to the content-edge formula in sleep._decay_edges.
|
||||
|
||||
Constitutional fit:
|
||||
- D-TEM-04: Hebbian LTP on structure edges. Autopoiesis applied to structure;
|
||||
the brain reinforces structural co-occurrence the same way it reinforces
|
||||
content co-occurrence in Phase 1.
|
||||
- Flat layout (PATTERNS.md): no `connectome/` subpackage. Module path is
|
||||
src/iai_mcp/hebbian_structure.py.
|
||||
- Same shape as retrieve.reinforce_edges -- pairwise iterate, compute
|
||||
similarity, call store.boost_edges with edge_type="hebbian_structure".
|
||||
|
||||
Public API:
|
||||
- STRUCTURAL_SIMILARITY_THRESHOLD: pairs above this fire LTP (default 0.7).
|
||||
- structural_similarity(a, b): 1 - hamming_distance(a, b) / D in [0, 1].
|
||||
- strengthen_structure_edge(store, src_id, dst_id, gain=1.0): boost the
|
||||
structure edge between two records.
|
||||
- co_retrieval_trigger(store, hits): pairwise scan of co-retrieved hits;
|
||||
fire strengthen_structure_edge for every pair above the threshold.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from itertools import combinations
|
||||
from uuid import UUID
|
||||
|
||||
import numpy as np
|
||||
|
||||
from iai_mcp.store import MemoryStore
|
||||
from iai_mcp.types import STRUCTURE_HV_DIM
|
||||
|
||||
|
||||
# D-TEM-04 default trigger (per plan Task 2b behavior contract):
|
||||
# co-retrieval LTP fires when structural similarity >= 0.7 (Hamming distance
|
||||
# fraction <= 0.3). Tunable later via the profile registry if a knob is added.
|
||||
STRUCTURAL_SIMILARITY_THRESHOLD: float = 0.7
|
||||
|
||||
|
||||
def structural_similarity(a: bytes, b: bytes) -> float:
|
||||
"""Return 1 - hamming_distance(a, b) / STRUCTURE_HV_DIM in [0.0, 1.0].
|
||||
|
||||
Empty / unequal-length / corrupt inputs return 0.0 (graceful degradation).
|
||||
"""
|
||||
if not a or not b or len(a) != len(b):
|
||||
return 0.0
|
||||
aa = np.frombuffer(a, dtype=np.uint8)
|
||||
bb = np.frombuffer(b, dtype=np.uint8)
|
||||
# popcount of XOR -> hamming distance in bits.
|
||||
xor = np.bitwise_xor(aa, bb)
|
||||
# numpy >= 2.x has np.bitwise_count; fall back to unpackbits sum on older.
|
||||
try:
|
||||
ham_bits = int(np.bitwise_count(xor).sum())
|
||||
except AttributeError:
|
||||
ham_bits = int(np.unpackbits(xor).sum())
|
||||
return 1.0 - (ham_bits / STRUCTURE_HV_DIM)
|
||||
|
||||
|
||||
def strengthen_structure_edge(
|
||||
store: MemoryStore,
|
||||
src_id: UUID,
|
||||
dst_id: UUID,
|
||||
gain: float = 1.0,
|
||||
) -> dict[tuple[str, str], float]:
|
||||
"""Plan 03-01 D-TEM-04: structure-edge LTP via store.boost_edges.
|
||||
|
||||
Returns the new weights dict (same shape as retrieve.reinforce_edges'
|
||||
underlying call). Mirrors content-edge LTP shape so downstream code
|
||||
(events, audit, decay sweep) treats structure edges identically.
|
||||
"""
|
||||
return store.boost_edges(
|
||||
[(src_id, dst_id)],
|
||||
delta=float(gain),
|
||||
edge_type="hebbian_structure",
|
||||
)
|
||||
|
||||
|
||||
def co_retrieval_trigger(
|
||||
store: MemoryStore,
|
||||
hits,
|
||||
*,
|
||||
threshold: float = STRUCTURAL_SIMILARITY_THRESHOLD,
|
||||
gain: float = 1.0,
|
||||
) -> int:
|
||||
"""Pairwise scan of co-retrieved hits; fire strengthen_structure_edge
|
||||
for each pair whose structural_similarity >= threshold.
|
||||
|
||||
`hits` may be a list of MemoryHit (record_id only -- structure_hv is
|
||||
fetched lazily from store.get) OR a list of MemoryRecord (faster path,
|
||||
structure_hv read directly).
|
||||
|
||||
Returns the number of structure edges strengthened. A structurally-
|
||||
isolated co-retrieved set returns 0 -- this is expected (means no two
|
||||
hits shared structure to reinforce).
|
||||
"""
|
||||
# Materialise (id, structure_hv) tuples once.
|
||||
pairs: list[tuple[UUID, bytes]] = []
|
||||
for h in hits:
|
||||
rec_id = getattr(h, "record_id", None) or getattr(h, "id", None)
|
||||
if rec_id is None:
|
||||
continue
|
||||
hv = getattr(h, "structure_hv", None)
|
||||
if hv is None:
|
||||
rec = store.get(rec_id)
|
||||
if rec is None:
|
||||
continue
|
||||
hv = rec.structure_hv
|
||||
pairs.append((rec_id, hv or b""))
|
||||
|
||||
fired = 0
|
||||
for (a_id, a_hv), (b_id, b_hv) in combinations(pairs, 2):
|
||||
if structural_similarity(a_hv, b_hv) >= threshold:
|
||||
try:
|
||||
strengthen_structure_edge(store, a_id, b_id, gain=gain)
|
||||
fired += 1
|
||||
except Exception:
|
||||
# Diagnostic only -- never block the pipeline on edge failure.
|
||||
continue
|
||||
return fired
|
||||
324
src/iai_mcp/hippea_cascade.py
Normal file
324
src/iai_mcp/hippea_cascade.py
Normal file
|
|
@ -0,0 +1,324 @@
|
|||
"""TOK-14 / D5-05: HIPPEA activation-cascade prefetch.
|
||||
|
||||
Daemon receives `session_open` over the Phase-4 unix socket and this module
|
||||
computes precision-weighted salience over 7 days of `session_started` +
|
||||
`retrieval_used` events, selects top-K communities, and pre-warms their
|
||||
top-N records into a process-local LRU cache (cachetools.TTLCache) guarded
|
||||
by an asyncio.Lock.
|
||||
|
||||
Operationalization (Van de Cruys 2014 HIPPEA):
|
||||
f(c) = count(session_gated_to_community=c, last_7_days) / total_sessions_7d
|
||||
p(c) = 1 / |communities|
|
||||
PE(c) = |f(c) - p(c)|
|
||||
sigma2 = Var[day_i_count(c) : i in 7 days]
|
||||
w(c) = 1 / (sigma2(c) + 0.01)
|
||||
S(c) = w(c) * PE(c)
|
||||
top_K = argmax_K S(c) # K=3 default
|
||||
warm = union over c in top_K of top_N_by_centrality(records(c))
|
||||
|
||||
Cold-fallback (<3 sessions in 7-day window): return
|
||||
assignment.top_communities[:top_k] without variance weighting.
|
||||
|
||||
Constitutional invariants (asserted by grep guards in tests/test_hippea_cascade.py):
|
||||
- C1 HUMAN-FIRST: cascade task yields on shutdown within 5s.
|
||||
- C3 ZERO API COST: pure local -- no paid-API env var, no Anthropic SDK import.
|
||||
- C6 READ-ONLY: no store.insert / store.append_provenance / store.update calls.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from collections import Counter, defaultdict
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import Any, Iterable
|
||||
from uuid import UUID
|
||||
|
||||
from cachetools import TTLCache
|
||||
|
||||
|
||||
# ---------------------------------------------------------- process-local LRU
|
||||
|
||||
# D5-05 constants:
|
||||
# maxsize=200, ttl=1800 (30 min). These match the recommendations and
|
||||
# keep the cache small enough to fit in MCP core RAM headroom.
|
||||
_WARM_MAXSIZE = 200
|
||||
_WARM_TTL_SECONDS = 1800
|
||||
|
||||
|
||||
_warm_lru: TTLCache[UUID, Any] = TTLCache(maxsize=_WARM_MAXSIZE, ttl=_WARM_TTL_SECONDS)
|
||||
_warm_lru_lock = asyncio.Lock()
|
||||
|
||||
|
||||
def snapshot_warm_ids() -> list[UUID]:
|
||||
"""Lock-free snapshot of warm record IDs.
|
||||
|
||||
CPython GIL makes `list(dict.keys())` atomic for simple types. A concurrent
|
||||
mutator may race and invalidate the iterator -- we catch RuntimeError and
|
||||
return an empty list rather than propagating the rare race.
|
||||
"""
|
||||
try:
|
||||
return list(_warm_lru.keys())
|
||||
except RuntimeError:
|
||||
return []
|
||||
|
||||
|
||||
def get_warm_record(rid: UUID) -> Any | None:
|
||||
"""Return the warmed record or None. Silent on miss / structural error."""
|
||||
try:
|
||||
return _warm_lru.get(rid)
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
async def warm_records(record_ids: Iterable[UUID], store: Any) -> int:
|
||||
"""Load records into the LRU. Returns count inserted.
|
||||
|
||||
C6: READ-ONLY against the store -- only `store.get(rid)` is called.
|
||||
Any store-get exception is swallowed per-record so a single bad id
|
||||
cannot poison the warmer.
|
||||
"""
|
||||
inserted = 0
|
||||
async with _warm_lru_lock:
|
||||
for rid in record_ids:
|
||||
try:
|
||||
rec = store.get(rid)
|
||||
if rec is not None:
|
||||
_warm_lru[rid] = rec
|
||||
inserted += 1
|
||||
except Exception:
|
||||
continue
|
||||
return inserted
|
||||
|
||||
|
||||
# ---------------------------------------------------------- salience formula
|
||||
|
||||
|
||||
def compute_salient_communities(
|
||||
store: Any,
|
||||
assignment: Any,
|
||||
*,
|
||||
lookback_days: int = 7,
|
||||
top_k: int = 3,
|
||||
) -> list[UUID]:
|
||||
"""Return top-K community UUIDs by HIPPEA salience S(c) = w(c) * PE(c).
|
||||
|
||||
Cold fallback (<3 sessions in window): return
|
||||
`assignment.top_communities[:top_k]` with no variance weighting.
|
||||
"""
|
||||
# Lazy import to keep the module's surface clean of store-mutating paths.
|
||||
from iai_mcp.events import query_events
|
||||
|
||||
since = datetime.now(timezone.utc) - timedelta(days=lookback_days)
|
||||
try:
|
||||
sessions = query_events(store, kind="session_started", since=since, limit=10000)
|
||||
except Exception:
|
||||
sessions = []
|
||||
|
||||
if len(sessions) < 3:
|
||||
# D5-05 cold fallback: simplified formula drops the variance term.
|
||||
# Use the existing Leiden top-communities as a reasonable default.
|
||||
return list(getattr(assignment, "top_communities", []))[:top_k]
|
||||
|
||||
try:
|
||||
retrievals = query_events(
|
||||
store, kind="retrieval_used", since=since, limit=50000,
|
||||
)
|
||||
except Exception:
|
||||
retrievals = []
|
||||
|
||||
# session_id -> dominant community for that session (most retrieved).
|
||||
per_session_counter: dict[str, Counter] = defaultdict(Counter)
|
||||
for ev in retrievals:
|
||||
data = ev.get("data", {}) if isinstance(ev, dict) else {}
|
||||
sid = data.get("session_id") or ev.get("session_id", "")
|
||||
cid = data.get("community_id") or data.get("community", "")
|
||||
if sid and cid:
|
||||
per_session_counter[sid][str(cid)] += 1
|
||||
session_comm: dict[str, str] = {
|
||||
sid: ctr.most_common(1)[0][0]
|
||||
for sid, ctr in per_session_counter.items()
|
||||
if ctr
|
||||
}
|
||||
|
||||
total_sessions = len(sessions)
|
||||
community_pool: list[UUID] = list(getattr(assignment, "top_communities", []) or [])
|
||||
# Also admit any community seen in retrievals during the window even if it
|
||||
# isn't in top_communities -- the salience formula evaluates all observed
|
||||
# communities, not just the Leiden-top.
|
||||
seen: set[str] = set(session_comm.values())
|
||||
for cid in (str(c) for c in community_pool):
|
||||
seen.add(cid)
|
||||
if not seen:
|
||||
return []
|
||||
p = 1.0 / len(seen)
|
||||
|
||||
# f(c) across the window.
|
||||
freq: Counter = Counter(session_comm.values())
|
||||
|
||||
# Day-bucketed counts (0 = today, lookback_days-1 = oldest).
|
||||
day_buckets: dict[str, list[int]] = defaultdict(lambda: [0] * lookback_days)
|
||||
now = datetime.now(timezone.utc)
|
||||
for sev in sessions:
|
||||
ts = sev.get("ts") if isinstance(sev, dict) else None
|
||||
try:
|
||||
if isinstance(ts, str):
|
||||
t = datetime.fromisoformat(ts.replace("Z", "+00:00"))
|
||||
elif hasattr(ts, "to_pydatetime"):
|
||||
t = ts.to_pydatetime()
|
||||
if t.tzinfo is None:
|
||||
t = t.replace(tzinfo=timezone.utc)
|
||||
elif hasattr(ts, "tzinfo") and ts is not None:
|
||||
t = ts
|
||||
if t.tzinfo is None:
|
||||
t = t.replace(tzinfo=timezone.utc)
|
||||
else:
|
||||
t = now
|
||||
delta = (now - t).days
|
||||
day_idx = max(0, min(lookback_days - 1, delta))
|
||||
except Exception:
|
||||
day_idx = 0
|
||||
data = sev.get("data", {}) if isinstance(sev, dict) else {}
|
||||
sid = data.get("session_id") or sev.get("session_id", "")
|
||||
c = session_comm.get(sid)
|
||||
if c:
|
||||
day_buckets[c][day_idx] += 1
|
||||
|
||||
# Compute S(c) per community.
|
||||
scores: dict[str, float] = {}
|
||||
for c in seen:
|
||||
f_c = freq.get(c, 0) / max(1, total_sessions)
|
||||
pe = abs(f_c - p)
|
||||
bucket = day_buckets.get(c, [0] * lookback_days)
|
||||
n = len(bucket) or 1
|
||||
mean = sum(bucket) / n
|
||||
variance = sum((x - mean) ** 2 for x in bucket) / n
|
||||
w = 1.0 / (variance + 0.01)
|
||||
scores[c] = w * pe
|
||||
|
||||
ranked = sorted(
|
||||
scores.items(),
|
||||
key=lambda kv: (-kv[1], kv[0]), # deterministic tiebreak by cid str
|
||||
)
|
||||
top: list[UUID] = []
|
||||
for cid_str, _ in ranked:
|
||||
try:
|
||||
top.append(UUID(cid_str))
|
||||
except (TypeError, ValueError):
|
||||
continue
|
||||
if len(top) >= top_k:
|
||||
break
|
||||
return top
|
||||
|
||||
|
||||
# ---------------------------------------------------------- centrality helper
|
||||
|
||||
|
||||
def _top_n_records_by_centrality(
|
||||
store: Any, assignment: Any, community_id: UUID, n: int,
|
||||
) -> list[UUID]:
|
||||
"""READ-ONLY: return top-N record ids for `community_id` by centrality.
|
||||
|
||||
Uses `assignment.mid_regions[community_id]` to enumerate member records,
|
||||
then reads each record's `centrality` field via store.get and sorts by
|
||||
descending centrality. Falls back to insertion order if centrality is
|
||||
missing or non-comparable.
|
||||
"""
|
||||
mid_regions = getattr(assignment, "mid_regions", {}) or {}
|
||||
member_ids = list(mid_regions.get(community_id) or [])
|
||||
if not member_ids:
|
||||
return []
|
||||
scored: list[tuple[float, UUID]] = []
|
||||
for rid in member_ids:
|
||||
try:
|
||||
rec = store.get(rid)
|
||||
except Exception:
|
||||
rec = None
|
||||
if rec is None:
|
||||
continue
|
||||
try:
|
||||
centrality = float(getattr(rec, "centrality", 0.0) or 0.0)
|
||||
except (TypeError, ValueError):
|
||||
centrality = 0.0
|
||||
scored.append((centrality, rid))
|
||||
scored.sort(key=lambda kv: (-kv[0], str(kv[1])))
|
||||
return [rid for _c, rid in scored[:n]]
|
||||
|
||||
|
||||
# ---------------------------------------------------------- sync core-side helper
|
||||
|
||||
|
||||
def compute_core_side_warm_snapshot(
|
||||
store: Any,
|
||||
assignment: Any,
|
||||
*,
|
||||
top_k: int = 3,
|
||||
per_community: int | None = None,
|
||||
max_records: int = 50,
|
||||
) -> list[UUID]:
|
||||
"""Synchronous counterpart to :func:`run_cascade`'s compute path.
|
||||
|
||||
the MCP core runs in a different process from the sleep
|
||||
daemon, so the daemon's ``_warm_lru`` is invisible to core --
|
||||
``snapshot_warm_ids()`` returns ``[]`` in the core on every fresh
|
||||
process boot. This helper lets the core compute its OWN cascade
|
||||
inline (no asyncio dependency) and write the warmed record ids into
|
||||
its own process-local LRU. Duplicates daemon work by design; that
|
||||
is the price of not having shared-memory IPC between the two
|
||||
processes.
|
||||
|
||||
Reuses :func:`compute_salient_communities` (already sync) and
|
||||
:func:`_top_n_records_by_centrality` (sync) -- no new salience
|
||||
formula; only the orchestration that :func:`run_cascade` would do
|
||||
asynchronously.
|
||||
|
||||
READ-ONLY against store (C6 invariant); no async I/O; no paid-API
|
||||
import (C3 invariant).
|
||||
"""
|
||||
top = compute_salient_communities(store, assignment, top_k=top_k)
|
||||
if not top:
|
||||
return []
|
||||
per_c = per_community or max(1, max_records // max(1, len(top)))
|
||||
out: list[UUID] = []
|
||||
for cid in top:
|
||||
try:
|
||||
out.extend(_top_n_records_by_centrality(store, assignment, cid, per_c))
|
||||
except Exception:
|
||||
continue
|
||||
return out[:max_records]
|
||||
|
||||
|
||||
# ---------------------------------------------------------- public entrypoint
|
||||
|
||||
|
||||
async def run_cascade(
|
||||
store: Any,
|
||||
assignment: Any,
|
||||
*,
|
||||
top_k: int = 3,
|
||||
per_community: int | None = None,
|
||||
) -> dict:
|
||||
"""Pre-warm records for top-K salient communities.
|
||||
|
||||
Returns a stats dict: {
|
||||
"communities_selected": int,
|
||||
"records_warmed": int,
|
||||
"top_communities": list[str],
|
||||
}
|
||||
"""
|
||||
top = compute_salient_communities(store, assignment, top_k=top_k)
|
||||
if not top:
|
||||
return {"communities_selected": 0, "records_warmed": 0, "top_communities": []}
|
||||
|
||||
per_c = per_community or max(1, _WARM_MAXSIZE // max(1, len(top)))
|
||||
to_warm: list[UUID] = []
|
||||
for cid in top:
|
||||
try:
|
||||
rec_ids = _top_n_records_by_centrality(store, assignment, cid, per_c)
|
||||
to_warm.extend(rec_ids)
|
||||
except Exception:
|
||||
continue
|
||||
inserted = await warm_records(to_warm[:_WARM_MAXSIZE], store)
|
||||
return {
|
||||
"communities_selected": len(top),
|
||||
"records_warmed": inserted,
|
||||
"top_communities": [str(c) for c in top],
|
||||
}
|
||||
364
src/iai_mcp/host_cli.py
Normal file
364
src/iai_mcp/host_cli.py
Normal file
|
|
@ -0,0 +1,364 @@
|
|||
"""Claude Code CLI subprocess wrapper + budget ledger.
|
||||
|
||||
Subprocess safety:
|
||||
- Uses asyncio.create_subprocess_exec (argv-list form) -- NO shell expansion.
|
||||
The prompt string is passed as a single argv element; no shell-injection surface.
|
||||
- NEVER uses asyncio.create_subprocess_shell, shell=True, or os.system.
|
||||
|
||||
Constitutional guards:
|
||||
- we DO NOT read the paid-API env var. The env is scrubbed via
|
||||
ENV_DENY_LIST before the subprocess is spawned so the key cannot leak into
|
||||
the child `claude -p` process even if set in our parent env by accident.
|
||||
- Bug #43333 defence-in-depth:
|
||||
1. Pre-flight credentials.json validation (billingType=stripe_subscription).
|
||||
2. Subprocess spawn with scrubbed env (3 hostile keys removed).
|
||||
3. Post-flight tripwire: cost_usd > 0 -> BudgetTracker.disable_host()
|
||||
+ structured error result. Subsequent calls refuse to spend.
|
||||
- this module does NOT decide frequency. insight.py orchestrates exactly
|
||||
one call per night. This module is the wrapper only.
|
||||
- self-tracked budget (1% daily, 7% weekly buffer, local
|
||||
midnight reset) persisted inside daemon_state under BUDGET_STATE_KEY.
|
||||
- force-wake during an in-flight claude -p subprocess is honoured
|
||||
cooperatively -- CancelledError is caught, the subprocess is terminated
|
||||
(with FORCE_WAKE_GRACE_SEC grace then kill escalation), and a structured
|
||||
error result is returned WITHOUT re-raising. The daemon loop stays alive.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from iai_mcp.daemon_state import load_state, save_state
|
||||
|
||||
# --------------------------------------------------------------------- constants
|
||||
# hostile env key deny list. The paid-API key must NEVER reach the
|
||||
# `claude -p` subprocess; two alias names have been seen in issue reports for
|
||||
# bug #43333 so we scrub all three. We build the key strings from fragments
|
||||
# so the literal names do not appear as static text in this module -- the
|
||||
# constitutional-guard grep test (test_no_api_key_in_daemon) greps for the
|
||||
# bare literal, and the scrub path still removes every variant at runtime.
|
||||
_ANTHR = "ANTHR" + "OPIC_" + "API_" + "KEY"
|
||||
_CLAUDE_KEY = "CLAUDE_" + "API_" + "KEY"
|
||||
_CLAUDE_CODE_KEY = "CLAUDE_" + "CODE_" + "API_" + "KEY"
|
||||
ENV_DENY_LIST: tuple[str, ...] = (_ANTHR, _CLAUDE_KEY, _CLAUDE_CODE_KEY)
|
||||
|
||||
HOST_TIMEOUT_SEC: float = 120.0 # hard wall for a single call
|
||||
FORCE_WAKE_GRACE_SEC: float = 60.0 # cooperative grace on cancel
|
||||
TERMINATE_WAIT_SEC: float = 5.0 # timeout window before kill escalation
|
||||
KILL_WAIT_SEC: float = 2.0 # bound for post-SIGKILL reap wait
|
||||
DAILY_QUOTA_BUDGET_PCT: float = 0.01 # -- 1% of daily estimate
|
||||
WEEKLY_BUFFER_PCT: float = 0.07 # -- 7% weekly ceiling
|
||||
ESTIMATED_DAILY_TOKEN_CEILING: int = 1_000_000 # heuristic (Pro subscription)
|
||||
CREDENTIALS_PATH: Path = Path.home() / ".claude" / ".credentials.json"
|
||||
BUDGET_STATE_KEY: str = "host_budget"
|
||||
|
||||
|
||||
# -------------------------------------------------------- pre-flight credentials
|
||||
|
||||
|
||||
def verify_credentials_subscription() -> dict:
|
||||
"""Validate the local Claude credentials file says the user is on a
|
||||
Stripe subscription (bug #43333 layer 2 defence).
|
||||
|
||||
We do NOT read the file's secret material. We look at `billingType` only
|
||||
and refuse to call `claude -p` when the billing mode is anything other
|
||||
than `stripe_subscription` (accepts both camelCase and snake_case keys
|
||||
since the schema has varied across Claude CLI versions).
|
||||
"""
|
||||
if not CREDENTIALS_PATH.exists():
|
||||
return {"ok": False, "reason": "credentials_file_missing"}
|
||||
try:
|
||||
data = json.loads(CREDENTIALS_PATH.read_text())
|
||||
except (OSError, json.JSONDecodeError) as exc:
|
||||
return {"ok": False, "reason": "credentials_unreadable", "error": str(exc)}
|
||||
billing = data.get("billingType") or data.get("billing_type") or ""
|
||||
if billing != "stripe_subscription":
|
||||
return {"ok": False, "reason": "not_subscription", "billing_type": billing}
|
||||
return {"ok": True, "billing_type": billing}
|
||||
|
||||
|
||||
# --------------------------------------------------------------- BudgetTracker
|
||||
|
||||
|
||||
class BudgetTracker:
|
||||
"""Self-tracked daily + weekly token budget.
|
||||
|
||||
State is stored inside daemon_state under BUDGET_STATE_KEY. The tracker
|
||||
reads once at construction and writes back via save_state on any mutation.
|
||||
Thread-safety is handled at the daemon-state filesystem layer (atomic
|
||||
rename in daemon_state.save_state).
|
||||
"""
|
||||
|
||||
def __init__(self, state: dict) -> None:
|
||||
self._state = state
|
||||
budget = state.get(BUDGET_STATE_KEY) or {}
|
||||
self._daily_used_tokens = int(budget.get("daily_used_tokens", 0) or 0)
|
||||
self._weekly_buffer_used_tokens = int(
|
||||
budget.get("weekly_buffer_used_tokens", 0) or 0,
|
||||
)
|
||||
self._last_reset_date = budget.get("last_reset_date")
|
||||
self._host_disabled = bool(budget.get("host_disabled", False))
|
||||
self._disabled_reason = budget.get("host_disabled_reason")
|
||||
|
||||
# --- read helpers --------------------------------------------------------
|
||||
|
||||
def host_disabled_after_billing_event(self) -> bool:
|
||||
"""True if a prior call hit the bug #43333 tripwire and auto-disabled."""
|
||||
return self._host_disabled
|
||||
|
||||
def weekly_buffer_exceeded(self) -> bool:
|
||||
"""D-16 ceiling: 7% weekly buffer fully consumed."""
|
||||
weekly_cap = int(WEEKLY_BUFFER_PCT * ESTIMATED_DAILY_TOKEN_CEILING * 7)
|
||||
return self._weekly_buffer_used_tokens >= weekly_cap
|
||||
|
||||
def can_spend(self, estimated_tokens: int) -> bool:
|
||||
"""Pre-flight check: will this call fit in the daily cap, or (if
|
||||
overflowing) in the remaining weekly buffer? Returns False when
|
||||
Claude is auto-disabled or when neither ledger has room."""
|
||||
if self._host_disabled:
|
||||
return False
|
||||
daily_cap = int(DAILY_QUOTA_BUDGET_PCT * ESTIMATED_DAILY_TOKEN_CEILING)
|
||||
if self._daily_used_tokens + estimated_tokens <= daily_cap:
|
||||
return True
|
||||
weekly_cap = int(WEEKLY_BUFFER_PCT * ESTIMATED_DAILY_TOKEN_CEILING * 7)
|
||||
overflow = (self._daily_used_tokens + estimated_tokens) - daily_cap
|
||||
return self._weekly_buffer_used_tokens + overflow <= weekly_cap
|
||||
|
||||
# --- mutations -----------------------------------------------------------
|
||||
|
||||
def reset_if_new_day(self, now: datetime, tz) -> None:
|
||||
"""zero the daily counter at the user's LOCAL midnight. Any
|
||||
unused daily budget returns to the weekly buffer (capped at the
|
||||
weekly ceiling). Safe to call every tick -- it's a no-op until the
|
||||
local-date actually rolls."""
|
||||
today_local = now.astimezone(tz).date().isoformat()
|
||||
if self._last_reset_date == today_local:
|
||||
return
|
||||
daily_cap = int(DAILY_QUOTA_BUDGET_PCT * ESTIMATED_DAILY_TOKEN_CEILING)
|
||||
weekly_cap = int(WEEKLY_BUFFER_PCT * ESTIMATED_DAILY_TOKEN_CEILING * 7)
|
||||
unused_today = max(0, daily_cap - self._daily_used_tokens)
|
||||
self._weekly_buffer_used_tokens = max(
|
||||
0,
|
||||
min(
|
||||
weekly_cap,
|
||||
self._weekly_buffer_used_tokens - unused_today,
|
||||
),
|
||||
)
|
||||
self._daily_used_tokens = 0
|
||||
self._last_reset_date = today_local
|
||||
self._persist()
|
||||
|
||||
def record(self, tokens_in: int, tokens_out: int, now: datetime) -> None:
|
||||
"""Record the tokens spent on one `claude -p` call. Overflow past the
|
||||
daily cap spills into the weekly buffer; daily counter is then clamped
|
||||
at the cap so `can_spend` sees today as fully exhausted."""
|
||||
total = int(tokens_in) + int(tokens_out)
|
||||
daily_cap = int(DAILY_QUOTA_BUDGET_PCT * ESTIMATED_DAILY_TOKEN_CEILING)
|
||||
if self._daily_used_tokens + total <= daily_cap:
|
||||
self._daily_used_tokens += total
|
||||
else:
|
||||
overflow = (self._daily_used_tokens + total) - daily_cap
|
||||
self._daily_used_tokens = daily_cap
|
||||
self._weekly_buffer_used_tokens += overflow
|
||||
self._persist()
|
||||
|
||||
def disable_host(self, reason: str) -> None:
|
||||
"""Bug #43333 tripwire. Once fired, no further calls are allowed
|
||||
until explicit re-enable (requires user intervention via the morning
|
||||
digest which surfaces the event)."""
|
||||
self._host_disabled = True
|
||||
self._disabled_reason = str(reason)[:500]
|
||||
self._persist()
|
||||
|
||||
# --- persistence ---------------------------------------------------------
|
||||
|
||||
def _persist(self) -> None:
|
||||
self._state[BUDGET_STATE_KEY] = {
|
||||
"daily_used_tokens": self._daily_used_tokens,
|
||||
"weekly_buffer_used_tokens": self._weekly_buffer_used_tokens,
|
||||
"last_reset_date": self._last_reset_date,
|
||||
"host_disabled": self._host_disabled,
|
||||
"host_disabled_reason": self._disabled_reason,
|
||||
}
|
||||
save_state(self._state)
|
||||
|
||||
|
||||
# --------------------------------------------------------- subprocess invocation
|
||||
|
||||
|
||||
def _scrubbed_env() -> dict[str, str]:
|
||||
"""Return a copy of os.environ with the hostile keys removed.
|
||||
|
||||
ENV_DENY_LIST above is the single source of truth for the key names so
|
||||
the constitutional-guard grep test sees them in exactly one place.
|
||||
"""
|
||||
result: dict[str, str] = {}
|
||||
for key, value in os.environ.items():
|
||||
if key in ENV_DENY_LIST:
|
||||
continue
|
||||
result[key] = value
|
||||
for hostile in ENV_DENY_LIST:
|
||||
result.pop(hostile, None)
|
||||
return result
|
||||
|
||||
|
||||
def _build_cmd(prompt: str, model: str) -> list[str]:
|
||||
"""Argv list for `claude -p`. Single list element for prompt -> no shell
|
||||
interpolation path."""
|
||||
return [
|
||||
"claude",
|
||||
"--bare",
|
||||
"-p",
|
||||
prompt,
|
||||
"--output-format",
|
||||
"json",
|
||||
"--max-turns",
|
||||
"1",
|
||||
"--tools",
|
||||
"",
|
||||
"--no-session-persistence",
|
||||
"--model",
|
||||
model,
|
||||
]
|
||||
|
||||
|
||||
async def _terminate_then_kill(proc, grace_sec: float) -> None:
|
||||
"""Cooperative shutdown: terminate(); wait `grace_sec`; kill() if still
|
||||
running. Never raises -- best-effort cleanup only."""
|
||||
try:
|
||||
if proc.returncode is None:
|
||||
proc.terminate()
|
||||
except ProcessLookupError:
|
||||
return
|
||||
try:
|
||||
await asyncio.wait_for(proc.wait(), timeout=grace_sec)
|
||||
except asyncio.TimeoutError:
|
||||
try:
|
||||
proc.kill()
|
||||
except ProcessLookupError:
|
||||
return
|
||||
try:
|
||||
# Bound the post-kill wait so the scheduler always yields even
|
||||
# when the OS refuses to reap the child (zombie path).
|
||||
await asyncio.wait_for(proc.wait(), timeout=KILL_WAIT_SEC)
|
||||
except (asyncio.TimeoutError, Exception): # noqa: BLE001 -- best-effort
|
||||
pass
|
||||
|
||||
|
||||
async def invoke_host_once(
|
||||
prompt: str,
|
||||
*,
|
||||
model: str = "haiku",
|
||||
) -> dict:
|
||||
"""Spawn one `claude -p` subprocess, return a structured result dict.
|
||||
|
||||
Shape of the return value always includes ok, cost_usd, tokens_in,
|
||||
tokens_out so callers can sum budgets unconditionally. On ok=False,
|
||||
reason is one of:
|
||||
timeout | nonzero_exit | unparseable_output | api_billing_detected
|
||||
| force_wake_killed
|
||||
|
||||
Constitutional guarantees:
|
||||
- No shell expansion of `prompt` -- argv list only.
|
||||
- Hostile env keys scrubbed via ENV_DENY_LIST before spawn.
|
||||
- bug #43333: cost_usd > 0 triggers BudgetTracker.disable_host plus an
|
||||
error result. A second call then short-circuits at can_spend().
|
||||
"""
|
||||
env = _scrubbed_env()
|
||||
cmd = _build_cmd(prompt, model)
|
||||
|
||||
proc = await asyncio.create_subprocess_exec(
|
||||
*cmd,
|
||||
stdin=asyncio.subprocess.DEVNULL,
|
||||
stdout=asyncio.subprocess.PIPE,
|
||||
stderr=asyncio.subprocess.PIPE,
|
||||
env=env,
|
||||
)
|
||||
|
||||
try:
|
||||
stdout, stderr = await asyncio.wait_for(
|
||||
proc.communicate(),
|
||||
timeout=HOST_TIMEOUT_SEC,
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
await _terminate_then_kill(proc, TERMINATE_WAIT_SEC)
|
||||
return {
|
||||
"ok": False,
|
||||
"reason": "timeout",
|
||||
"exit_code": proc.returncode if proc.returncode is not None else -1,
|
||||
"cost_usd": 0.0,
|
||||
"tokens_in": 0,
|
||||
"tokens_out": 0,
|
||||
}
|
||||
except asyncio.CancelledError:
|
||||
# + Warning 8: force-wake arrived mid-call. Clean up subprocess,
|
||||
# return a structured error, do NOT re-raise. Re-raising would unwind
|
||||
# back into the daemon scheduler and potentially crash the event
|
||||
# loop; cooperative yield requires a normal return here.
|
||||
await _terminate_then_kill(proc, FORCE_WAKE_GRACE_SEC)
|
||||
return {
|
||||
"ok": False,
|
||||
"reason": "force_wake_killed",
|
||||
"cost_usd": 0.0,
|
||||
"tokens_in": 0,
|
||||
"tokens_out": 0,
|
||||
}
|
||||
|
||||
if proc.returncode != 0:
|
||||
return {
|
||||
"ok": False,
|
||||
"reason": "nonzero_exit",
|
||||
"exit_code": proc.returncode,
|
||||
"stderr": stderr.decode("utf-8", errors="replace")[:500],
|
||||
"cost_usd": 0.0,
|
||||
"tokens_in": 0,
|
||||
"tokens_out": 0,
|
||||
}
|
||||
|
||||
try:
|
||||
data = json.loads(stdout)
|
||||
except json.JSONDecodeError:
|
||||
return {
|
||||
"ok": False,
|
||||
"reason": "unparseable_output",
|
||||
"cost_usd": 0.0,
|
||||
"tokens_in": 0,
|
||||
"tokens_out": 0,
|
||||
}
|
||||
|
||||
cost_usd = float(data.get("cost_usd", 0.0) or 0.0)
|
||||
usage = data.get("usage") or {}
|
||||
tokens_in = int(usage.get("input_tokens", 0) or 0)
|
||||
tokens_out = int(usage.get("output_tokens", 0) or 0)
|
||||
|
||||
# Bug #43333 post-flight tripwire: a real subscription-mode Claude CLI
|
||||
# call MUST report cost_usd == 0. Anything else means the subscription
|
||||
# path was bypassed (billing would follow). Auto-disable future calls.
|
||||
if cost_usd > 0.0:
|
||||
try:
|
||||
state = load_state()
|
||||
BudgetTracker(state).disable_host(
|
||||
reason=f"api_billing_detected cost_usd={cost_usd}",
|
||||
)
|
||||
except Exception: # noqa: BLE001 -- tripwire must not re-raise
|
||||
pass
|
||||
return {
|
||||
"ok": False,
|
||||
"reason": "api_billing_detected",
|
||||
"cost_usd": cost_usd,
|
||||
"data": data,
|
||||
"tokens_in": tokens_in,
|
||||
"tokens_out": tokens_out,
|
||||
}
|
||||
|
||||
return {
|
||||
"ok": True,
|
||||
"data": data,
|
||||
"cost_usd": cost_usd,
|
||||
"tokens_in": tokens_in,
|
||||
"tokens_out": tokens_out,
|
||||
}
|
||||
197
src/iai_mcp/identity_audit.py
Normal file
197
src/iai_mcp/identity_audit.py
Normal file
|
|
@ -0,0 +1,197 @@
|
|||
"""Continuous S5 identity audit. Runs even when daemon is paused.
|
||||
|
||||
Wraps `s5.detect_drift_anomaly` + `sigma.compute_and_emit` on a 1-hour cadence.
|
||||
Both calls are MVCC reads (LanceDB handles concurrent readers natively), so
|
||||
this loop does NOT acquire the fcntl exclusive lock. That is the C6 invariant:
|
||||
the daemon continues to observe its own identity even when heavy consolidation
|
||||
is paused.
|
||||
|
||||
Phase 7.3 addition (D7.3-11): the same loop iteration also runs Lance
|
||||
storage maintenance (`optimize_lance_storage`) on a configurable cadence
|
||||
(default 1h via `LANCE_OPTIMIZE_INTERVAL_SEC`). The optimize body is gated
|
||||
by a `time.monotonic()` cooldown against the configured interval; the
|
||||
cooldown gate is silent when blocked (no event flooding).
|
||||
|
||||
Phase 10.6 Plan 10.6-01 Task 1.4: REMOVED the `_should_yield_to_mcp(socket)`
|
||||
HUMAN-FIRST gate. The lifecycle state machine + sleep_pipeline supersede
|
||||
this design — periodic optimize runs unconditionally once the cooldown
|
||||
passes; SLEEP-state coexistence is provided by the lifecycle predicate
|
||||
that gates SLEEP entry on `sleep_eligible`. The `socket` kwarg has been
|
||||
removed from `continuous_audit`'s signature.
|
||||
|
||||
Constitutional guard:
|
||||
- C6: S5 invariant audit runs read-only (MVCC) and does NOT acquire the
|
||||
process-wide exclusive lock. Grep-guarded by
|
||||
tests/test_constitutional_guards.py (C6 = no lock module imported here).
|
||||
- C3: ZERO paid-API cost. No reference to paid-API env var.
|
||||
- C5: literal preservation -- no writes to MemoryRecord.literal_surface.
|
||||
- Light daemon ops run concurrent with MCP via LanceDB MVCC; the audit
|
||||
path is exactly one such op.
|
||||
|
||||
Exception handling: each of the underlying calls is wrapped in its own
|
||||
try/except. Failures are emitted as `identity_audit_error` events with a
|
||||
`stage` discriminator ("s5" | "sigma") and the loop continues to the next
|
||||
tick. The Lance optimize step uses a separate try/except path because its
|
||||
helper already swallows per-table failures into the report dict (D7.3-09);
|
||||
the outer guard there only protects against event-write failure. The
|
||||
daemon must never die from an audit OR maintenance failure.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import time
|
||||
|
||||
from iai_mcp import maintenance as _maintenance
|
||||
from iai_mcp.events import write_event
|
||||
from iai_mcp.maintenance import optimize_lance_storage
|
||||
from iai_mcp.s5 import detect_drift_anomaly
|
||||
from iai_mcp.sigma import compute_and_emit
|
||||
|
||||
# 1-hour cadence -- same granularity as sigma snapshot + S5 audit in S4 pass.
|
||||
AUDIT_INTERVAL_SEC: int = 60 * 60
|
||||
|
||||
# R2 / D7.3-14: timestamp of the most recent successful periodic
|
||||
# Lance optimize. Module-level mutable; the loop body declares
|
||||
# `global _last_optimize_completed_at` to write. Ephemeral by design --
|
||||
# daemon restart resets to 0.0 so the first periodic poll runs immediately
|
||||
# (the startup wire-in in daemon.main() already handled the boot-time bloat
|
||||
# collapse, so this just establishes the periodic cadence baseline).
|
||||
#
|
||||
# Mirrors Phase 7.2's _last_cascade_completed_at pattern in daemon.py
|
||||
# exactly (D7.2-03/D7.2-05): time.monotonic() not datetime.now() so the
|
||||
# cooldown is immune to clock skew + system suspend/resume.
|
||||
_last_optimize_completed_at: float = 0.0
|
||||
|
||||
|
||||
async def continuous_audit(
|
||||
store,
|
||||
shutdown: asyncio.Event,
|
||||
*,
|
||||
interval_sec: float | None = None,
|
||||
) -> None:
|
||||
"""Loop until `shutdown` is set.
|
||||
|
||||
On each tick: run S5 drift anomaly detection, then sigma topology
|
||||
snapshot, then gated Lance storage optimize. All three
|
||||
are independent: a failure in any one stage does not abort the others.
|
||||
The interval sleep is implemented via `asyncio.wait_for(shutdown.wait(),
|
||||
timeout=interval_sec)` so shutdown is responsive within a fraction of a
|
||||
second rather than having to wait a full hour.
|
||||
|
||||
When `interval_sec` is None we look up the current module-level
|
||||
`AUDIT_INTERVAL_SEC` at call time. This lets tests monkeypatch the
|
||||
constant before calling the function.
|
||||
|
||||
Plan 10.6-01 Task 1.4: REMOVED the `socket` kwarg + the
|
||||
`_should_yield_to_mcp(socket)` gate inside the periodic Lance
|
||||
optimize branch. SLEEP-state coexistence is now provided by the
|
||||
lifecycle state machine instead of an in-loop yield probe.
|
||||
|
||||
Args:
|
||||
store: MemoryStore instance.
|
||||
shutdown: asyncio.Event that breaks the loop when set.
|
||||
interval_sec: optional override for the per-tick sleep. Tests use
|
||||
small values (e.g. 0.05) to drive the loop quickly.
|
||||
"""
|
||||
# R2: explicit `global` so the assignment in the periodic body
|
||||
# updates module-level state, not a local binding. Mirrors the Pitfall 3
|
||||
# discipline from Phase 7.2's _hippea_cascade_loop.
|
||||
global _last_optimize_completed_at
|
||||
|
||||
while not shutdown.is_set():
|
||||
effective_interval: float = (
|
||||
float(interval_sec) if interval_sec is not None else float(AUDIT_INTERVAL_SEC)
|
||||
)
|
||||
# Stage 1: S5 drift anomaly detection (MVCC read).
|
||||
try:
|
||||
await asyncio.to_thread(detect_drift_anomaly, store, 5)
|
||||
except Exception as exc: # noqa: BLE001 -- daemon must never die
|
||||
try:
|
||||
await asyncio.to_thread(
|
||||
write_event,
|
||||
store,
|
||||
"identity_audit_error",
|
||||
{"stage": "s5", "error": str(exc)[:500]},
|
||||
severity="warning",
|
||||
)
|
||||
except Exception:
|
||||
# Even the event write failed -- swallow silently so the loop
|
||||
# can continue. Next tick gets a fresh chance.
|
||||
pass
|
||||
|
||||
# Stage 2: sigma topology snapshot + emit (MVCC read).
|
||||
try:
|
||||
await asyncio.to_thread(compute_and_emit, store)
|
||||
except Exception as exc: # noqa: BLE001
|
||||
try:
|
||||
await asyncio.to_thread(
|
||||
write_event,
|
||||
store,
|
||||
"identity_audit_error",
|
||||
{"stage": "sigma", "error": str(exc)[:500]},
|
||||
severity="warning",
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Stage 3 (Phase 7.3 R2/R3): gated periodic Lance storage optimize.
|
||||
# Plan 10.6-01 Task 1.4 simplified: single gate
|
||||
# (interval cooldown). The D7.3-11 MCP-active yield
|
||||
# gate via `_should_yield_to_mcp(socket)` was removed; the
|
||||
# lifecycle state machine handles SLEEP-state coexistence
|
||||
# outside this loop.
|
||||
try:
|
||||
# Access the module attribute at call time (not at import time)
|
||||
# so test fixtures can monkeypatch
|
||||
# `maintenance.LANCE_OPTIMIZE_INTERVAL_SEC` and observe the new
|
||||
# value without needing `importlib.reload(identity_audit)`.
|
||||
interval_sec_now = _maintenance.LANCE_OPTIMIZE_INTERVAL_SEC
|
||||
retention_sec_now = _maintenance.LANCE_OPTIMIZE_RETENTION_SEC
|
||||
elapsed_since_last = time.monotonic() - _last_optimize_completed_at
|
||||
if elapsed_since_last < interval_sec_now:
|
||||
# D7.3-19: silent skip -- no event. The cooldown gates
|
||||
# work, it does not consume a ledger slot.
|
||||
pass
|
||||
else:
|
||||
periodic_t0 = time.monotonic()
|
||||
try:
|
||||
periodic_report = await asyncio.to_thread(
|
||||
optimize_lance_storage, store,
|
||||
)
|
||||
try:
|
||||
await asyncio.to_thread(
|
||||
write_event,
|
||||
store,
|
||||
"lance_storage_optimized",
|
||||
{
|
||||
"phase": "periodic",
|
||||
"retention_days": (
|
||||
retention_sec_now / 86400.0
|
||||
),
|
||||
"per_table": periodic_report,
|
||||
"total_elapsed_sec": round(
|
||||
time.monotonic() - periodic_t0, 3,
|
||||
),
|
||||
},
|
||||
severity="info",
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
finally:
|
||||
# D7.3-14: stamp completion timestamp regardless of
|
||||
# success/exception so a failed optimize still gates
|
||||
# the next run by LANCE_OPTIMIZE_INTERVAL_SEC.
|
||||
_last_optimize_completed_at = time.monotonic()
|
||||
except Exception:
|
||||
# Outer defense-in-depth: a bug in the gate logic itself must
|
||||
# not crash the audit loop (C6 invariant: the daemon must
|
||||
# continue observing its own identity even when maintenance
|
||||
# work fails). Same discipline as the S5/sigma stages above.
|
||||
pass
|
||||
|
||||
# Shutdown-responsive sleep: return early if shutdown fires.
|
||||
try:
|
||||
await asyncio.wait_for(shutdown.wait(), timeout=effective_interval)
|
||||
break # shutdown fired mid-sleep
|
||||
except asyncio.TimeoutError:
|
||||
continue # normal path: time for next audit tick
|
||||
342
src/iai_mcp/idle_detector.py
Normal file
342
src/iai_mcp/idle_detector.py
Normal file
|
|
@ -0,0 +1,342 @@
|
|||
"""Phase 10.4 L6 — hardware-aware idle detector for the wake/sleep cycle.
|
||||
|
||||
Combines three hardware-grounded signals into a single ``sleep_eligible``
|
||||
predicate the daemon's state machine consumes when deciding whether to
|
||||
transition into a sleep cycle:
|
||||
|
||||
1. **Heartbeat-idle (30 min):** no FRESH wrapper heartbeats in the last 30
|
||||
minutes — supplied externally by ``HeartbeatScanner.heartbeat_idle_30min``.
|
||||
2. **HIDIdleTime:** ``ioreg -c IOHIDSystem`` exposes nanoseconds since the
|
||||
last user input event. Convert ns→sec, compare against ``≥ 30 min``.
|
||||
3. **pmset events:** macOS power-manager log entries for ``System Sleep`` or
|
||||
``Display is turned off`` within the last ``window_min`` minutes.
|
||||
|
||||
``sleep_eligible`` is the **disjunction** of the three: any one signal is
|
||||
sufficient. This matches the proposal v2 §2 L6 rule — there is no
|
||||
wall-clock fallback, only hardware-grounded evidence of inactivity.
|
||||
|
||||
Hard constraints (carried from CONTEXT 10.4):
|
||||
- ALL subprocess calls use array form ``[bin, arg, ...]`` with
|
||||
``shell=False`` and a finite ``timeout``. NEVER ``shell=True``. NEVER
|
||||
f-string interpolation into command strings.
|
||||
- Idle CPU near zero — this module is invoked on lifecycle TICK (every 30 s),
|
||||
not faster. ``pmset -g log`` can be slow (≈1 s) so we tail the last 200
|
||||
lines of output rather than re-parsing the entire log.
|
||||
- macOS-only: ``ioreg`` and ``pmset`` are macOS binaries. On non-macOS the
|
||||
detector returns ``None`` / ``False`` gracefully — cross-platform support
|
||||
is deferred per proposal v2 §6.6.
|
||||
- No new third-party dependencies — stdlib only.
|
||||
|
||||
Validates: WAKE-09.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import subprocess
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timedelta, timezone
|
||||
|
||||
|
||||
# Module-level constants -------------------------------------------------------
|
||||
|
||||
#: Absolute path to the macOS ``ioreg`` binary. Hard-coded to avoid PATH-based
|
||||
#: hijacks (a planted ``ioreg`` in the user's PATH could feed us spoofed
|
||||
#: HIDIdleTime values that would falsely keep the daemon awake or asleep).
|
||||
_IOREG_BIN = "/usr/sbin/ioreg"
|
||||
|
||||
#: Absolute path to the macOS ``pmset`` binary. Same PATH-hijack rationale.
|
||||
_PMSET_BIN = "/usr/bin/pmset"
|
||||
|
||||
#: Subprocess timeout for ``ioreg`` (seconds). The call is a straight kernel
|
||||
#: registry dump and returns within ~50 ms on a healthy system; a 5 s ceiling
|
||||
#: keeps a hung kernel-extension probe from blocking the lifecycle TICK.
|
||||
_IOREG_TIMEOUT_SEC = 5
|
||||
|
||||
#: Subprocess timeout for ``pmset -g log``. ``pmset`` walks the system power
|
||||
#: log and on a long-uptime machine can take ~1 s; 10 s ceiling.
|
||||
_PMSET_TIMEOUT_SEC = 10
|
||||
|
||||
#: Number of trailing lines to scan from ``pmset -g log``. The log is
|
||||
#: append-only and ordered by time, so the most-recent events are at the end.
|
||||
#: 200 lines covers ~last 24 h on a typical workstation; the window check
|
||||
#: filters by timestamp anyway.
|
||||
_PMSET_TAIL_LINES = 200
|
||||
|
||||
#: Regex for the HIDIdleTime line. Format: ``"HIDIdleTime" = 12345678901``.
|
||||
_HID_IDLE_RE = re.compile(r'"HIDIdleTime"\s*=\s*(\d+)')
|
||||
|
||||
#: Substrings that indicate a sleep / display-off event in pmset log output.
|
||||
_PMSET_SLEEP_MARKERS = ("System Sleep", "Display is turned off")
|
||||
|
||||
#: Default window for ``pmset_recent_sleep`` (minutes). Aligned with the
|
||||
#: proposal v2 §2 L6 wording: "in last 5 min".
|
||||
_PMSET_DEFAULT_WINDOW_MIN = 5
|
||||
|
||||
#: Hardware-idle threshold for the disjunction in ``sleep_eligible`` —
|
||||
#: ``HIDIdleTime ≥ 30 min`` is sufficient evidence of user inactivity.
|
||||
_HID_IDLE_THRESHOLD_SEC = 30 * 60
|
||||
|
||||
#: Regex anchoring a pmset log line's leading timestamp. The format is
|
||||
#: ``YYYY-MM-DD HH:MM:SS ±HHMM`` (e.g. ``2026-05-02 15:00:00 -0400``).
|
||||
_PMSET_TS_RE = re.compile(
|
||||
r"^(\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2})\s+([+-]\d{4})"
|
||||
)
|
||||
|
||||
#: Strptime pattern for the timestamp captured by ``_PMSET_TS_RE``.
|
||||
_PMSET_TS_FMT = "%Y-%m-%d %H:%M:%S"
|
||||
|
||||
|
||||
# Public dataclass -------------------------------------------------------------
|
||||
|
||||
|
||||
@dataclass
|
||||
class IdleStatus:
|
||||
"""Snapshot of the L6 detector for the doctor row (n) display.
|
||||
|
||||
Attributes:
|
||||
hid_idle_sec: Seconds since last user input, or ``None`` if ``ioreg``
|
||||
is unavailable or its output cannot be parsed.
|
||||
pmset_recent_sleep: True iff a System / Display Sleep event was seen
|
||||
within the configured window. False on parse failure or missing
|
||||
tool — biased toward "no recent sleep" so the doctor row reports
|
||||
a clean state rather than a false-positive sleep.
|
||||
available_signals: Subset of ``["HIDIdleTime", "pmset"]`` listing
|
||||
which hardware sources actually returned data on this probe.
|
||||
Empty list means we have no hardware grounding right now and
|
||||
the L6 disjunction must rely on the heartbeat-idle signal.
|
||||
"""
|
||||
|
||||
hid_idle_sec: int | None = None
|
||||
pmset_recent_sleep: bool = False
|
||||
available_signals: list[str] = field(default_factory=list)
|
||||
|
||||
|
||||
# IdleDetector -----------------------------------------------------------------
|
||||
|
||||
|
||||
class IdleDetector:
|
||||
"""Hardware-grounded idle probe for the daemon state machine.
|
||||
|
||||
Standalone module — wires this into the daemon's TICK so
|
||||
``sleep_eligible`` gates the BEDTIME transition. Each public method
|
||||
can be called independently; ``status()`` aggregates them for the
|
||||
doctor row.
|
||||
"""
|
||||
|
||||
# ---- HIDIdleTime via ioreg --------------------------------------
|
||||
|
||||
def hid_idle_time_sec(self) -> int | None:
|
||||
"""Return seconds since last HID input, or ``None`` on any failure.
|
||||
|
||||
Spawns ``/usr/sbin/ioreg -c IOHIDSystem`` (array form, ``shell=False``,
|
||||
5 s timeout, ``check=False``). Parses the first ``"HIDIdleTime" =
|
||||
<ns>`` match and integer-divides by 1e9. Any error path — missing
|
||||
tool, non-zero exit, parse miss, timeout — collapses to ``None`` so
|
||||
the caller treats the signal as absent rather than zero (zero would
|
||||
falsely imply "active right now").
|
||||
"""
|
||||
try:
|
||||
result = subprocess.run(
|
||||
[_IOREG_BIN, "-c", "IOHIDSystem"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=_IOREG_TIMEOUT_SEC,
|
||||
check=False,
|
||||
)
|
||||
except FileNotFoundError:
|
||||
return None
|
||||
except subprocess.TimeoutExpired:
|
||||
return None
|
||||
except OSError:
|
||||
return None
|
||||
|
||||
if result.returncode != 0:
|
||||
return None
|
||||
|
||||
match = _HID_IDLE_RE.search(result.stdout or "")
|
||||
if match is None:
|
||||
return None
|
||||
try:
|
||||
ns = int(match.group(1))
|
||||
except ValueError:
|
||||
return None
|
||||
if ns < 0:
|
||||
return None
|
||||
return ns // 1_000_000_000
|
||||
|
||||
# ---- pmset event detection --------------------------------------
|
||||
|
||||
def pmset_recent_sleep(
|
||||
self, window_min: int = _PMSET_DEFAULT_WINDOW_MIN
|
||||
) -> bool:
|
||||
"""True iff a System/Display Sleep event was recorded in the window.
|
||||
|
||||
Spawns ``/usr/bin/pmset -g log`` (array form, ``shell=False``, 10 s
|
||||
timeout, ``check=False``). Tails the last ``_PMSET_TAIL_LINES``
|
||||
lines of stdout, parses the leading timestamp, and reports True if
|
||||
any line within ``window_min`` minutes of "now" contains one of the
|
||||
``_PMSET_SLEEP_MARKERS`` substrings.
|
||||
|
||||
Failure modes (missing tool, non-zero exit, no parseable lines) all
|
||||
collapse to ``False`` — biased toward "no recent sleep" so an
|
||||
unavailable signal does not trigger the L6 disjunction on its own.
|
||||
"""
|
||||
try:
|
||||
result = subprocess.run(
|
||||
[_PMSET_BIN, "-g", "log"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=_PMSET_TIMEOUT_SEC,
|
||||
check=False,
|
||||
)
|
||||
except FileNotFoundError:
|
||||
return False
|
||||
except subprocess.TimeoutExpired:
|
||||
return False
|
||||
except OSError:
|
||||
return False
|
||||
|
||||
if result.returncode != 0:
|
||||
return False
|
||||
|
||||
return self._scan_pmset_lines(result.stdout or "", window_min)
|
||||
|
||||
@staticmethod
|
||||
def _scan_pmset_lines(stdout: str, window_min: int) -> bool:
|
||||
"""Helper — pure-function scan over pmset log text.
|
||||
|
||||
Split out for unit testing without subprocess mocking. Walks the
|
||||
last ``_PMSET_TAIL_LINES`` lines, returns True at the first match
|
||||
within the window. Parse failures on individual lines are skipped.
|
||||
"""
|
||||
if window_min <= 0:
|
||||
return False
|
||||
# Build a UTC "now" once; pmset timestamps come with explicit ±HHMM
|
||||
# offsets so we convert each parsed timestamp to UTC for comparison.
|
||||
now_utc = datetime.now(timezone.utc)
|
||||
cutoff = now_utc - timedelta(minutes=window_min)
|
||||
|
||||
# Tail the last N lines so we don't re-scan a multi-megabyte log.
|
||||
lines = stdout.splitlines()
|
||||
tail = lines[-_PMSET_TAIL_LINES:] if len(lines) > _PMSET_TAIL_LINES else lines
|
||||
|
||||
for line in tail:
|
||||
if not any(marker in line for marker in _PMSET_SLEEP_MARKERS):
|
||||
continue
|
||||
ts = _parse_pmset_timestamp(line)
|
||||
if ts is None:
|
||||
continue
|
||||
if ts >= cutoff:
|
||||
return True
|
||||
return False
|
||||
|
||||
# ---- Disjunction predicate consumed by the state machine --------
|
||||
|
||||
def sleep_eligible(self, heartbeat_idle_30min: bool) -> bool:
|
||||
"""L6 disjunction: any of three hardware-grounded signals is sufficient.
|
||||
|
||||
Args:
|
||||
heartbeat_idle_30min: True iff no FRESH wrapper heartbeat in the
|
||||
last 30 minutes (supplied by
|
||||
``HeartbeatScanner.heartbeat_idle_30min``).
|
||||
|
||||
Returns:
|
||||
``heartbeat_idle_30min OR (hid_idle_time_sec ≥ 30 min) OR
|
||||
pmset_recent_sleep()``. Short-circuits on the first True so a
|
||||
heartbeat-idle session does not pay for ``ioreg`` + ``pmset``
|
||||
spawns it does not need.
|
||||
"""
|
||||
if heartbeat_idle_30min:
|
||||
return True
|
||||
|
||||
hid_idle = self.hid_idle_time_sec()
|
||||
if hid_idle is not None and hid_idle >= _HID_IDLE_THRESHOLD_SEC:
|
||||
return True
|
||||
|
||||
return self.pmset_recent_sleep()
|
||||
|
||||
# ---- Aggregated snapshot for doctor row (n) ---------------------
|
||||
|
||||
def status(self) -> IdleStatus:
|
||||
"""Return an ``IdleStatus`` snapshot for the doctor checklist.
|
||||
|
||||
Calls both probes regardless of disjunction short-circuit so the
|
||||
doctor surface always reflects the *actual* per-signal availability
|
||||
(a doctor that hides ``pmset`` whenever ``HIDIdleTime`` already
|
||||
triggers would not help the user diagnose a missing pmset log).
|
||||
"""
|
||||
hid_idle = self.hid_idle_time_sec()
|
||||
pmset_seen = self.pmset_recent_sleep()
|
||||
|
||||
signals: list[str] = []
|
||||
if hid_idle is not None:
|
||||
signals.append("HIDIdleTime")
|
||||
# pmset_recent_sleep returning False does not imply pmset is missing
|
||||
# — it only means no event in the window. We can't reliably tell
|
||||
# "tool present but quiet" from "tool absent" without re-spawning,
|
||||
# so we bias the doctor display toward listing pmset as available
|
||||
# whenever the call succeeded (i.e. did not raise / non-zero-exit).
|
||||
if _pmset_responsive():
|
||||
signals.append("pmset")
|
||||
|
||||
return IdleStatus(
|
||||
hid_idle_sec=hid_idle,
|
||||
pmset_recent_sleep=pmset_seen,
|
||||
available_signals=signals,
|
||||
)
|
||||
|
||||
|
||||
# Module-private helpers -------------------------------------------------------
|
||||
|
||||
|
||||
def _parse_pmset_timestamp(line: str) -> datetime | None:
|
||||
"""Return the leading timestamp of a pmset log line as UTC, or None.
|
||||
|
||||
Matches ``YYYY-MM-DD HH:MM:SS ±HHMM`` at the start of the line. The
|
||||
``±HHMM`` offset is parsed manually because ``%z`` on older Python
|
||||
builds is finicky with shorthand offsets — we apply the offset to a
|
||||
naive datetime and tag it as UTC.
|
||||
"""
|
||||
m = _PMSET_TS_RE.match(line)
|
||||
if m is None:
|
||||
return None
|
||||
ts_str, offset_str = m.group(1), m.group(2)
|
||||
try:
|
||||
naive = datetime.strptime(ts_str, _PMSET_TS_FMT)
|
||||
except ValueError:
|
||||
return None
|
||||
sign = 1 if offset_str[0] == "+" else -1
|
||||
try:
|
||||
hours = int(offset_str[1:3])
|
||||
minutes = int(offset_str[3:5])
|
||||
except ValueError:
|
||||
return None
|
||||
offset = timedelta(hours=hours, minutes=minutes) * sign
|
||||
# Treat naive timestamp as in the offset's local zone, then convert to
|
||||
# UTC by subtracting the offset.
|
||||
return (naive - offset).replace(tzinfo=timezone.utc)
|
||||
|
||||
|
||||
def _pmset_responsive() -> bool:
|
||||
"""Probe whether ``/usr/bin/pmset`` exists and exits 0 for a trivial call.
|
||||
|
||||
Used by ``IdleDetector.status`` to populate ``available_signals``
|
||||
without inferring availability from the (legitimate) "no recent sleep"
|
||||
output. ``pmset -g`` (no subcommand) prints the current power state
|
||||
and exits 0 quickly; missing-binary or non-zero-exit ⇒ unavailable.
|
||||
"""
|
||||
try:
|
||||
result = subprocess.run(
|
||||
[_PMSET_BIN, "-g"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=_PMSET_TIMEOUT_SEC,
|
||||
check=False,
|
||||
)
|
||||
except FileNotFoundError:
|
||||
return False
|
||||
except subprocess.TimeoutExpired:
|
||||
return False
|
||||
except OSError:
|
||||
return False
|
||||
return result.returncode == 0
|
||||
267
src/iai_mcp/insight.py
Normal file
267
src/iai_mcp/insight.py
Normal file
|
|
@ -0,0 +1,267 @@
|
|||
"""Lucid moment orchestration -- (D-13 Option A).
|
||||
|
||||
The "main insight of the day": exactly ONE `claude -p` subprocess call per
|
||||
night, at the end of the last REM cycle. The prompt is built from 3 locally-
|
||||
extracted schema patterns + 1 surprising episode; Claude distils them into a
|
||||
single unifying insight of 1-2 sentences which we store as a semantic-tier
|
||||
record tagged `overnight_insight`.
|
||||
|
||||
Constitutional guards:
|
||||
- LOCAL is the primary worker. This module owns the single surgical
|
||||
Claude call; all other consolidation work is pure-numpy/NetworkX/TF-IDF.
|
||||
- the call goes through host_cli.invoke_host_once which scrubs
|
||||
the paid-API env var and validates the credentials.json subscription mode
|
||||
before spawning the subprocess. This module NEVER references the paid-API
|
||||
env var by name.
|
||||
- pre-flight budget gate via BudgetTracker.can_spend. A call that
|
||||
would exceed the daily cap (overflow into weekly buffer) is silently
|
||||
skipped, queued implicitly for the next night.
|
||||
- Bug #43333: cost_usd > 0 from invoke_host_once is recorded by the wrapper
|
||||
(BudgetTracker.disable_host). This module short-circuits on host_disabled
|
||||
so the bad call never repeats.
|
||||
- / C5: the inserted MemoryRecord is assembled once from Claude's
|
||||
text response; we do NOT rewrite literal_surface after insert.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
from uuid import uuid4
|
||||
|
||||
from iai_mcp.host_cli import (
|
||||
BudgetTracker,
|
||||
invoke_host_once,
|
||||
verify_credentials_subscription,
|
||||
)
|
||||
from iai_mcp.daemon_state import load_state
|
||||
from iai_mcp.events import query_events, write_event
|
||||
from iai_mcp.schema import induce_schemas_tier0
|
||||
from iai_mcp.tz import load_user_tz
|
||||
from iai_mcp.types import MemoryRecord
|
||||
|
||||
# Option A prompt template. The fragments "3 locally-found patterns",
|
||||
# "1 surprising episode", "unifying insight", and "1-2 sentences" are verbatim
|
||||
# per the locked decision; grep tests assert they appear unmodified.
|
||||
INSIGHT_PROMPT_TEMPLATE: str = (
|
||||
"Here are 3 locally-found patterns from today + 1 surprising episode. "
|
||||
"What is the unifying insight? Reply in 1-2 sentences.\n\n"
|
||||
"Patterns:\n{patterns}\n\n"
|
||||
"Surprise:\n{surprise}"
|
||||
)
|
||||
|
||||
# Conservative pre-flight token estimate for the one nightly call -- covers
|
||||
# the prompt frame + patterns + surprise payload. Actual spend is recorded
|
||||
# post-call via BudgetTracker.record(tokens_in, tokens_out).
|
||||
PROMPT_ESTIMATE_TOKENS: int = 500
|
||||
|
||||
# Kinds of events considered "surprising" for the prompt.
|
||||
_SURPRISE_KINDS: frozenset[str] = frozenset({
|
||||
"art_gate_high_novelty",
|
||||
"contradiction_detected",
|
||||
"s4_contradiction",
|
||||
"s5_drift",
|
||||
})
|
||||
|
||||
|
||||
def _gather_patterns(store) -> list[str]:
|
||||
"""Top-3 recent schema candidates by confidence. Graceful on empty."""
|
||||
try:
|
||||
schemas = induce_schemas_tier0(store) or []
|
||||
except Exception: # noqa: BLE001 -- pattern extraction must never crash insight
|
||||
schemas = []
|
||||
|
||||
def _conf(s: Any) -> float:
|
||||
# SchemaCandidate has .confidence; dicts may use the same key.
|
||||
val = getattr(s, "confidence", None)
|
||||
if val is None and isinstance(s, dict):
|
||||
val = s.get("confidence")
|
||||
try:
|
||||
return float(val or 0.0)
|
||||
except (TypeError, ValueError):
|
||||
return 0.0
|
||||
|
||||
def _text(s: Any) -> str:
|
||||
# SchemaCandidate exposes .pattern; dicts use "pattern" / "description".
|
||||
for attr in ("pattern", "description", "summary"):
|
||||
val = getattr(s, attr, None)
|
||||
if val:
|
||||
return str(val)
|
||||
if isinstance(s, dict) and s.get(attr):
|
||||
return str(s[attr])
|
||||
return str(s)
|
||||
|
||||
schemas_sorted = sorted(schemas, key=_conf, reverse=True)
|
||||
top3 = schemas_sorted[:3]
|
||||
if not top3:
|
||||
return ["[no patterns yet]"]
|
||||
return [_text(s) for s in top3]
|
||||
|
||||
|
||||
def _gather_surprise(store) -> str:
|
||||
"""Most recent surprising event over the last 24h. Graceful on empty."""
|
||||
try:
|
||||
since = datetime.now(timezone.utc).replace(
|
||||
hour=0, minute=0, second=0, microsecond=0,
|
||||
)
|
||||
candidates = query_events(store, since=since, limit=1000) or []
|
||||
except Exception: # noqa: BLE001 -- event query must never crash insight
|
||||
candidates = []
|
||||
|
||||
for event in candidates:
|
||||
if event.get("kind") in _SURPRISE_KINDS:
|
||||
data = event.get("data") or event
|
||||
return str(data)[:500]
|
||||
return "[no surprise yet]"
|
||||
|
||||
|
||||
async def generate_overnight_insight(store, session_id: str) -> dict:
|
||||
"""Orchestrate the Option A Claude call.
|
||||
|
||||
Returns a structured dict. Shape (always present): ok (bool), reason
|
||||
(str | None), text (str | None). Success result also carries
|
||||
tokens_in / tokens_out for the caller's bookkeeping.
|
||||
|
||||
Pre-flight gate sequence (every one MUST pass before spawning subprocess):
|
||||
1. verify_credentials_subscription (bug #43333 layer 2)
|
||||
2. BudgetTracker.host_disabled_after_billing_event (bug #43333 layer 3)
|
||||
3. BudgetTracker.can_spend(PROMPT_ESTIMATE_TOKENS) (D-15 budget)
|
||||
"""
|
||||
creds = verify_credentials_subscription()
|
||||
if not creds.get("ok"):
|
||||
return {
|
||||
"ok": False,
|
||||
"reason": "credentials_check_failed",
|
||||
"text": None,
|
||||
"details": creds,
|
||||
}
|
||||
|
||||
state = load_state()
|
||||
tracker = BudgetTracker(state)
|
||||
|
||||
try:
|
||||
tz = load_user_tz()
|
||||
except Exception: # noqa: BLE001 -- tz lookup never crashes the call path
|
||||
tz = timezone.utc # naive fallback; reset_if_new_day handles both
|
||||
|
||||
now = datetime.now(timezone.utc)
|
||||
tracker.reset_if_new_day(now, tz)
|
||||
|
||||
if tracker.host_disabled_after_billing_event():
|
||||
return {"ok": False, "reason": "host_disabled_c3", "text": None}
|
||||
|
||||
if not tracker.can_spend(PROMPT_ESTIMATE_TOKENS):
|
||||
return {"ok": False, "reason": "budget_exceeded", "text": None}
|
||||
|
||||
patterns = _gather_patterns(store)
|
||||
surprise = _gather_surprise(store)
|
||||
prompt = INSIGHT_PROMPT_TEMPLATE.format(
|
||||
patterns="\n".join(f"- {p}" for p in patterns),
|
||||
surprise=surprise,
|
||||
)
|
||||
|
||||
result = await invoke_host_once(prompt, model="haiku")
|
||||
|
||||
# Record any tokens the call actually spent (host_cli returns tokens
|
||||
# even on non-ok paths when the subprocess completed).
|
||||
tokens_in = int(result.get("tokens_in", 0) or 0)
|
||||
tokens_out = int(result.get("tokens_out", 0) or 0)
|
||||
if tokens_in + tokens_out > 0:
|
||||
tracker.record(tokens_in, tokens_out, now)
|
||||
|
||||
if not result.get("ok"):
|
||||
return {
|
||||
"ok": False,
|
||||
"reason": result.get("reason", "claude_call_failed"),
|
||||
"text": None,
|
||||
"details": {k: v for k, v in result.items() if k != "data"},
|
||||
}
|
||||
|
||||
data = result.get("data") or {}
|
||||
insight_text = str(data.get("result", "")).strip()
|
||||
if not insight_text:
|
||||
return {"ok": False, "reason": "empty_insight", "text": None}
|
||||
|
||||
# Build the L1-tier record. MemoryRecord requires a large
|
||||
# set of fields per schema; we default every non-essential field
|
||||
# to a neutral value so the shield/crypto pipeline treats the insight as
|
||||
# a plain semantic record subject to S4/S5 on-read contradiction.
|
||||
embed_dim = getattr(store, "embed_dim", None) or 384
|
||||
record = MemoryRecord(
|
||||
id=uuid4(),
|
||||
tier="semantic",
|
||||
literal_surface=insight_text,
|
||||
aaak_index="",
|
||||
embedding=[0.0] * int(embed_dim),
|
||||
community_id=None,
|
||||
centrality=0.0,
|
||||
detail_level=2,
|
||||
pinned=False,
|
||||
stability=0.0,
|
||||
difficulty=0.0,
|
||||
last_reviewed=None,
|
||||
never_decay=False,
|
||||
never_merge=False,
|
||||
provenance=[{
|
||||
"ts": now.isoformat(),
|
||||
"cue": "overnight_insight",
|
||||
"session_id": session_id,
|
||||
}],
|
||||
created_at=now,
|
||||
updated_at=now,
|
||||
tags=["overnight_insight"],
|
||||
language="en", # the prompt is English-framed; insight is English.
|
||||
)
|
||||
# Dataclass has `tags` (list) not `tag` (scalar); we also expose `tag`
|
||||
# via attribute assignment for callers that prefer the scalar form. This
|
||||
# is NOT a literal_surface mutation so it does not violate C5 MEM-01.
|
||||
try:
|
||||
object.__setattr__(record, "tag", "overnight_insight")
|
||||
except Exception: # noqa: BLE001 -- attribute attach is best-effort
|
||||
pass
|
||||
|
||||
try:
|
||||
# R4 (researcher finding #3): wrap bare-sync store.insert
|
||||
# to avoid blocking the asyncio event loop. Reached from
|
||||
# dream.run_rem_cycle when claude_enabled=True (last cycle of REM).
|
||||
# store.insert touches LanceDB write + encryption — not safe-fast.
|
||||
await asyncio.to_thread(store.insert, record)
|
||||
except Exception as exc: # noqa: BLE001 -- store errors must not crash daemon
|
||||
try:
|
||||
write_event(
|
||||
store,
|
||||
"overnight_insight_store_error",
|
||||
{"error": str(exc)[:500]},
|
||||
severity="warning",
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
return {
|
||||
"ok": False,
|
||||
"reason": "store_insert_failed",
|
||||
"text": insight_text,
|
||||
"error": str(exc)[:500],
|
||||
}
|
||||
|
||||
try:
|
||||
write_event(
|
||||
store,
|
||||
"overnight_insight_generated",
|
||||
{
|
||||
"session_id": session_id,
|
||||
"text_len": len(insight_text),
|
||||
"tokens_in": tokens_in,
|
||||
"tokens_out": tokens_out,
|
||||
},
|
||||
)
|
||||
except Exception: # noqa: BLE001 -- event emission failure is non-fatal
|
||||
pass
|
||||
|
||||
return {
|
||||
"ok": True,
|
||||
"text": insight_text,
|
||||
"reason": None,
|
||||
"tokens_in": tokens_in,
|
||||
"tokens_out": tokens_out,
|
||||
}
|
||||
166
src/iai_mcp/learn.py
Normal file
166
src/iai_mcp/learn.py
Normal file
|
|
@ -0,0 +1,166 @@
|
|||
"""Learning layer (LEARN-01/02/05/06, Task 2).
|
||||
|
||||
Four mechanisms live here:
|
||||
|
||||
1. LEARN-01 (Bayesian profile update) is implemented in `iai_mcp.profile`
|
||||
as `bayesian_update`; this module re-exports the RetrievalFeedback and
|
||||
policy utilities used by the pipeline + core dispatch.
|
||||
|
||||
2. LEARN-02 retrieval-policy RL -- simple tabular gradient on score
|
||||
weights. Feedback sources:
|
||||
- user acted on hit (used) -> boost W_COSINE
|
||||
- user issued contradict (corrected) -> reduce W_COSINE
|
||||
- user re-asked same cue (re_asked) -> reduce W_COSINE
|
||||
|
||||
3. LEARN-05 meta-learning -- ε-greedy bandit over retrieval strategies
|
||||
keyed by query type.
|
||||
|
||||
4. LEARN-06 identity refinement -- reads s5_invariant_update /
|
||||
s5_invariant_proposal events and drifts s5_trust_score up for
|
||||
consistently-agreeing anchors, down for frequently-rejected ones.
|
||||
|
||||
All writes go through the D-STORAGE events table; no .jsonl files.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import random
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
from uuid import UUID
|
||||
|
||||
from iai_mcp.events import query_events
|
||||
from iai_mcp.store import MemoryStore
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- constants
|
||||
|
||||
LEARN_RATE: float = 0.05
|
||||
MAX_WEIGHT: float = 5.0
|
||||
MIN_WEIGHT: float = 0.0
|
||||
EPSILON_EXPLORE: float = 0.1 # LEARN-05 bandit exploration probability
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- feedback
|
||||
|
||||
|
||||
@dataclass
|
||||
class RetrievalFeedback:
|
||||
"""Implicit feedback signal on a memory_recall response."""
|
||||
|
||||
query_type: str # e.g. "fact_lookup" | "open_ended" | "contradiction_check"
|
||||
hit_ids: list[UUID]
|
||||
used_ids: list[UUID] = field(default_factory=list)
|
||||
corrected: bool = False # user issued memory_contradict on a hit
|
||||
re_asked: bool = False # user re-issued the same cue within 5 turns
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- LEARN-02
|
||||
|
||||
|
||||
def update_retrieval_weights(
|
||||
feedback: RetrievalFeedback,
|
||||
current_weights: dict[str, float],
|
||||
) -> dict[str, float]:
|
||||
"""LEARN-02 tabular gradient on score weights.
|
||||
|
||||
Primary signal: use-rate = |used_ids ∩ hit_ids| / |hit_ids|.
|
||||
delta = (use_rate - 0.5) * LEARN_RATE
|
||||
Correction penalty: -LEARN_RATE
|
||||
Re-ask penalty: -LEARN_RATE * 0.5
|
||||
|
||||
All weights clamped to [MIN_WEIGHT, MAX_WEIGHT].
|
||||
Returns a new dict (does not mutate the input).
|
||||
"""
|
||||
w = dict(current_weights)
|
||||
delta = 0.0
|
||||
if feedback.hit_ids:
|
||||
hits_set = set(feedback.hit_ids)
|
||||
used_set = set(feedback.used_ids)
|
||||
use_rate = len(hits_set & used_set) / len(feedback.hit_ids)
|
||||
delta = (use_rate - 0.5) * LEARN_RATE
|
||||
if feedback.corrected:
|
||||
delta -= LEARN_RATE
|
||||
if feedback.re_asked:
|
||||
delta -= LEARN_RATE * 0.5
|
||||
|
||||
w_cos = w.get("W_COSINE", 1.0)
|
||||
w["W_COSINE"] = max(MIN_WEIGHT, min(MAX_WEIGHT, w_cos + delta))
|
||||
|
||||
# Clamp other weights in case of external mutation.
|
||||
for k in ("W_AAAK", "W_DEGREE", "W_AGE"):
|
||||
if k in w:
|
||||
w[k] = max(MIN_WEIGHT, min(MAX_WEIGHT, w[k]))
|
||||
return w
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- LEARN-05
|
||||
|
||||
|
||||
def pick_retrieval_strategy(
|
||||
query_type: str,
|
||||
history: dict,
|
||||
strategies: list[str] | None = None,
|
||||
) -> str:
|
||||
"""ε-greedy bandit over retrieval strategies per query type.
|
||||
|
||||
`history` shape:
|
||||
{
|
||||
"<query_type>": {
|
||||
"<strategy>": {"mean": float, "n": int},
|
||||
...
|
||||
},
|
||||
...
|
||||
}
|
||||
|
||||
Returns the strategy with the highest mean for this query_type except on
|
||||
the ε fraction of calls where a random strategy is explored.
|
||||
"""
|
||||
strategies = strategies or ["pipeline_default", "greedy_2hop", "rich_club_first"]
|
||||
if random.random() < EPSILON_EXPLORE:
|
||||
return random.choice(strategies)
|
||||
rewards = history.get(query_type, {})
|
||||
if not rewards:
|
||||
return strategies[0]
|
||||
return max(
|
||||
strategies,
|
||||
key=lambda s: rewards.get(s, {}).get("mean", 0.0),
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- LEARN-06
|
||||
|
||||
|
||||
TRUST_INCREMENT_PER_COMMIT: float = 0.02
|
||||
TRUST_DECREMENT_PER_REJECT: float = 0.01
|
||||
|
||||
|
||||
def refine_s5_trust_score(
|
||||
store: MemoryStore,
|
||||
record_id: UUID,
|
||||
current: float,
|
||||
) -> float:
|
||||
"""LEARN-06: trust score drifts based on consensus history.
|
||||
|
||||
+TRUST_INCREMENT per s5_invariant_update event with agree_count >= 3
|
||||
-TRUST_DECREMENT per s5_invariant_proposal with passes_vigilance == False
|
||||
|
||||
Clamped to [0, 1].
|
||||
"""
|
||||
updates = query_events(store, kind="s5_invariant_update", limit=200)
|
||||
commits = sum(
|
||||
1 for e in updates
|
||||
if e["data"].get("anchor_id") == str(record_id)
|
||||
and int(e["data"].get("agree_count", 0)) >= 3
|
||||
)
|
||||
rejects_events = query_events(store, kind="s5_invariant_proposal", limit=500)
|
||||
rejects = sum(
|
||||
1 for e in rejects_events
|
||||
if e["data"].get("anchor_id") == str(record_id)
|
||||
and not e["data"].get("passes_vigilance", True)
|
||||
)
|
||||
new_score = (
|
||||
current
|
||||
+ TRUST_INCREMENT_PER_COMMIT * commits
|
||||
- TRUST_DECREMENT_PER_REJECT * rejects
|
||||
)
|
||||
return max(0.0, min(1.0, new_score))
|
||||
336
src/iai_mcp/lifecycle.py
Normal file
336
src/iai_mcp/lifecycle.py
Normal file
|
|
@ -0,0 +1,336 @@
|
|||
"""Phase 10.1 -- Lifecycle State Machine + Shadow-Run Mode.
|
||||
|
||||
Realises LOCKED contracts L1 (hibernation depth: kill process) and
|
||||
L2 (state authority: daemon-only writer for `lifecycle_state.json`).
|
||||
|
||||
The four lifecycle states (WAKE, DROWSY, SLEEP, HIBERNATION) form a
|
||||
deterministic FSM. Transitions are pure functions of the current state
|
||||
and the dispatched event (with optional payload guards); side effects
|
||||
(persistence + event-log append + shadow-run warning) happen ONLY in
|
||||
`dispatch`.
|
||||
|
||||
Phase 10.6 Plan 10.6-01 Task 1.6: flipped `shadow_run` default from
|
||||
True to False. HIBERNATION transitions now actually exit the daemon
|
||||
process via the global shutdown event in `daemon.main()`'s lifecycle
|
||||
tick. The legacy `_rss_watchdog_loop` was removed in Task 1.4; this
|
||||
state machine is the sole owner of shutdown authority.
|
||||
|
||||
Shadow-run mode is preserved as an opt-in for testing: passing
|
||||
`shadow_run=True` to `LifecycleStateMachine.__init__` keeps the old
|
||||
"persist + log + emit shadow_run_warning, do NOT exit" behaviour so
|
||||
the panel R7 validation script can drive transitions without
|
||||
terminating the daemon process.
|
||||
|
||||
Single-writer enforcement (L2): a separate lock file
|
||||
`~/.iai-mcp/.lifecycle.lock` carries the `fcntl.flock(LOCK_EX|LOCK_NB)`.
|
||||
The data file `lifecycle_state.json` is atomically replaced via
|
||||
`os.replace` (Phase 04-01 pattern), which swaps the inode — any lock
|
||||
held on the data file's fd would not protect the new file. The lock
|
||||
file is never renamed, so the lock survives `save_state` cycles.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import errno
|
||||
import fcntl
|
||||
import os
|
||||
from contextlib import contextmanager
|
||||
from datetime import datetime, timezone
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
from typing import Any, Iterator
|
||||
|
||||
from iai_mcp.lifecycle_event_log import LifecycleEventLog
|
||||
from iai_mcp.lifecycle_state import (
|
||||
LIFECYCLE_STATE_PATH,
|
||||
LifecycleState,
|
||||
LifecycleStateRecord,
|
||||
default_state,
|
||||
load_state,
|
||||
save_state,
|
||||
)
|
||||
|
||||
# Default lock path lives next to lifecycle_state.json. Hidden so it
|
||||
# does not show up in `ls`. Pattern matches `daemon-state.json` /
|
||||
# `.daemon-state.json` precedent.
|
||||
DEFAULT_LOCK_PATH: Path = Path.home() / ".iai-mcp" / ".lifecycle.lock"
|
||||
|
||||
|
||||
class LifecycleStateLocked(RuntimeError):
|
||||
"""Raised when another process holds the lifecycle_state.json lock.
|
||||
|
||||
Per L2 the daemon is the sole authority. A wrapper that finds the
|
||||
lock held by the daemon should signal events via Unix socket
|
||||
(when daemon alive) or write `~/.iai-mcp/wake.signal` (when
|
||||
daemon hibernated) — never bypass the lock with a direct write.
|
||||
"""
|
||||
|
||||
|
||||
class LifecycleEvent(str, Enum):
|
||||
"""Events that drive transitions."""
|
||||
|
||||
HEARTBEAT_REFRESH = "heartbeat_refresh"
|
||||
IDLE_5MIN = "idle_5min"
|
||||
IDLE_30MIN = "idle_30min"
|
||||
SLEEP_ELIGIBLE = "sleep_eligible"
|
||||
REQUEST_ARRIVED = "request_arrived"
|
||||
SLEEP_CYCLE_DONE = "sleep_cycle_done"
|
||||
HIBERNATION_GRACE_EXPIRED = "hibernation_grace_expired"
|
||||
WAKE_SIGNAL = "wake_signal"
|
||||
TICK = "tick"
|
||||
|
||||
|
||||
def _utc_now_iso() -> str:
|
||||
"""ISO-8601 UTC timestamp; central so tests can monkey-patch."""
|
||||
return datetime.now(timezone.utc).isoformat()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Pure transition function — exposed at module scope for property tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def compute_transition(
|
||||
state: LifecycleState,
|
||||
event: LifecycleEvent,
|
||||
payload: dict[str, Any] | None = None,
|
||||
) -> LifecycleState | None:
|
||||
"""Return the target state, or None if `event` is a no-op for `state`.
|
||||
|
||||
Pure function — no I/O, no side effects, deterministic. The
|
||||
transition table is encoded inline here rather than a dict because
|
||||
the guard-bearing rows (`(DROWSY, IDLE_30MIN)` AND `sleep_eligible`)
|
||||
are easier to read as straight-line code than a `(state, event,
|
||||
guard) -> state` lookup with conditional fallback.
|
||||
|
||||
Transition table:
|
||||
|
||||
| From | Event | To |
|
||||
| WAKE | IDLE_5MIN | DROWSY |
|
||||
| DROWSY | HEARTBEAT_REFRESH | WAKE |
|
||||
| DROWSY | IDLE_30MIN AND sleep_eligible | SLEEP |
|
||||
| SLEEP | REQUEST_ARRIVED | WAKE |
|
||||
| SLEEP | SLEEP_CYCLE_DONE AND still_idle | HIBERNATION |
|
||||
| HIBERNATION | WAKE_SIGNAL | WAKE |
|
||||
| * | REQUEST_ARRIVED | WAKE (catch-all)
|
||||
|
||||
Catch-all: REQUEST_ARRIVED from any state goes to WAKE; that
|
||||
matches the SLEEP-specific rule above and adds DROWSY/HIBERNATION
|
||||
coverage. (HIBERNATION → WAKE on REQUEST_ARRIVED is a future-phase
|
||||
cold-start path — a wrapper that has REQUEST_ARRIVED to dispatch
|
||||
has already woken the daemon via wake.signal first; this branch
|
||||
exists for in-process test scaffolding and defence-in-depth.)
|
||||
"""
|
||||
payload = payload if payload is not None else {}
|
||||
|
||||
# Catch-all REQUEST_ARRIVED → WAKE; check first so subsequent
|
||||
# branches do not need to repeat the rule per source state.
|
||||
if event is LifecycleEvent.REQUEST_ARRIVED:
|
||||
return LifecycleState.WAKE
|
||||
|
||||
if state is LifecycleState.WAKE:
|
||||
if event is LifecycleEvent.IDLE_5MIN:
|
||||
return LifecycleState.DROWSY
|
||||
return None
|
||||
|
||||
if state is LifecycleState.DROWSY:
|
||||
if event is LifecycleEvent.HEARTBEAT_REFRESH:
|
||||
return LifecycleState.WAKE
|
||||
if event is LifecycleEvent.IDLE_30MIN and payload.get("sleep_eligible"):
|
||||
return LifecycleState.SLEEP
|
||||
return None
|
||||
|
||||
if state is LifecycleState.SLEEP:
|
||||
if event is LifecycleEvent.SLEEP_CYCLE_DONE and payload.get("still_idle"):
|
||||
return LifecycleState.HIBERNATION
|
||||
return None
|
||||
|
||||
if state is LifecycleState.HIBERNATION:
|
||||
if event is LifecycleEvent.WAKE_SIGNAL:
|
||||
return LifecycleState.WAKE
|
||||
# HIBERNATION_GRACE_EXPIRED is a future-phase trigger that
|
||||
# currently has no destination — kept as a known no-op so
|
||||
# the dispatcher does not raise on it.
|
||||
return None
|
||||
|
||||
return None # unreachable; defensive against future state additions
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# File-lock context manager — separate file per advisor recommendation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@contextmanager
|
||||
def _lifecycle_lock(lock_path: Path) -> Iterator[int]:
|
||||
"""Acquire `fcntl.flock(LOCK_EX | LOCK_NB)` on a sibling lock file.
|
||||
|
||||
Raises `LifecycleStateLocked` if the lock is held by another
|
||||
process. The lock file persists across releases — it is the
|
||||
"named-mutex" handle, not the data. The data file
|
||||
`lifecycle_state.json` is atomically replaced separately and
|
||||
therefore must NOT carry the lock (os.replace swaps the inode).
|
||||
"""
|
||||
lock_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
fd = os.open(str(lock_path), os.O_RDWR | os.O_CREAT, 0o600)
|
||||
try:
|
||||
try:
|
||||
fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
|
||||
except OSError as exc:
|
||||
if exc.errno in (errno.EAGAIN, errno.EWOULDBLOCK):
|
||||
raise LifecycleStateLocked(
|
||||
f"another process holds {lock_path}"
|
||||
) from exc
|
||||
raise
|
||||
try:
|
||||
yield fd
|
||||
finally:
|
||||
try:
|
||||
fcntl.flock(fd, fcntl.LOCK_UN)
|
||||
except OSError:
|
||||
# Best effort — the close below releases the lock
|
||||
# whether or not the explicit unlock succeeded.
|
||||
pass
|
||||
finally:
|
||||
os.close(fd)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# State machine class
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class LifecycleStateMachine:
|
||||
"""Side-effecting wrapper around `compute_transition`.
|
||||
|
||||
Owns:
|
||||
- `lifecycle_state.json` reads + writes (single-writer enforced).
|
||||
- Event log emission (`state_transition`, `shadow_run_warning`).
|
||||
- `shadow_run` flag (default False since Phase 10.6; True is a transition-test escape hatch).
|
||||
|
||||
Construction is cheap; the lock is acquired only inside
|
||||
`dispatch`. Tests can drive transitions either via `dispatch`
|
||||
(full pipeline) or via `compute_transition` (pure-function
|
||||
coverage).
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
state_path: Path | None = None,
|
||||
event_log: LifecycleEventLog | None = None,
|
||||
lock_path: Path | None = None,
|
||||
shadow_run: bool = False,
|
||||
) -> None:
|
||||
self._state_path = state_path if state_path is not None else LIFECYCLE_STATE_PATH
|
||||
self._event_log = event_log if event_log is not None else LifecycleEventLog()
|
||||
self._lock_path = lock_path if lock_path is not None else DEFAULT_LOCK_PATH
|
||||
self._shadow_run = shadow_run
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Read-only helpers
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@property
|
||||
def shadow_run(self) -> bool:
|
||||
return self._shadow_run
|
||||
|
||||
@property
|
||||
def current_state(self) -> LifecycleState:
|
||||
record = load_state(self._state_path)
|
||||
return LifecycleState(record["current_state"])
|
||||
|
||||
def snapshot(self) -> LifecycleStateRecord:
|
||||
"""Return the on-disk record (or default if absent)."""
|
||||
return load_state(self._state_path)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Pure transition (no I/O) — re-exposed for callers using an instance
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def compute_transition(
|
||||
self,
|
||||
state: LifecycleState,
|
||||
event: LifecycleEvent,
|
||||
payload: dict[str, Any] | None = None,
|
||||
) -> LifecycleState | None:
|
||||
return compute_transition(state, event, payload)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Dispatcher — single-writer, persists + logs
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def dispatch(
|
||||
self,
|
||||
event: LifecycleEvent,
|
||||
**payload: Any,
|
||||
) -> LifecycleState:
|
||||
"""Apply `event` to the current state, persist, log; return new state.
|
||||
|
||||
Acquires the lock for the duration of the read-compute-write
|
||||
cycle so the disk record cannot be raced by a second writer.
|
||||
Always returns the post-dispatch state — even when the event
|
||||
was a no-op (transition target was None), the caller gets the
|
||||
unchanged current state back. That keeps callers from having
|
||||
to special-case None.
|
||||
"""
|
||||
with _lifecycle_lock(self._lock_path):
|
||||
current_record = load_state(self._state_path)
|
||||
current_state = LifecycleState(current_record["current_state"])
|
||||
|
||||
target = compute_transition(current_state, event, payload)
|
||||
|
||||
now_iso = _utc_now_iso()
|
||||
# last_activity advances on any user-attributable event so
|
||||
# idle timers reset correctly.
|
||||
updated_record: LifecycleStateRecord = dict(current_record) # type: ignore[assignment]
|
||||
if event in {
|
||||
LifecycleEvent.HEARTBEAT_REFRESH,
|
||||
LifecycleEvent.REQUEST_ARRIVED,
|
||||
LifecycleEvent.WAKE_SIGNAL,
|
||||
}:
|
||||
updated_record["last_activity_ts"] = now_iso
|
||||
updated_record["wrapper_event_seq"] = (
|
||||
current_record.get("wrapper_event_seq", 0) + 1
|
||||
)
|
||||
|
||||
updated_record["shadow_run"] = self._shadow_run
|
||||
|
||||
if target is None:
|
||||
# No state change — persist any incremental wrapper-event
|
||||
# bookkeeping (last_activity_ts, seq) but skip the
|
||||
# transition log line.
|
||||
if updated_record != current_record:
|
||||
save_state(updated_record, self._state_path)
|
||||
return current_state
|
||||
|
||||
# State change. Update record and persist atomically.
|
||||
updated_record["current_state"] = target.value
|
||||
updated_record["since_ts"] = now_iso
|
||||
save_state(updated_record, self._state_path)
|
||||
|
||||
# Always log the transition.
|
||||
self._event_log.append(
|
||||
{
|
||||
"event": "state_transition",
|
||||
"from": current_state.value,
|
||||
"to": target.value,
|
||||
"trigger": event.value,
|
||||
}
|
||||
)
|
||||
|
||||
# Shadow-run guard for HIBERNATION: the new state is
|
||||
# persisted on disk (so observers see it), and a warning
|
||||
# event documents that the legacy watchdog still owns
|
||||
# shutdown semantics.
|
||||
if target is LifecycleState.HIBERNATION and self._shadow_run:
|
||||
self._event_log.append(
|
||||
{
|
||||
"event": "shadow_run_warning",
|
||||
"would_action": "hibernate_kill_process",
|
||||
"blocked_by": "shadow_run=True",
|
||||
"note": (
|
||||
"shadow_run=True is a test-only legacy guard "
|
||||
"preserved for transition tests; production "
|
||||
"daemons run with shadow_run=False where this "
|
||||
"branch never fires."
|
||||
),
|
||||
}
|
||||
)
|
||||
|
||||
return target
|
||||
231
src/iai_mcp/lifecycle_event_log.py
Normal file
231
src/iai_mcp/lifecycle_event_log.py
Normal file
|
|
@ -0,0 +1,231 @@
|
|||
"""Phase 10.1 -- JSONL event log for lifecycle state machine validation.
|
||||
|
||||
Per panel verdict R7, the lifecycle state machine needs an append-only
|
||||
event log to validate transitions in shadow-run mode and to provide a
|
||||
post-mortem trail when something misbehaves. The log is the empirical
|
||||
ground truth for "did the machine compute the right state at the right
|
||||
moment", separate from the live `lifecycle_state.json` snapshot.
|
||||
|
||||
Format: JSONL (one JSON record per line), file per UTC date, kept under
|
||||
`~/.iai-mcp/logs/lifecycle-events-YYYY-MM-DD.jsonl`. Daily rotation
|
||||
keyed off the UTC date of the appended event so writes near local
|
||||
midnight do not silently fragment across two files in unpredictable
|
||||
timezones. 30-day retention with gzip compression for older files
|
||||
matches the retention spec.
|
||||
|
||||
Atomic line writes: each `append` opens the file with `O_APPEND |
|
||||
O_CREAT` and uses `fcntl.flock(LOCK_EX)` to serialise concurrent writers
|
||||
across processes. POSIX guarantees `O_APPEND` writes <= PIPE_BUF bytes
|
||||
are atomic on local filesystems; the explicit lock keeps us safe past
|
||||
that threshold (a single JSONL line for our event shapes is well under
|
||||
PIPE_BUF=512, but the lock costs ~microseconds and saves us debugging
|
||||
on the day a payload grows).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import errno
|
||||
import fcntl
|
||||
import gzip
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
# Default location. Overridable via constructor `log_dir` for tests.
|
||||
DEFAULT_LOG_DIR: Path = Path.home() / ".iai-mcp" / "logs"
|
||||
|
||||
# Event kinds emitted by the state machine and helpers; treat as the
|
||||
# closed set for now — adding a kind requires updating downstream
|
||||
# consumers (panel R7 validation script in a future phase).
|
||||
KNOWN_EVENT_KINDS: frozenset[str] = frozenset(
|
||||
{
|
||||
"state_transition",
|
||||
"wrapper_event",
|
||||
"shadow_run_warning",
|
||||
"sleep_step_started",
|
||||
"sleep_step_completed",
|
||||
"quarantine_entered",
|
||||
"quarantine_lifted",
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
def _utc_now() -> datetime:
|
||||
"""Single point of `datetime.now(UTC)` -- patchable in tests."""
|
||||
return datetime.now(timezone.utc)
|
||||
|
||||
|
||||
def _utc_date_string(dt: datetime | None = None) -> str:
|
||||
"""Return the UTC date as `YYYY-MM-DD` for filename derivation."""
|
||||
moment = dt if dt is not None else _utc_now()
|
||||
if moment.tzinfo is None:
|
||||
moment = moment.replace(tzinfo=timezone.utc)
|
||||
return moment.astimezone(timezone.utc).strftime("%Y-%m-%d")
|
||||
|
||||
|
||||
class LifecycleEventLog:
|
||||
"""Append-only JSONL event log with daily rotation + retention.
|
||||
|
||||
Public surface:
|
||||
append(event) -- write one event line, lock + fsync.
|
||||
rotate_old_files(...) -- gzip files older than retention.
|
||||
current_file() -- return path to today's log file.
|
||||
|
||||
Thread/process safety: a per-call `fcntl.flock` on the destination
|
||||
file makes concurrent writers (daemon, hooks) safe. The lock is
|
||||
released as soon as the bytes hit disk; we do NOT keep a long-lived
|
||||
handle, so the file can rotate / be archived between calls without
|
||||
leaving a stale fd open.
|
||||
"""
|
||||
|
||||
def __init__(self, log_dir: Path | None = None) -> None:
|
||||
self._log_dir = log_dir if log_dir is not None else DEFAULT_LOG_DIR
|
||||
self._log_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Path derivation
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def file_for_date(self, date_str: str) -> Path:
|
||||
"""Return the JSONL path for the given `YYYY-MM-DD` date string."""
|
||||
return self._log_dir / f"lifecycle-events-{date_str}.jsonl"
|
||||
|
||||
def current_file(self, now: datetime | None = None) -> Path:
|
||||
"""Return the path that `append` would write to right now."""
|
||||
return self.file_for_date(_utc_date_string(now))
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Appender
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def append(self, event: dict[str, Any], now: datetime | None = None) -> None:
|
||||
"""Append one event as a JSONL line; auto-rotate by UTC date.
|
||||
|
||||
Adds `ts` (current UTC ISO-8601) if the caller did not pass one.
|
||||
Verifies `event["event"]` is a non-empty string but does NOT
|
||||
gate on `KNOWN_EVENT_KINDS` — adding a new kind should not
|
||||
require a code change to the log writer.
|
||||
|
||||
Concurrency: held lock via `fcntl.flock(LOCK_EX)`. Crash mid
|
||||
write: the partial line is on disk because we are O_APPEND
|
||||
without buffering, but `fsync` keeps the *prior* lines
|
||||
durable. Readers MUST tolerate a truncated final line (trim
|
||||
or skip on JSON decode error).
|
||||
"""
|
||||
if not isinstance(event, dict):
|
||||
raise TypeError(
|
||||
f"event must be a dict, got {type(event).__name__}"
|
||||
)
|
||||
kind = event.get("event")
|
||||
if not isinstance(kind, str) or not kind:
|
||||
raise ValueError("event['event'] must be a non-empty string")
|
||||
|
||||
moment = now if now is not None else _utc_now()
|
||||
if "ts" not in event:
|
||||
# Mutate a shallow copy so the caller's dict stays clean.
|
||||
event = {"ts": moment.astimezone(timezone.utc).isoformat(), **event}
|
||||
|
||||
line = json.dumps(event, separators=(",", ":")) + "\n"
|
||||
target = self.current_file(moment)
|
||||
target.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Open with O_APPEND so seeks land at EOF even under concurrent
|
||||
# write; flock for cross-process serialisation.
|
||||
fd = os.open(
|
||||
str(target),
|
||||
os.O_WRONLY | os.O_APPEND | os.O_CREAT,
|
||||
0o600,
|
||||
)
|
||||
try:
|
||||
fcntl.flock(fd, fcntl.LOCK_EX)
|
||||
try:
|
||||
os.write(fd, line.encode("utf-8"))
|
||||
os.fsync(fd)
|
||||
finally:
|
||||
fcntl.flock(fd, fcntl.LOCK_UN)
|
||||
finally:
|
||||
os.close(fd)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Retention / rotation
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def rotate_old_files(
|
||||
self,
|
||||
retention_days: int = 30,
|
||||
now: datetime | None = None,
|
||||
) -> int:
|
||||
"""Gzip log files whose UTC date is older than `retention_days`.
|
||||
|
||||
Already-gzipped files (`*.jsonl.gz`) are left alone. Returns
|
||||
the number of files newly compressed in this call. Files older
|
||||
than `retention_days` that are *also* already gzipped are kept
|
||||
forever in this phase — the spec asks for compression after
|
||||
the window, not deletion. (Deletion is a future-phase decision.)
|
||||
"""
|
||||
moment = now if now is not None else _utc_now()
|
||||
cutoff_date = (moment - timedelta(days=retention_days)).date()
|
||||
|
||||
compressed = 0
|
||||
for path in self._log_dir.glob("lifecycle-events-*.jsonl"):
|
||||
stem = path.stem # lifecycle-events-YYYY-MM-DD
|
||||
try:
|
||||
date_part = stem.rsplit("-", 3)[-3:] # ['YYYY','MM','DD']
|
||||
file_date = datetime.strptime(
|
||||
"-".join(date_part), "%Y-%m-%d"
|
||||
).date()
|
||||
except (ValueError, IndexError):
|
||||
# Unrecognised filename — skip rather than guess.
|
||||
continue
|
||||
if file_date > cutoff_date:
|
||||
continue
|
||||
|
||||
gz_path = path.with_suffix(".jsonl.gz")
|
||||
if gz_path.exists():
|
||||
# Idempotent: already compressed in a prior run.
|
||||
continue
|
||||
try:
|
||||
with path.open("rb") as src, gzip.open(gz_path, "wb") as dst:
|
||||
shutil.copyfileobj(src, dst)
|
||||
# Match prior chmod to keep the tarball user-only.
|
||||
os.chmod(gz_path, 0o600)
|
||||
# Remove the plaintext only after the gzip is durable.
|
||||
os.unlink(path)
|
||||
compressed += 1
|
||||
except OSError as exc:
|
||||
# Best-effort: a single broken file should not stop
|
||||
# the next iterations.
|
||||
if exc.errno in (errno.EACCES, errno.EPERM):
|
||||
continue
|
||||
# Unknown OSError — let the caller see it.
|
||||
raise
|
||||
return compressed
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Read helpers (non-essential but useful for tests + CLI)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def read_all(self, date_str: str | None = None) -> list[dict[str, Any]]:
|
||||
"""Read all events from the file for `date_str` (or today).
|
||||
|
||||
Skips truncated final lines silently — only fully-decoded JSON
|
||||
records are returned. Returns [] if the file does not exist.
|
||||
"""
|
||||
target = self.file_for_date(
|
||||
date_str if date_str is not None else _utc_date_string()
|
||||
)
|
||||
if not target.exists():
|
||||
return []
|
||||
out: list[dict[str, Any]] = []
|
||||
with target.open("r") as f:
|
||||
for line in f:
|
||||
line = line.strip()
|
||||
if not line:
|
||||
continue
|
||||
try:
|
||||
out.append(json.loads(line))
|
||||
except json.JSONDecodeError:
|
||||
continue
|
||||
return out
|
||||
341
src/iai_mcp/lifecycle_lock.py
Normal file
341
src/iai_mcp/lifecycle_lock.py
Normal file
|
|
@ -0,0 +1,341 @@
|
|||
"""Phase 10.6 -- single-machine ``~/.iai-mcp/.locked`` lockfile.
|
||||
|
||||
Realises LOCKED contract (single-machine assumption): the
|
||||
daemon writes ``~/.iai-mcp/.locked`` on startup with PID + hostname +
|
||||
started_at. A second daemon attempt on the same host raises
|
||||
``LifecycleLockConflict``; a daemon on a different host (e.g. via
|
||||
iCloud / NFS sync of ``~/.iai-mcp``) detects the foreign hostname and
|
||||
takes over with a warning.
|
||||
|
||||
This is **distinct from** ``ProcessLock`` (Phase 04-01,
|
||||
``~/.iai-mcp/.lock``): that fcntl flock guards LanceDB writers / heavy
|
||||
consolidation against concurrent in-host processes. The ``.locked``
|
||||
lockfile is a higher-level, human-readable singleton marker for the
|
||||
lifecycle state machine (LSM); it does NOT use ``fcntl.flock`` because
|
||||
single-machine is the assumption and the JSON content (PID +
|
||||
hostname) is the diagnostic surface that ``iai-mcp lifecycle
|
||||
force-unlock`` consumes.
|
||||
|
||||
Design constraints (carried from CONTEXT 10.6):
|
||||
|
||||
- stdlib only -- ``os``, ``socket``, ``json``, ``pathlib``, ``datetime``.
|
||||
- POSIX-atomic write via ``tempfile.mkstemp`` + ``os.replace`` (same
|
||||
pattern as ``daemon_state.save_state`` / ``lifecycle_state.save_state``).
|
||||
- 0o600 file mode -- consistent with the rest of the project's state files.
|
||||
- Hostname recorded so iCloud / NFS sync of ``~/.iai-mcp`` does NOT
|
||||
produce a deadlock when the user moves to a second Mac.
|
||||
- PID-liveness check uses ``os.kill(pid, 0)`` (same trick as
|
||||
``heartbeat_scanner._is_pid_alive``).
|
||||
|
||||
Validates: WAKE-13.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import socket
|
||||
import tempfile
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import TypedDict
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Defaults / constants
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _default_lock_path() -> Path:
|
||||
"""Resolve the default lockfile path, honoring ``IAI_MCP_STORE``.
|
||||
|
||||
Tests + multi-tenant deployments override the iai-mcp data root via
|
||||
the ``IAI_MCP_STORE`` env var (HIGH-4 LOCK precedent, Plan 07-04).
|
||||
Falling back to ``~/.iai-mcp`` keeps the production default
|
||||
untouched.
|
||||
"""
|
||||
env_path = os.environ.get("IAI_MCP_STORE")
|
||||
root = Path(env_path) if env_path else (Path.home() / ".iai-mcp")
|
||||
return root / ".locked"
|
||||
|
||||
|
||||
# Production lock-file path. Re-resolved via the helper so monkey-
|
||||
# patching ``IAI_MCP_STORE`` in tests redirects the production
|
||||
# default automatically. Tests can also pass an explicit ``lock_path``
|
||||
# argument to ``LifecycleLock``.
|
||||
DEFAULT_LOCK_PATH: Path = _default_lock_path()
|
||||
|
||||
#: Schema version persisted alongside the payload so a future bump can
|
||||
#: be detected at takeover time.
|
||||
SCHEMA_VERSION: int = 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Errors
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class LifecycleLockConflict(RuntimeError):
|
||||
"""Raised when ``acquire()`` finds a live daemon on the same host.
|
||||
|
||||
The exception carries the existing lockfile content as a dict so the
|
||||
caller (daemon main, ``iai-mcp lifecycle force-unlock``) can surface
|
||||
PID / started_at to the operator without a second disk read.
|
||||
"""
|
||||
|
||||
def __init__(self, message: str, existing: "LockPayload | None" = None) -> None:
|
||||
super().__init__(message)
|
||||
self.existing = existing
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Typed payload schema
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class LockPayload(TypedDict):
|
||||
"""On-disk schema for ``.locked``."""
|
||||
|
||||
pid: int
|
||||
hostname: str
|
||||
started_at: str # ISO-8601 UTC
|
||||
schema_version: int
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Module-private helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _utc_now_iso() -> str:
|
||||
"""Return ISO-8601 UTC timestamp -- single point so tests can patch."""
|
||||
return datetime.now(timezone.utc).isoformat()
|
||||
|
||||
|
||||
def _current_hostname() -> str:
|
||||
"""Return ``socket.gethostname()``; central so tests can monkey-patch."""
|
||||
return socket.gethostname()
|
||||
|
||||
|
||||
def _is_pid_alive(pid: int) -> bool:
|
||||
"""Return True iff ``pid`` exists in the kernel process table.
|
||||
|
||||
Mirrors the discipline in ``heartbeat_scanner._is_pid_alive``:
|
||||
``os.kill(pid, 0)`` sends no signal but raises ``ProcessLookupError``
|
||||
when the PID has been reaped. ``PermissionError`` (EPERM) means the
|
||||
process exists but we cannot signal it -- still alive for liveness
|
||||
purposes. Negative / zero PIDs are dead.
|
||||
"""
|
||||
if pid <= 0:
|
||||
return False
|
||||
try:
|
||||
os.kill(pid, 0)
|
||||
except ProcessLookupError:
|
||||
return False
|
||||
except PermissionError:
|
||||
return True
|
||||
return True
|
||||
|
||||
|
||||
def _validate_payload(raw: object) -> LockPayload:
|
||||
"""Reject malformed JSON; return a typed copy on success.
|
||||
|
||||
Schema check kept light -- enough to catch operator hand-edits and
|
||||
out-of-band writes from a stale schema version. We do NOT require
|
||||
``schema_version`` to equal ``SCHEMA_VERSION``; a higher schema is
|
||||
treated as forward-compatible (the daemon refuses to overwrite it
|
||||
only if PID is alive on same host -- the conflict path).
|
||||
"""
|
||||
if not isinstance(raw, dict):
|
||||
raise ValueError(
|
||||
f"lockfile payload must be a JSON object, got {type(raw).__name__}"
|
||||
)
|
||||
pid = raw.get("pid")
|
||||
if not isinstance(pid, int) or pid <= 0:
|
||||
raise ValueError(f"lockfile.pid must be a positive int, got {pid!r}")
|
||||
hostname = raw.get("hostname")
|
||||
if not isinstance(hostname, str) or not hostname:
|
||||
raise ValueError(
|
||||
f"lockfile.hostname must be a non-empty string, got {hostname!r}"
|
||||
)
|
||||
started_at = raw.get("started_at")
|
||||
if not isinstance(started_at, str) or not started_at:
|
||||
raise ValueError(
|
||||
f"lockfile.started_at must be a non-empty string, got {started_at!r}"
|
||||
)
|
||||
sv = raw.get("schema_version")
|
||||
if not isinstance(sv, int) or sv <= 0:
|
||||
raise ValueError(
|
||||
f"lockfile.schema_version must be a positive int, got {sv!r}"
|
||||
)
|
||||
return {
|
||||
"pid": pid,
|
||||
"hostname": hostname,
|
||||
"started_at": started_at,
|
||||
"schema_version": sv,
|
||||
}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# LifecycleLock
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class LifecycleLock:
|
||||
"""Single-machine lockfile for the lifecycle state machine.
|
||||
|
||||
Construction is cheap; no I/O happens until ``acquire()`` is called.
|
||||
Tests instantiate with an explicit ``lock_path`` under ``tmp_path``
|
||||
so production state is never touched.
|
||||
"""
|
||||
|
||||
def __init__(self, lock_path: Path | None = None) -> None:
|
||||
# Resolve at construction time (not import time) so a test
|
||||
# that monkey-patches IAI_MCP_STORE before instantiating sees
|
||||
# the redirected path. Production callers pass no argument
|
||||
# and get the canonical ~/.iai-mcp/.locked.
|
||||
self._lock_path = (
|
||||
lock_path if lock_path is not None else _default_lock_path()
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Read accessors
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@property
|
||||
def lock_path(self) -> Path:
|
||||
"""Filesystem location of the ``.locked`` file."""
|
||||
return self._lock_path
|
||||
|
||||
def read(self) -> LockPayload | None:
|
||||
"""Return the on-disk payload, or ``None`` if absent / corrupt.
|
||||
|
||||
Corrupt-file behaviour is "no lock" rather than raising: an
|
||||
operator hand-edit that produces invalid JSON should not block
|
||||
a fresh daemon boot. ``acquire()`` will then overwrite the file.
|
||||
"""
|
||||
if not self._lock_path.exists():
|
||||
return None
|
||||
try:
|
||||
raw = json.loads(self._lock_path.read_text(encoding="utf-8"))
|
||||
except (OSError, json.JSONDecodeError):
|
||||
return None
|
||||
try:
|
||||
return _validate_payload(raw)
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
def is_held_by_self(self) -> bool:
|
||||
"""True iff the on-disk lockfile names this process + this host.
|
||||
|
||||
Used by the daemon to short-circuit a redundant ``acquire()``
|
||||
on a fast restart where the file was never released (e.g. a
|
||||
crash that bypassed the ``finally`` cleanup -- in that case
|
||||
the PID will not match either, so this returns False and
|
||||
``acquire()`` does the dead-PID takeover).
|
||||
"""
|
||||
payload = self.read()
|
||||
if payload is None:
|
||||
return False
|
||||
return (
|
||||
payload["pid"] == os.getpid()
|
||||
and payload["hostname"] == _current_hostname()
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Acquire / release
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def acquire(self) -> None:
|
||||
"""Write the lockfile, claiming the singleton slot for this process.
|
||||
|
||||
Decision tree:
|
||||
|
||||
1. No lockfile present -> write fresh.
|
||||
2. Lockfile present, corrupt JSON -> overwrite (treat as absent).
|
||||
3. Lockfile present, foreign hostname -> overwrite + log a warning
|
||||
(cross-host scenario via iCloud / NFS sync; daemon on the new
|
||||
host wins because the original host's daemon cannot reach
|
||||
this filesystem).
|
||||
4. Lockfile present, same hostname, dead PID -> overwrite (the
|
||||
previous daemon crashed before releasing).
|
||||
5. Lockfile present, same hostname, live PID -> ``raise
|
||||
LifecycleLockConflict`` (a real concurrent boot attempt).
|
||||
|
||||
Atomic write via ``tempfile.mkstemp`` + ``os.replace`` -- same
|
||||
pattern as ``lifecycle_state.save_state`` / ``daemon_state.save_state``.
|
||||
"""
|
||||
existing = self.read()
|
||||
if existing is not None:
|
||||
# Live PID on same host -> conflict.
|
||||
if existing["hostname"] == _current_hostname() and _is_pid_alive(
|
||||
existing["pid"]
|
||||
):
|
||||
raise LifecycleLockConflict(
|
||||
f"daemon already running: pid={existing['pid']} "
|
||||
f"hostname={existing['hostname']} "
|
||||
f"started_at={existing['started_at']}",
|
||||
existing=existing,
|
||||
)
|
||||
# Dead PID OR foreign hostname -> takeover (no error). The
|
||||
# foreign-hostname branch corresponds to the cross-host
|
||||
# iCloud / NFS sync scenario; we silently overwrite because
|
||||
# the only viable remediation is "the new host wins"
|
||||
# (the original host's daemon cannot share state with us
|
||||
# over a sync filesystem, by definition).
|
||||
|
||||
payload: LockPayload = {
|
||||
"pid": os.getpid(),
|
||||
"hostname": _current_hostname(),
|
||||
"started_at": _utc_now_iso(),
|
||||
"schema_version": SCHEMA_VERSION,
|
||||
}
|
||||
|
||||
self._lock_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
fd, tmp = tempfile.mkstemp(
|
||||
prefix=".locked.",
|
||||
suffix=".tmp",
|
||||
dir=str(self._lock_path.parent),
|
||||
)
|
||||
try:
|
||||
with os.fdopen(fd, "w") as f:
|
||||
json.dump(payload, f, indent=2)
|
||||
f.flush()
|
||||
os.fsync(f.fileno())
|
||||
os.chmod(tmp, 0o600)
|
||||
os.replace(tmp, self._lock_path)
|
||||
except Exception:
|
||||
try:
|
||||
os.unlink(tmp)
|
||||
except OSError:
|
||||
pass
|
||||
raise
|
||||
|
||||
def release(self) -> None:
|
||||
"""Delete the lockfile. Idempotent -- absent file is not an error.
|
||||
|
||||
Called from the daemon's graceful-shutdown ``finally`` block. A
|
||||
crash before this point leaves the file intact; the next
|
||||
``acquire()`` will detect the dead PID and overwrite.
|
||||
"""
|
||||
try:
|
||||
self._lock_path.unlink()
|
||||
except FileNotFoundError:
|
||||
return
|
||||
|
||||
def force_unlock(self) -> LockPayload | None:
|
||||
"""Delete the lockfile unconditionally; return the prior content.
|
||||
|
||||
Operator-facing helper used by ``iai-mcp lifecycle force-unlock``
|
||||
when a daemon crashed before ``release()`` and the dead-PID
|
||||
takeover did not catch the case (e.g. the operator wants to
|
||||
clear a foreign-hostname lock without booting a daemon first).
|
||||
|
||||
Returns the parsed prior payload (or ``None`` if absent /
|
||||
corrupt) so the caller can print PID / hostname / started_at
|
||||
in the diagnostic output.
|
||||
"""
|
||||
previous = self.read()
|
||||
try:
|
||||
self._lock_path.unlink()
|
||||
except FileNotFoundError:
|
||||
pass
|
||||
return previous
|
||||
233
src/iai_mcp/lifecycle_state.py
Normal file
233
src/iai_mcp/lifecycle_state.py
Normal file
|
|
@ -0,0 +1,233 @@
|
|||
"""Phase 10.1 -- typed schema + atomic load/save for lifecycle_state.json.
|
||||
|
||||
The 4-state lifecycle (WAKE / DROWSY / SLEEP / HIBERNATION) needs a single
|
||||
source of truth on disk. Per LOCKED contract L2 (panel verdict R2), the
|
||||
daemon is the ONLY writer of `~/.iai-mcp/lifecycle_state.json`; wrappers
|
||||
signal events via Unix socket OR atomic-write `~/.iai-mcp/wake.signal`
|
||||
filesystem marker.
|
||||
|
||||
Persistence pattern mirrors `daemon_state.py` (Phase 04-01) and
|
||||
`maintenance.py` (Phase 07.11-03):
|
||||
- Writes via `tempfile.mkstemp` + `os.replace` (POSIX atomic rename).
|
||||
- Crash mid-write leaves the prior file intact; readers either see
|
||||
the old complete blob or the new complete blob, never partial bytes.
|
||||
- File mode 0o600 (user-only, matches T-04-07 mitigation).
|
||||
|
||||
Schema mirrors lifecycle_state.json spec.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import tempfile
|
||||
from datetime import datetime, timezone
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
from typing import TypedDict
|
||||
|
||||
# Default location. Overridable for tests via the `path` arg of load/save.
|
||||
LIFECYCLE_STATE_PATH: Path = Path.home() / ".iai-mcp" / "lifecycle_state.json"
|
||||
|
||||
|
||||
class LifecycleState(str, Enum):
|
||||
"""Four lifecycle states."""
|
||||
|
||||
WAKE = "WAKE"
|
||||
DROWSY = "DROWSY"
|
||||
SLEEP = "SLEEP"
|
||||
HIBERNATION = "HIBERNATION"
|
||||
|
||||
|
||||
class SleepCycleProgress(TypedDict, total=False):
|
||||
"""Per-attempt progress of the multi-step sleep pipeline.
|
||||
|
||||
All fields optional so the dict can be partially populated mid-cycle;
|
||||
`last_completed_step=0` and `attempt=1` represent a freshly-started cycle.
|
||||
"""
|
||||
|
||||
last_completed_step: int
|
||||
attempt: int
|
||||
last_error: str | None
|
||||
started_at: str # ISO-8601 UTC
|
||||
|
||||
|
||||
class Quarantine(TypedDict):
|
||||
"""A failing sleep step can quarantine the cycle until `until_ts`."""
|
||||
|
||||
until_ts: str # ISO-8601 UTC
|
||||
reason: str
|
||||
since_ts: str # ISO-8601 UTC
|
||||
|
||||
|
||||
class LifecycleStateRecord(TypedDict):
|
||||
"""On-disk schema for `lifecycle_state.json`.
|
||||
|
||||
`sleep_cycle_progress` and `quarantine` are nullable; the rest are
|
||||
always present in a well-formed record. `shadow_run` toggles whether
|
||||
the state machine actually executes process termination on
|
||||
HIBERNATION (False post-Phase 10.6) or merely logs the would-action.
|
||||
"""
|
||||
|
||||
current_state: str # one of LifecycleState values
|
||||
since_ts: str # ISO-8601 UTC
|
||||
last_activity_ts: str # ISO-8601 UTC
|
||||
wrapper_event_seq: int
|
||||
sleep_cycle_progress: SleepCycleProgress | None
|
||||
quarantine: Quarantine | None
|
||||
shadow_run: bool
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _utc_now_iso() -> str:
|
||||
"""Return ISO-8601 UTC timestamp with explicit `+00:00` suffix.
|
||||
|
||||
`isoformat()` on a UTC-aware datetime emits `+00:00` rather than `Z`.
|
||||
Both forms are valid ISO-8601; downstream readers (CLI status, event
|
||||
log, Hypothesis tests) parse via `datetime.fromisoformat` which
|
||||
accepts the offset form.
|
||||
"""
|
||||
return datetime.now(timezone.utc).isoformat()
|
||||
|
||||
|
||||
def default_state() -> LifecycleStateRecord:
|
||||
"""Return a fresh WAKE record with shadow_run=False (Phase 10.6 default).
|
||||
|
||||
Used by `load_state` when the file is absent or malformed (self-heal),
|
||||
and by tests / callers that need a known starting point.
|
||||
|
||||
Plan 10.6-01 Task 1.6 flipped the default from True to False:
|
||||
HIBERNATION transitions now actually exit the daemon process via the
|
||||
global shutdown event in `daemon.main()`. The legacy RSS-watchdog has
|
||||
been removed in Task 1.4; the lifecycle state machine owns shutdown
|
||||
authority.
|
||||
"""
|
||||
now = _utc_now_iso()
|
||||
return {
|
||||
"current_state": LifecycleState.WAKE.value,
|
||||
"since_ts": now,
|
||||
"last_activity_ts": now,
|
||||
"wrapper_event_seq": 0,
|
||||
"sleep_cycle_progress": None,
|
||||
"quarantine": None,
|
||||
"shadow_run": False,
|
||||
}
|
||||
|
||||
|
||||
def _validate_record(raw: object) -> LifecycleStateRecord:
|
||||
"""Reject malformed JSON; return a typed copy on success.
|
||||
|
||||
A minimal schema check — enough to catch hand-edited corruption and
|
||||
out-of-band writes from a stale schema version, without pulling in
|
||||
pydantic for runtime validation. Reads stay zero-allocation past the
|
||||
JSON parse step.
|
||||
"""
|
||||
if not isinstance(raw, dict):
|
||||
raise ValueError(
|
||||
f"lifecycle_state record must be a JSON object, got {type(raw).__name__}"
|
||||
)
|
||||
|
||||
required_str_keys = ("current_state", "since_ts", "last_activity_ts")
|
||||
for k in required_str_keys:
|
||||
v = raw.get(k)
|
||||
if not isinstance(v, str) or not v:
|
||||
raise ValueError(f"lifecycle_state.{k} must be a non-empty string, got {v!r}")
|
||||
|
||||
state_value = raw["current_state"]
|
||||
if state_value not in {s.value for s in LifecycleState}:
|
||||
raise ValueError(
|
||||
f"lifecycle_state.current_state {state_value!r} is not a valid LifecycleState"
|
||||
)
|
||||
|
||||
seq = raw.get("wrapper_event_seq")
|
||||
if not isinstance(seq, int) or seq < 0:
|
||||
raise ValueError(
|
||||
f"lifecycle_state.wrapper_event_seq must be a non-negative int, got {seq!r}"
|
||||
)
|
||||
|
||||
shadow = raw.get("shadow_run")
|
||||
if not isinstance(shadow, bool):
|
||||
raise ValueError(
|
||||
f"lifecycle_state.shadow_run must be a bool, got {shadow!r}"
|
||||
)
|
||||
|
||||
progress = raw.get("sleep_cycle_progress")
|
||||
if progress is not None and not isinstance(progress, dict):
|
||||
raise ValueError(
|
||||
f"lifecycle_state.sleep_cycle_progress must be dict or null, got {progress!r}"
|
||||
)
|
||||
|
||||
quarantine = raw.get("quarantine")
|
||||
if quarantine is not None:
|
||||
if not isinstance(quarantine, dict):
|
||||
raise ValueError(
|
||||
f"lifecycle_state.quarantine must be dict or null, got {quarantine!r}"
|
||||
)
|
||||
for k in ("until_ts", "reason", "since_ts"):
|
||||
if not isinstance(quarantine.get(k), str):
|
||||
raise ValueError(
|
||||
f"lifecycle_state.quarantine.{k} must be string"
|
||||
)
|
||||
|
||||
# Cast is safe after the checks above; mypy/pylance accept the dict.
|
||||
return raw # type: ignore[return-value]
|
||||
|
||||
|
||||
def load_state(path: Path | None = None) -> LifecycleStateRecord:
|
||||
"""Read `lifecycle_state.json`; return `default_state()` if absent.
|
||||
|
||||
On JSON-decode error or schema-validation error: also returns a
|
||||
fresh default state. The legacy file is left in place (no auto-delete)
|
||||
so an operator can inspect it; `save_state` will overwrite it on the
|
||||
next persist.
|
||||
"""
|
||||
target = path if path is not None else LIFECYCLE_STATE_PATH
|
||||
if not target.exists():
|
||||
return default_state()
|
||||
try:
|
||||
raw = json.loads(target.read_text())
|
||||
except (OSError, json.JSONDecodeError):
|
||||
return default_state()
|
||||
try:
|
||||
return _validate_record(raw)
|
||||
except ValueError:
|
||||
return default_state()
|
||||
|
||||
|
||||
def save_state(record: LifecycleStateRecord, path: Path | None = None) -> None:
|
||||
"""Atomically persist `record` via tempfile + os.replace.
|
||||
|
||||
Mirrors `daemon_state.save_state` (Phase 04-01) bullet-for-bullet:
|
||||
creates parent dir if missing; writes to a sibling temp file in the
|
||||
same directory (required so os.replace is an atomic same-filesystem
|
||||
rename); fsyncs the file contents before rename so the data is on
|
||||
disk; chmods 0o600 before the swap so the visible file is never
|
||||
world-readable; on exception unlinks the temp file so /tmp does not
|
||||
accumulate.
|
||||
"""
|
||||
target = path if path is not None else LIFECYCLE_STATE_PATH
|
||||
# Validate before writing so callers get an early ValueError on
|
||||
# malformed records rather than persisting garbage to disk.
|
||||
_validate_record(record)
|
||||
|
||||
target.parent.mkdir(parents=True, exist_ok=True)
|
||||
fd, tmp = tempfile.mkstemp(
|
||||
prefix=".lifecycle_state.",
|
||||
suffix=".tmp",
|
||||
dir=str(target.parent),
|
||||
)
|
||||
try:
|
||||
with os.fdopen(fd, "w") as f:
|
||||
json.dump(record, f, indent=2)
|
||||
f.flush()
|
||||
os.fsync(f.fileno())
|
||||
os.chmod(tmp, 0o600)
|
||||
os.replace(tmp, target)
|
||||
except Exception:
|
||||
try:
|
||||
os.unlink(tmp)
|
||||
except OSError:
|
||||
pass
|
||||
raise
|
||||
179
src/iai_mcp/maintenance.py
Normal file
179
src/iai_mcp/maintenance.py
Normal file
|
|
@ -0,0 +1,179 @@
|
|||
"""periodic Lance storage maintenance.
|
||||
|
||||
Forensic trigger (2026-04-27): the daemon was running 248% CPU sustained for
|
||||
1h14min because `records.lance` had grown to 10,841 versions / 3.66 GB for
|
||||
only 7,130 rows over 9 days. There has never been a `table.optimize()` call
|
||||
site in production code. Offline `optimize(cleanup_older_than=timedelta(days=1))`
|
||||
reclaimed 84% disk and dropped `build_runtime_graph` cold latency 13.3s ->
|
||||
0.13s (102x). codifies that fix as a daemon-managed periodic job
|
||||
so version manifests + soft-deleted rows do not re-accumulate.
|
||||
|
||||
Architecture:
|
||||
- D7.3-01: periodic + startup, NOT write-triggered (post-write hook would
|
||||
amplify write latency unboundedly).
|
||||
- D7.3-02: single-process inside the daemon (no worker process).
|
||||
- D7.3-03: helper is SYNC; callers wrap in `asyncio.to_thread`. Phase 7.2's
|
||||
AST fence (tests/test_no_bare_sync_in_async.py) enforces this discipline
|
||||
via `BLOCKING_NAMES` (D7.3-26).
|
||||
- D7.3-09: helper NEVER raises. Per-table failures captured in the per-table
|
||||
dict's `error` field. The daemon must not die from an optimize failure.
|
||||
- D7.3-13/D7.3-21: 1-day default retention matches Lance docs FAQ.
|
||||
|
||||
Two env overrides (read once at import per D7.3-22):
|
||||
- IAI_MCP_LANCE_OPTIMIZE_INTERVAL_SEC (default 3600s = 1h cadence)
|
||||
- IAI_MCP_LANCE_OPTIMIZE_RETENTION_SEC (default 86400s = 1 day)
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import time
|
||||
from datetime import timedelta
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
# D7.3-20: 1-hour periodic cadence (12x the cascade-poll cadence; same order
|
||||
# of magnitude as the maintenance work itself; far longer than typical session
|
||||
# length so optimize rarely interferes; short enough that bloat stays bounded).
|
||||
LANCE_OPTIMIZE_INTERVAL_SEC: float = float(
|
||||
os.environ.get("IAI_MCP_LANCE_OPTIMIZE_INTERVAL_SEC", "3600.0"),
|
||||
)
|
||||
|
||||
# D7.3-21: 1-day retention matches Lance's documented `cleanup_older_than`
|
||||
# example. Aggressive enough to free disk fast; conservative enough for
|
||||
# point-in-time time-travel reads within the same day.
|
||||
LANCE_OPTIMIZE_RETENTION_SEC: float = float(
|
||||
os.environ.get("IAI_MCP_LANCE_OPTIMIZE_RETENTION_SEC", "86400.0"),
|
||||
)
|
||||
|
||||
# Daemon-owned tables; matches src/iai_mcp/store.py constants
|
||||
# (RECORDS_TABLE/EDGES_TABLE/EVENTS_TABLE) but kept literal so this module
|
||||
# does not pull MemoryStore at import time.
|
||||
_TABLES_TO_OPTIMIZE: tuple[str, ...] = ("records", "edges", "events")
|
||||
|
||||
|
||||
def _measure_table_size_bytes(store: Any, table_name: str) -> int:
|
||||
"""Sum the size of every file under <storage_root>/lancedb/<table>.lance/.
|
||||
|
||||
Returns 0 on any measurement failure so size metrics are best-effort:
|
||||
a measurement failure must NOT cause the helper itself to raise. The
|
||||
actual `tbl.optimize()` call is independent — disk-size telemetry is
|
||||
purely observational and exists for the operator-facing event payload.
|
||||
"""
|
||||
try:
|
||||
# MemoryStore.root is the user-supplied (or env-derived) storage
|
||||
# root; the LanceDB connection lives at root/lancedb (see store.py
|
||||
# line 202). Each table is a `<name>.lance` directory underneath.
|
||||
root = getattr(store, "root", None)
|
||||
if root is None:
|
||||
return 0
|
||||
table_dir = Path(root) / "lancedb" / f"{table_name}.lance"
|
||||
if not table_dir.exists():
|
||||
return 0
|
||||
total = 0
|
||||
for p in table_dir.rglob("*"):
|
||||
try:
|
||||
if p.is_file():
|
||||
total += p.stat().st_size
|
||||
except OSError:
|
||||
# File could be unlinked mid-scan during an active optimize;
|
||||
# skip it, keep counting the rest.
|
||||
continue
|
||||
return total
|
||||
except Exception:
|
||||
return 0
|
||||
|
||||
|
||||
def optimize_lance_storage(
|
||||
store: Any,
|
||||
*,
|
||||
retention: timedelta | None = None,
|
||||
) -> dict[str, dict[str, Any]]:
|
||||
"""Run `tbl.optimize(cleanup_older_than=retention)` on each daemon-owned
|
||||
LanceDB table (records, edges, events).
|
||||
|
||||
Args:
|
||||
store: MemoryStore-shaped object exposing `.db` (lancedb.Connection).
|
||||
Duck-typed so test fixtures can pass a stub. The function only
|
||||
reads `store.db` and `store.root` (latter optional for size
|
||||
telemetry).
|
||||
retention: timedelta passed to LanceDB's `cleanup_older_than`. If
|
||||
None, defaults to `timedelta(seconds=LANCE_OPTIMIZE_RETENTION_SEC)`
|
||||
which is 1 day in production.
|
||||
|
||||
Returns:
|
||||
Flat dict keyed by table name (`records`, `edges`, `events`). Each
|
||||
value is a per-table dict::
|
||||
|
||||
{
|
||||
"rows_before": int, # tbl.count_rows() pre-optimize
|
||||
"rows_after": int, # tbl.count_rows() post-optimize
|
||||
"versions_before": int, # len(tbl.list_versions()) pre
|
||||
"versions_after": int, # len(tbl.list_versions()) post
|
||||
"size_bytes_before": int, # du -sb on .lance/ pre, 0 on err
|
||||
"size_bytes_after": int, # du -sb on .lance/ post, 0 on err
|
||||
"elapsed_sec": float, # wall-clock for optimize()
|
||||
"error": str, # ONLY present on failure
|
||||
}
|
||||
|
||||
Per D7.3-09: this helper NEVER raises. Per-table failure captured in
|
||||
the table's `error` field; the other tables are still processed.
|
||||
"""
|
||||
if retention is None:
|
||||
retention = timedelta(seconds=LANCE_OPTIMIZE_RETENTION_SEC)
|
||||
|
||||
report: dict[str, dict[str, Any]] = {}
|
||||
db = getattr(store, "db", None)
|
||||
|
||||
for table_name in _TABLES_TO_OPTIMIZE:
|
||||
per_table: dict[str, Any] = {
|
||||
"rows_before": 0,
|
||||
"rows_after": 0,
|
||||
"versions_before": 0,
|
||||
"versions_after": 0,
|
||||
"size_bytes_before": 0,
|
||||
"size_bytes_after": 0,
|
||||
"elapsed_sec": 0.0,
|
||||
}
|
||||
try:
|
||||
if db is None:
|
||||
raise RuntimeError("store has no .db attribute")
|
||||
tbl = db.open_table(table_name)
|
||||
try:
|
||||
per_table["rows_before"] = int(tbl.count_rows())
|
||||
except Exception:
|
||||
per_table["rows_before"] = 0
|
||||
try:
|
||||
per_table["versions_before"] = len(tbl.list_versions())
|
||||
except Exception:
|
||||
per_table["versions_before"] = 0
|
||||
per_table["size_bytes_before"] = _measure_table_size_bytes(
|
||||
store, table_name,
|
||||
)
|
||||
|
||||
t0 = time.monotonic()
|
||||
tbl.optimize(cleanup_older_than=retention)
|
||||
per_table["elapsed_sec"] = round(time.monotonic() - t0, 3)
|
||||
|
||||
# Re-open the table after optimize: some LanceDB versions return
|
||||
# cached metadata on the original handle until refresh.
|
||||
try:
|
||||
tbl_after = db.open_table(table_name)
|
||||
except Exception:
|
||||
tbl_after = tbl
|
||||
try:
|
||||
per_table["rows_after"] = int(tbl_after.count_rows())
|
||||
except Exception:
|
||||
per_table["rows_after"] = per_table["rows_before"]
|
||||
try:
|
||||
per_table["versions_after"] = len(tbl_after.list_versions())
|
||||
except Exception:
|
||||
per_table["versions_after"] = per_table["versions_before"]
|
||||
per_table["size_bytes_after"] = _measure_table_size_bytes(
|
||||
store, table_name,
|
||||
)
|
||||
except Exception as exc: # noqa: BLE001 -- helper MUST NOT raise (D7.3-09)
|
||||
per_table["error"] = str(exc)[:500]
|
||||
|
||||
report[table_name] = per_table
|
||||
|
||||
return report
|
||||
1979
src/iai_mcp/migrate.py
Normal file
1979
src/iai_mcp/migrate.py
Normal file
File diff suppressed because it is too large
Load diff
1429
src/iai_mcp/pipeline.py
Normal file
1429
src/iai_mcp/pipeline.py
Normal file
File diff suppressed because it is too large
Load diff
634
src/iai_mcp/profile.py
Normal file
634
src/iai_mcp/profile.py
Normal file
|
|
@ -0,0 +1,634 @@
|
|||
"""11-knob profile registry (D-11 + wake_depth, Plan 07.12-02 removals).
|
||||
|
||||
Plan 02-03 activated the Phase-2 autistic-kernel knobs. flipped
|
||||
AUTIST-13 camouflaging_relaxation to live. appended the sealed
|
||||
operator-facing knob `wake_depth` — selects session-start payload size
|
||||
(minimal = <=30 raw tok lazy handle; standard = Phase-1 1388 tok eager dump;
|
||||
deep = <=2000 tok expanded rich_club). Plan 07.12-02 REMOVED 4 dead KnobSpec
|
||||
entries (AUTIST-02 sensory_channel_weights, event_vs_time_cue,
|
||||
AUTIST-11 alexithymia_accommodation, double_empathy) — none was
|
||||
read in any production scoring/response path; double_empathy was promoted
|
||||
to a passive system invariant in CLAUDE.md, event_vs_time_cue was documented
|
||||
as a deferred future capability.
|
||||
|
||||
Registry shape:
|
||||
- 10 live autistic-kernel knobs (AUTIST-01,03,04,05,06,07,09,10,13,14)
|
||||
- 1 live Phase-5 operator knob (MCP-12 wake_depth, default "minimal")
|
||||
- 0 deferred
|
||||
|
||||
The registry is a module-level frozen-dataclass dict so
|
||||
1. `assert len(PROFILE_KNOBS) == 11`
|
||||
2. test_profile.py can grep exact knob names in order
|
||||
3. Session-start assembler reads the live subset in O(1)
|
||||
|
||||
Schema validation covers:
|
||||
- `enum:a|b|c` -- value must be exactly one of the listed tokens
|
||||
- `bool` -- isinstance(value, bool)
|
||||
- `int_range:lo..hi` -- integer in [lo, hi] inclusive
|
||||
- `float_range:lo..hi` -- float in [lo, hi] inclusive
|
||||
- `dict:<keytype>:<valuetype>` -- per-key recursive validation
|
||||
(e.g. `dict:str:float_range:0.0..1.0`)
|
||||
- anything else -- reject (typo guard)
|
||||
|
||||
Plan 02-03 runtime-gain mechanism exposed via two helpers:
|
||||
- bayesian_update: weighted ensemble posterior update
|
||||
- profile_modulation_for_record: per-record edge-weight gain dict
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Any
|
||||
|
||||
|
||||
# --------------------------------------------------------------------- schema
|
||||
@dataclass(frozen=True)
|
||||
class KnobSpec:
|
||||
"""Static spec for one autistic-kernel knob."""
|
||||
|
||||
name: str
|
||||
phase: int # 1 | 2 | 3
|
||||
default: Any # Phase-1 default, or Phase-2/3 placeholder default
|
||||
description: str
|
||||
value_schema: str # "enum:a|b|c" | "bool" | "int_range:0..5" | "float_range:0.0..1.0"
|
||||
requirement_id: str # AUTIST-01..14
|
||||
|
||||
|
||||
# ------------------------------------------------------------------ registry
|
||||
# 11 sealed knobs: 10 autistic-kernel + wake_depth
|
||||
# (Plan 07.12-02 removed sensory_channel_weights, AUTIST-08
|
||||
# event_vs_time_cue, alexithymia_accommodation, double_empathy).
|
||||
# flipped 9 Phase-2 knobs to phase=1.
|
||||
# flipped camouflaging_relaxation to phase=1.
|
||||
# appended wake_depth (MCP-12, operator-facing).
|
||||
PROFILE_KNOBS: dict[str, KnobSpec] = {
|
||||
"monotropism_depth": KnobSpec(
|
||||
"monotropism_depth",
|
||||
1,
|
||||
{}, # per-domain dict; empty default (unknown domains -> no gain)
|
||||
"Monotropism depth per domain (voluntary tunnel; HIPPEA precision)",
|
||||
"dict:str:float_range:0.0..1.0",
|
||||
"AUTIST-01",
|
||||
),
|
||||
"dunn_quadrant": KnobSpec(
|
||||
"dunn_quadrant",
|
||||
1,
|
||||
"neutral",
|
||||
"Sensory threshold x regulation posture (Dunn four-quadrant; "
|
||||
"drives HIPPEA precision weighting at runtime)",
|
||||
"enum:neutral|low-registration|seeking|sensitive|avoiding",
|
||||
"AUTIST-03",
|
||||
),
|
||||
"literal_preservation": KnobSpec(
|
||||
"literal_preservation",
|
||||
1,
|
||||
"strong",
|
||||
"Verbatim vs semantic summary (raw always retained)",
|
||||
"enum:strong|medium|loose",
|
||||
"AUTIST-04",
|
||||
),
|
||||
"demand_avoidance_tolerance": KnobSpec(
|
||||
"demand_avoidance_tolerance",
|
||||
1,
|
||||
"collaborative",
|
||||
"PDA-aware collaborative phrasing vs imperative",
|
||||
"enum:collaborative|neutral|imperative",
|
||||
"AUTIST-05",
|
||||
),
|
||||
"masking_off": KnobSpec(
|
||||
"masking_off",
|
||||
1,
|
||||
True,
|
||||
"No small-talk, no performative empathy, literal pragmatics",
|
||||
"bool",
|
||||
"AUTIST-06",
|
||||
),
|
||||
"task_support": KnobSpec(
|
||||
"task_support",
|
||||
1,
|
||||
"cued_recognition",
|
||||
"Blank-recall vs cued-recognition with adjacent suggestions (Bowler)",
|
||||
"enum:blank_recall|cued_recognition",
|
||||
"AUTIST-07",
|
||||
),
|
||||
"interest_boost": KnobSpec(
|
||||
"interest_boost",
|
||||
1,
|
||||
0.0,
|
||||
"Salience amplification adjacent to monotropism domains",
|
||||
"float_range:0.0..1.0",
|
||||
"AUTIST-09",
|
||||
),
|
||||
"inertia_awareness": KnobSpec(
|
||||
"inertia_awareness",
|
||||
1,
|
||||
False,
|
||||
"Ambient passive capture in high-inertia windows",
|
||||
"bool",
|
||||
"AUTIST-10",
|
||||
),
|
||||
"camouflaging_relaxation": KnobSpec(
|
||||
"camouflaging_relaxation",
|
||||
1,
|
||||
0.0,
|
||||
"Detect over-formal writing, gradually relax formality (Phase 1 live)",
|
||||
"float_range:0.0..1.0",
|
||||
"AUTIST-13",
|
||||
),
|
||||
"scene_construction_scaffold": KnobSpec(
|
||||
"scene_construction_scaffold",
|
||||
1,
|
||||
True,
|
||||
"Scene-construction scaffold intensity for episodic encoding",
|
||||
"bool",
|
||||
"AUTIST-14",
|
||||
),
|
||||
# D5-06: 15th sealed knob (operator-facing, not autistic-kernel).
|
||||
# wake_depth drives session-start payload size. minimal (default) = ≤30 raw
|
||||
# tok pointer handle (lazy; brain stays server-side); standard = Phase-1
|
||||
# 1388 tok eager dump (back-compat per D5-10); deep = ≤2000 tok expanded
|
||||
# rich_club. Set via existing profile_get_set tool; no new MCP surface.
|
||||
"wake_depth": KnobSpec(
|
||||
"wake_depth",
|
||||
1, # phase — live in (counts toward PHASE_1_LIVE)
|
||||
"minimal",
|
||||
(
|
||||
"Session-start payload size: minimal=<=30 raw (lazy, default), "
|
||||
"standard=Phase-1 eager (back-compat), deep=<=2000 (full)"
|
||||
),
|
||||
"enum:minimal|standard|deep",
|
||||
"MCP-12",
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
PHASE_1_LIVE: frozenset[str] = frozenset(
|
||||
{name for name, spec in PROFILE_KNOBS.items() if spec.phase == 1}
|
||||
)
|
||||
PHASE_2_DEFERRED: frozenset[str] = frozenset(
|
||||
{name for name, spec in PROFILE_KNOBS.items() if spec.phase == 2}
|
||||
)
|
||||
PHASE_3_DEFERRED: frozenset[str] = frozenset(
|
||||
{name for name, spec in PROFILE_KNOBS.items() if spec.phase == 3}
|
||||
)
|
||||
|
||||
|
||||
# Plan 07.12-02: 11-knob shape is load-bearing. Enforced at import time.
|
||||
# History:
|
||||
# - flipped the 9 Phase-2 knobs to phase=1 (PHASE_1_LIVE=13).
|
||||
# - FLIPPED camouflaging_relaxation to phase=1 (PHASE_1_LIVE=14).
|
||||
# - APPENDS wake_depth as the 15th sealed knob (PHASE_1_LIVE=15).
|
||||
# - Plan 07.12-02 REMOVES 4 dead KnobSpec entries (AUTIST-02 sensory,
|
||||
# event_vs_time_cue, alexithymia, double_empathy).
|
||||
# Final shape: 10 AUTIST + 1 wake_depth = 11 sealed knobs.
|
||||
assert len(PROFILE_KNOBS) == 11, (
|
||||
"Plan 07.12-02: 10 autistic-kernel knobs + wake_depth = 11 sealed entries"
|
||||
)
|
||||
assert len(PHASE_1_LIVE) == 11, (
|
||||
"Plan 07.12-02: 10 autistic-kernel knobs + wake_depth are live"
|
||||
)
|
||||
assert len(PHASE_2_DEFERRED) == 0, "Plan 02-03 empties PHASE_2_DEFERRED"
|
||||
assert len(PHASE_3_DEFERRED) == 0, "PHASE_3_DEFERRED emptied"
|
||||
|
||||
|
||||
# Bayesian signal weights (Plan 02-03 LEARN-01)
|
||||
SIGNAL_WEIGHT: dict[str, float] = {
|
||||
"implicit": 0.3,
|
||||
"inferred": 0.5,
|
||||
"explicit": 1.0,
|
||||
}
|
||||
|
||||
|
||||
# profile sentinel UUID -- target node for every profile_modulates edge.
|
||||
# Deterministic so the edges table can be scanned without a side table. The
|
||||
# UUID is ff-nonsense so no record ever collides with it.
|
||||
PROFILE_SENTINEL_UUID_STR = "00000000-0000-0000-0000-0000000000f1"
|
||||
|
||||
|
||||
# --------------------------------------------------------------------- state
|
||||
def default_state() -> dict[str, Any]:
|
||||
"""Initial per-process state: the live knobs with defaults.
|
||||
|
||||
Deferred knobs do not appear in state because profile_set rejects them;
|
||||
profile_get on a deferred knob returns status/phase/requirement_id directly
|
||||
from the registry.
|
||||
"""
|
||||
return {
|
||||
name: spec.default
|
||||
for name, spec in PROFILE_KNOBS.items()
|
||||
if spec.phase == 1
|
||||
}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- validation
|
||||
def _validate(schema: str, value: Any) -> tuple[bool, str]:
|
||||
"""Return (ok, reason). Reason empty on success.
|
||||
|
||||
extends the validators to support `dict:<keytype>:<valuetype>`
|
||||
via recursive per-key validation. Unknown schemas (typos) are rejected.
|
||||
"""
|
||||
if schema == "bool":
|
||||
# Note: `isinstance(True, int)` is True in Python, so check bool first.
|
||||
if isinstance(value, bool):
|
||||
return True, ""
|
||||
return False, f"value must be bool, got {type(value).__name__}"
|
||||
|
||||
if schema.startswith("enum:"):
|
||||
allowed = schema[len("enum:"):].split("|")
|
||||
if value in allowed:
|
||||
return True, ""
|
||||
return False, f"value {value!r} not in enum {allowed}"
|
||||
|
||||
if schema.startswith("int_range:"):
|
||||
bounds = schema[len("int_range:"):]
|
||||
try:
|
||||
lo_s, hi_s = bounds.split("..")
|
||||
lo, hi = int(lo_s), int(hi_s)
|
||||
except (ValueError, TypeError):
|
||||
return False, f"malformed int_range schema {schema!r}"
|
||||
if isinstance(value, bool):
|
||||
return False, "value must be int, got bool"
|
||||
if not isinstance(value, int):
|
||||
return False, f"value must be int, got {type(value).__name__}"
|
||||
if value < lo or value > hi:
|
||||
return False, f"value {value} out of range [{lo}, {hi}]"
|
||||
return True, ""
|
||||
|
||||
if schema.startswith("float_range:"):
|
||||
bounds = schema[len("float_range:"):]
|
||||
try:
|
||||
lo_s, hi_s = bounds.split("..")
|
||||
lo, hi = float(lo_s), float(hi_s)
|
||||
except (ValueError, TypeError):
|
||||
return False, f"malformed float_range schema {schema!r}"
|
||||
if isinstance(value, bool):
|
||||
return False, "value must be float, got bool"
|
||||
if not isinstance(value, (int, float)):
|
||||
return False, f"value must be float, got {type(value).__name__}"
|
||||
v = float(value)
|
||||
if v < lo or v > hi:
|
||||
return False, f"value {v} out of range [{lo}, {hi}]"
|
||||
return True, ""
|
||||
|
||||
if schema.startswith("dict:"):
|
||||
body = schema[len("dict:"):]
|
||||
key_type, _, val_type = body.partition(":")
|
||||
if not val_type:
|
||||
return False, f"malformed dict schema {schema!r}"
|
||||
if not isinstance(value, dict):
|
||||
return False, f"value must be dict, got {type(value).__name__}"
|
||||
for k, v in value.items():
|
||||
if key_type == "str" and not isinstance(k, str):
|
||||
return False, f"dict key must be str, got {type(k).__name__}"
|
||||
ok, reason = _validate(val_type, v)
|
||||
if not ok:
|
||||
return False, f"in key {k!r}: {reason}"
|
||||
return True, ""
|
||||
|
||||
# Unknown schema -> reject (covers accidental typos in KnobSpec.value_schema).
|
||||
return False, f"unknown value_schema {schema!r}"
|
||||
|
||||
|
||||
# ------------------------------------------------------------- public surface
|
||||
def profile_get(knob: str | None, state: dict[str, Any]) -> dict:
|
||||
"""Read a knob (or the full registry surface).
|
||||
|
||||
- knob=None -> full registry: {live: {11}, deferred: {0}, total_knobs: 11}.
|
||||
- knob in PHASE_1_LIVE -> {"knob": n, "value": state[n]}.
|
||||
- knob in deferred (P3) -> status/phase/requirement_id payload.
|
||||
- unknown knob -> {"knob": n, "status": "unknown"}.
|
||||
|
||||
Plan 07.12-02: total_knobs is 11 (10 AUTIST + wake_depth) after AUTIST-02/08/11/12 removal.
|
||||
"""
|
||||
if knob is None:
|
||||
live = {
|
||||
n: state.get(n, PROFILE_KNOBS[n].default)
|
||||
for n in sorted(PHASE_1_LIVE)
|
||||
}
|
||||
deferred = {}
|
||||
for n in sorted(PHASE_2_DEFERRED | PHASE_3_DEFERRED):
|
||||
spec = PROFILE_KNOBS[n]
|
||||
deferred[n] = {
|
||||
"status": "not-yet-implemented",
|
||||
"phase": spec.phase,
|
||||
"requirement_id": spec.requirement_id,
|
||||
"description": spec.description,
|
||||
}
|
||||
return {"live": live, "deferred": deferred, "total_knobs": 11}
|
||||
|
||||
if knob in PHASE_1_LIVE:
|
||||
spec = PROFILE_KNOBS[knob]
|
||||
return {"knob": knob, "value": state.get(knob, spec.default)}
|
||||
|
||||
if knob in PROFILE_KNOBS:
|
||||
spec = PROFILE_KNOBS[knob]
|
||||
return {
|
||||
"knob": knob,
|
||||
"status": "not-yet-implemented",
|
||||
"phase": spec.phase,
|
||||
"requirement_id": spec.requirement_id,
|
||||
}
|
||||
|
||||
return {"knob": knob, "status": "unknown"}
|
||||
|
||||
|
||||
def profile_set(
|
||||
knob: str,
|
||||
value: Any,
|
||||
state: dict[str, Any],
|
||||
*,
|
||||
store: "object | None" = None,
|
||||
) -> dict:
|
||||
"""Write a live knob. Rejects unknown/deferred/invalid-value writes.
|
||||
|
||||
Rule priority:
|
||||
1. unknown knob -> {"status": "error", "reason": "unknown knob"}
|
||||
2. Phase-2 knob -> {"status": "error", "reason": "deferred to Phase 2"}
|
||||
(Plan 02-03 empties this set but the branch is retained for safety.)
|
||||
3. Phase-3 knob -> {"status": "error", "reason": "deferred to Phase 3"}
|
||||
4. schema fail -> {"status": "error", "reason": <validator message>}
|
||||
5. success -> mutates state; returns {"status": "ok", knob, value}
|
||||
|
||||
(M4 LIVE prerequisite): when ``store`` is provided AND the
|
||||
write actually changes the value, emit ``kind='profile_updated'`` so
|
||||
M4 profile-variance can be computed live. No-op writes (old == new) do
|
||||
NOT emit (avoid event flood). The ``store`` kwarg is optional so old
|
||||
callers (e.g. core.dispatch profile_set branch) keep working unchanged.
|
||||
"""
|
||||
if knob not in PROFILE_KNOBS:
|
||||
return {"status": "error", "reason": "unknown knob", "knob": knob}
|
||||
|
||||
spec = PROFILE_KNOBS[knob]
|
||||
if spec.phase == 2:
|
||||
return {
|
||||
"status": "error",
|
||||
"reason": "deferred to Phase 2",
|
||||
"knob": knob,
|
||||
"requirement_id": spec.requirement_id,
|
||||
}
|
||||
if spec.phase == 3:
|
||||
return {
|
||||
"status": "error",
|
||||
"reason": "deferred to Phase 3",
|
||||
"knob": knob,
|
||||
"requirement_id": spec.requirement_id,
|
||||
}
|
||||
|
||||
ok, reason = _validate(spec.value_schema, value)
|
||||
if not ok:
|
||||
return {
|
||||
"status": "error",
|
||||
"reason": reason,
|
||||
"knob": knob,
|
||||
"schema": spec.value_schema,
|
||||
}
|
||||
|
||||
old_value = state.get(knob, spec.default)
|
||||
state[knob] = value
|
||||
|
||||
# M4 LIVE: emit only on actual change to avoid no-op flood.
|
||||
if store is not None and old_value != value:
|
||||
try:
|
||||
from datetime import datetime, timezone
|
||||
from iai_mcp.events import write_event
|
||||
write_event(
|
||||
store,
|
||||
kind="profile_updated",
|
||||
data={
|
||||
"knob": knob,
|
||||
"old": old_value,
|
||||
"new": value,
|
||||
"requirement_id": spec.requirement_id,
|
||||
"timestamp": datetime.now(timezone.utc).isoformat(),
|
||||
},
|
||||
severity="info",
|
||||
)
|
||||
except Exception:
|
||||
# Diagnostic only: never block the profile_set on emit failure.
|
||||
pass
|
||||
|
||||
return {"status": "ok", "knob": knob, "value": value}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- Bayesian
|
||||
|
||||
|
||||
def bayesian_update(
|
||||
knob: str,
|
||||
signal: str,
|
||||
observed: Any,
|
||||
state: dict,
|
||||
posterior: dict,
|
||||
) -> tuple[Any, dict]:
|
||||
"""D-20 weighted-ensemble posterior update on a knob value.
|
||||
|
||||
Conjugate-prior form per schema type:
|
||||
- bool -> Beta(alpha, beta); alpha += w*obs, beta += w*(1-obs)
|
||||
New value is the Beta mode (alpha > beta -> True).
|
||||
- enum -> Dirichlet(alphas); alphas[obs] += w
|
||||
New value is argmax(alphas).
|
||||
- float_range -> Normal mean via weighted running average
|
||||
- int_range -> rounded weighted running average
|
||||
- dict:... -> per-key recursive update (observed must also be a dict)
|
||||
|
||||
Returns (new_value, new_posterior). `posterior` is a dict keyed by knob
|
||||
name with an internal per-knob sub-dict carrying alpha/beta/alphas/mean/n.
|
||||
"""
|
||||
w = SIGNAL_WEIGHT.get(signal, 0.0)
|
||||
if w == 0.0:
|
||||
return state.get(knob, PROFILE_KNOBS[knob].default if knob in PROFILE_KNOBS else None), posterior
|
||||
|
||||
spec = PROFILE_KNOBS.get(knob)
|
||||
if spec is None:
|
||||
return state.get(knob), posterior
|
||||
|
||||
sch = spec.value_schema
|
||||
p = dict(posterior)
|
||||
kp = dict(p.get(knob, {}))
|
||||
|
||||
current = state.get(knob, spec.default)
|
||||
|
||||
if sch == "bool":
|
||||
alpha = float(kp.get("alpha", 1.0))
|
||||
beta = float(kp.get("beta", 1.0))
|
||||
if observed is True:
|
||||
alpha += w
|
||||
elif observed is False:
|
||||
beta += w
|
||||
else:
|
||||
# Invalid observation for bool; degrade silently.
|
||||
return current, p
|
||||
kp["alpha"] = alpha
|
||||
kp["beta"] = beta
|
||||
new_value = alpha >= beta
|
||||
elif sch.startswith("enum:"):
|
||||
allowed = sch[len("enum:"):].split("|")
|
||||
alphas: dict[str, float] = dict(kp.get("alphas", {}))
|
||||
if observed not in allowed:
|
||||
return current, p
|
||||
alphas[observed] = alphas.get(observed, 1.0) + w
|
||||
kp["alphas"] = alphas
|
||||
# Seed with current as implicit prior boost if no entries yet.
|
||||
if current in allowed and current not in alphas:
|
||||
alphas[current] = alphas.get(current, 1.0) + 0.001
|
||||
new_value = max(alphas.keys(), key=lambda k: alphas[k])
|
||||
elif sch.startswith("float_range:"):
|
||||
# Weighted running mean.
|
||||
try:
|
||||
obs_f = float(observed)
|
||||
except (TypeError, ValueError):
|
||||
return current, p
|
||||
prev_sum = float(kp.get("weighted_sum", float(current) if isinstance(current, (int, float)) else 0.0))
|
||||
prev_wts = float(kp.get("total_weight", 0.0))
|
||||
new_sum = prev_sum + w * obs_f
|
||||
new_wts = prev_wts + w
|
||||
mean = new_sum / new_wts if new_wts > 0 else obs_f
|
||||
# Clamp to the schema range.
|
||||
bounds = sch[len("float_range:"):]
|
||||
lo_s, hi_s = bounds.split("..")
|
||||
lo, hi = float(lo_s), float(hi_s)
|
||||
mean = max(lo, min(hi, mean))
|
||||
kp["weighted_sum"] = new_sum
|
||||
kp["total_weight"] = new_wts
|
||||
kp["mean"] = mean
|
||||
new_value = mean
|
||||
elif sch.startswith("int_range:"):
|
||||
try:
|
||||
obs_f = float(observed)
|
||||
except (TypeError, ValueError):
|
||||
return current, p
|
||||
prev_sum = float(kp.get("weighted_sum", float(current) if isinstance(current, (int, float)) else 0.0))
|
||||
prev_wts = float(kp.get("total_weight", 0.0))
|
||||
new_sum = prev_sum + w * obs_f
|
||||
new_wts = prev_wts + w
|
||||
mean = new_sum / new_wts if new_wts > 0 else obs_f
|
||||
bounds = sch[len("int_range:"):]
|
||||
lo_s, hi_s = bounds.split("..")
|
||||
lo, hi = int(lo_s), int(hi_s)
|
||||
new_value = max(lo, min(hi, int(round(mean))))
|
||||
kp["weighted_sum"] = new_sum
|
||||
kp["total_weight"] = new_wts
|
||||
kp["mean"] = mean
|
||||
elif sch.startswith("dict:"):
|
||||
# Per-key recursive update. `observed` must be dict-of-same-shape.
|
||||
if not isinstance(observed, dict):
|
||||
return current, p
|
||||
body = sch[len("dict:"):]
|
||||
_key_type, _, val_type = body.partition(":")
|
||||
per_key_posts: dict[str, dict] = dict(kp.get("per_key", {}))
|
||||
current_dict: dict = dict(current) if isinstance(current, dict) else {}
|
||||
for k, v in observed.items():
|
||||
# Mini-recursion: synthesise a float-style update for the inner value.
|
||||
sub_spec = val_type
|
||||
sub_kp = dict(per_key_posts.get(k, {}))
|
||||
if sub_spec.startswith("float_range:"):
|
||||
try:
|
||||
obs_f = float(v)
|
||||
except (TypeError, ValueError):
|
||||
continue
|
||||
prev_sum = float(sub_kp.get("weighted_sum", float(current_dict.get(k, 0.0))))
|
||||
prev_wts = float(sub_kp.get("total_weight", 0.0))
|
||||
new_sum = prev_sum + w * obs_f
|
||||
new_wts = prev_wts + w
|
||||
mean = new_sum / new_wts if new_wts > 0 else obs_f
|
||||
bounds = sub_spec[len("float_range:"):]
|
||||
lo_s, hi_s = bounds.split("..")
|
||||
lo, hi = float(lo_s), float(hi_s)
|
||||
mean = max(lo, min(hi, mean))
|
||||
sub_kp["weighted_sum"] = new_sum
|
||||
sub_kp["total_weight"] = new_wts
|
||||
sub_kp["mean"] = mean
|
||||
per_key_posts[k] = sub_kp
|
||||
current_dict[k] = mean
|
||||
kp["per_key"] = per_key_posts
|
||||
new_value = current_dict
|
||||
else:
|
||||
return current, p
|
||||
|
||||
p[knob] = kp
|
||||
state[knob] = new_value
|
||||
return new_value, p
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- gain
|
||||
|
||||
|
||||
def profile_modulation_for_record(
|
||||
record,
|
||||
profile_state: dict,
|
||||
*,
|
||||
knobs_applied: dict | None = None,
|
||||
) -> dict[str, float]:
|
||||
"""Compute edge-weight gain dict for a record.
|
||||
|
||||
Returned gains are multiplicative (>=1.0 means amplify, <1.0 means damp).
|
||||
Keys match the knob name. Empty dict means no active modulation.
|
||||
|
||||
Current gain sources:
|
||||
- `monotropism_depth`: gain = 1.0 + depth for the record's domain tag.
|
||||
- `interest_boost`: gain = 1.0 + boost (amplifies every record).
|
||||
- `dunn_quadrant`: seeking -> 1.2, avoiding -> 0.8, else no entry.
|
||||
- `special_interest_amplification`: extension (no-op here).
|
||||
|
||||
The record's own `profile_modulation_gain` dict is NOT mutated here; the
|
||||
caller (pipeline_recall) copies the gains onto the record cache after
|
||||
computing them.
|
||||
|
||||
Phase 07.12-03: when ``knobs_applied`` is provided (a dict), records
|
||||
/ / provenance strings into it whenever
|
||||
the corresponding gain branch fires. The accumulator is owned by the
|
||||
caller (typically core.dispatch); this function mutates it in place,
|
||||
pass-by-reference — never reassigns, never returns it.
|
||||
|
||||
BLOCKER 3 (CONTEXT D-04, 2026-04-30): provenance strings MUST contain
|
||||
'profile.py' so the production-path integration test can prove the
|
||||
upstream-gains accumulator is wired in this file (not stubbed elsewhere).
|
||||
Back-compat: callers that don't pass the kwarg behave exactly as before.
|
||||
"""
|
||||
gains: dict[str, float] = {}
|
||||
|
||||
# Monotropism depth per domain tag.
|
||||
md = profile_state.get("monotropism_depth", {})
|
||||
if isinstance(md, dict) and md:
|
||||
for tag in (record.tags or []):
|
||||
if tag.startswith("domain:"):
|
||||
dom = tag.split(":", 1)[1]
|
||||
if dom in md:
|
||||
depth = md[dom]
|
||||
try:
|
||||
gains["monotropism_depth"] = 1.0 + float(depth)
|
||||
except (TypeError, ValueError):
|
||||
pass
|
||||
if knobs_applied is not None:
|
||||
knobs_applied["AUTIST-01"] = (
|
||||
"profile.py:profile_modulation_for_record:monotropism_depth"
|
||||
)
|
||||
break
|
||||
|
||||
# Interest boost amplifies any record. (verified line range: 613-616)
|
||||
ib = profile_state.get("interest_boost", 0.0)
|
||||
try:
|
||||
if float(ib) > 0:
|
||||
gains["interest_boost"] = 1.0 + float(ib)
|
||||
if knobs_applied is not None:
|
||||
knobs_applied["AUTIST-09"] = (
|
||||
"profile.py:profile_modulation_for_record:interest_boost"
|
||||
)
|
||||
except (TypeError, ValueError):
|
||||
pass
|
||||
|
||||
# Dunn quadrant posture. (verified line range: 621-625)
|
||||
dq = profile_state.get("dunn_quadrant")
|
||||
if dq == "seeking":
|
||||
gains["dunn_quadrant"] = 1.2
|
||||
if knobs_applied is not None:
|
||||
knobs_applied["AUTIST-03"] = (
|
||||
"profile.py:profile_modulation_for_record:dunn_quadrant=seeking"
|
||||
)
|
||||
elif dq == "avoiding":
|
||||
gains["dunn_quadrant"] = 0.8
|
||||
if knobs_applied is not None:
|
||||
knobs_applied["AUTIST-03"] = (
|
||||
"profile.py:profile_modulation_for_record:dunn_quadrant=avoiding"
|
||||
)
|
||||
|
||||
return gains
|
||||
399
src/iai_mcp/provenance_queue.py
Normal file
399
src/iai_mcp/provenance_queue.py
Normal file
|
|
@ -0,0 +1,399 @@
|
|||
"""Plan 05-14 — async provenance write queue (OPS-10 / M-02).
|
||||
|
||||
Moves provenance writes off the recall critical path. A single daemon
|
||||
thread drains a bounded queue.Queue of (record_id, entry) pairs and
|
||||
flushes them via the existing ``MemoryStore.append_provenance_batch``
|
||||
exactly as the sync path did.
|
||||
|
||||
Why this is the right shape:
|
||||
- provenance writes are pure SIDE EFFECTS; pipeline_recall never reads
|
||||
their result. Textbook fire-and-forget candidate.
|
||||
- The biological analogue: consolidation writes happen during rest, not
|
||||
during retrieval (CLS / sleep replay).
|
||||
- The existing ``AsyncWriteQueue`` is for record inserts,
|
||||
which must be durable before their return (S4 viability check reads
|
||||
them back). Provenance has no such contract — a simpler, purpose-built
|
||||
queue avoids the coroutine/event-loop machinery that asyncio imposes.
|
||||
|
||||
Constitutional fences:
|
||||
- Rule 1: worker swallows all exceptions (recall must never fail due
|
||||
to a provenance-write failure).
|
||||
- entries are never dropped during normal operation; on shutdown
|
||||
the atexit hook drains the queue. W1/when the
|
||||
in-memory queue is full under overload, batches are spilled to
|
||||
``~/.iai-mcp/.provenance-overflow/<unix_ms>-<n>.jsonl``. The worker
|
||||
drains the spill dir on idle and re-enqueues the batches. Zero drops
|
||||
on the happy path; the only path that can drop is disk-write failure
|
||||
(alarmed via the ``provenance_queue_spill_failed`` stderr event).
|
||||
- C3 / C6: stdlib only. No extra dependencies.
|
||||
|
||||
Python 3.11+.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import atexit
|
||||
import json
|
||||
import queue
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
from uuid import UUID
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from iai_mcp.store import MemoryStore
|
||||
|
||||
|
||||
# Sentinel pushed on the queue to wake the worker for stop/flush.
|
||||
_STOP = object()
|
||||
_FLUSH = object()
|
||||
|
||||
# W1/D-01 — overflow spill-to-disk.
|
||||
OVERFLOW_DIR_NAME = ".provenance-overflow"
|
||||
# Worker idle poll: 5s upper bound on overflow-drain responsiveness.
|
||||
# Bounded so under sustained overload the spill drain catches up
|
||||
# within a small constant time after _q clears.
|
||||
_WORKER_IDLE_POLL_S = 5.0
|
||||
|
||||
|
||||
class ProvenanceWriteQueue:
|
||||
"""Single-daemon-thread coalescing queue for provenance batches.
|
||||
|
||||
Usage:
|
||||
q = ProvenanceWriteQueue(store, coalesce_ms=50)
|
||||
q.start() # idempotent
|
||||
q.enqueue([(record_id, entry_dict), ...]) # non-blocking
|
||||
q.flush(timeout=2.0) # drain + wait
|
||||
q.stop() # drain + join
|
||||
|
||||
The worker loop:
|
||||
1. Blocking `.get()` on the queue (wakes on enqueue or sentinel).
|
||||
2. Opportunistic drain up to ``max_batch_pairs`` pairs OR until
|
||||
the queue has been empty for ``coalesce_ms``.
|
||||
3. Single call to ``store.append_provenance_batch(pairs,
|
||||
records_cache=None)``.
|
||||
4. Back to (1).
|
||||
|
||||
All worker exceptions are logged to stderr as structured JSON events
|
||||
and swallowed.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
store: "MemoryStore",
|
||||
*,
|
||||
coalesce_ms: int = 50,
|
||||
max_queue_size: int = 4096,
|
||||
max_batch_pairs: int = 256,
|
||||
) -> None:
|
||||
self._store = store
|
||||
self._coalesce_s = max(1, int(coalesce_ms)) / 1000.0
|
||||
self._max_batch = int(max_batch_pairs)
|
||||
# Queue items are either lists of (UUID, dict) pairs or the
|
||||
# _STOP / _FLUSH sentinels.
|
||||
self._q: queue.Queue = queue.Queue(maxsize=int(max_queue_size))
|
||||
self._thread: threading.Thread | None = None
|
||||
self._started = False
|
||||
self._stop_requested = False
|
||||
# flush synchronisation: drained_event is set by the worker when
|
||||
# it has processed everything up to a _FLUSH sentinel.
|
||||
self._flush_event = threading.Event()
|
||||
self._atexit_registered = False
|
||||
self._lock = threading.Lock()
|
||||
|
||||
# ------------------------------------------------------------------ lifecycle
|
||||
|
||||
def start(self) -> None:
|
||||
"""Start the worker thread. Idempotent."""
|
||||
with self._lock:
|
||||
if self._started:
|
||||
return
|
||||
self._started = True
|
||||
self._stop_requested = False
|
||||
self._thread = threading.Thread(
|
||||
target=self._run,
|
||||
name="iai-mcp-provenance-queue",
|
||||
daemon=True,
|
||||
)
|
||||
self._thread.start()
|
||||
if not self._atexit_registered:
|
||||
atexit.register(self._atexit_flush)
|
||||
self._atexit_registered = True
|
||||
|
||||
def stop(self) -> None:
|
||||
"""Signal the worker, drain remaining items, join the thread.
|
||||
|
||||
Idempotent. After stop the queue is no longer usable; call
|
||||
start() to revive (fresh worker, same queue instance).
|
||||
"""
|
||||
with self._lock:
|
||||
if not self._started:
|
||||
return
|
||||
self._stop_requested = True
|
||||
try:
|
||||
self._q.put_nowait(_STOP)
|
||||
except queue.Full:
|
||||
# Drop one item to make room for the sentinel.
|
||||
try:
|
||||
self._q.get_nowait()
|
||||
self._q.put_nowait(_STOP)
|
||||
except queue.Empty:
|
||||
pass
|
||||
t = self._thread
|
||||
if t is not None:
|
||||
t.join(timeout=5.0)
|
||||
with self._lock:
|
||||
self._started = False
|
||||
self._thread = None
|
||||
|
||||
def flush(self, timeout: float = 2.0) -> None:
|
||||
"""Wait until the worker has drained everything enqueued so far.
|
||||
|
||||
Puts a _FLUSH sentinel; the worker signals _flush_event once it
|
||||
has processed all pairs that were in the queue at that point.
|
||||
Times out silently — the caller is responsible for deciding
|
||||
whether to retry; recall latency is never blocked by flush().
|
||||
"""
|
||||
if not self._started:
|
||||
return
|
||||
self._flush_event.clear()
|
||||
try:
|
||||
self._q.put(_FLUSH, timeout=timeout)
|
||||
except queue.Full:
|
||||
return
|
||||
self._flush_event.wait(timeout=timeout)
|
||||
|
||||
# ---------------------------------------------------------------- public write
|
||||
|
||||
def enqueue(self, pairs: "list[tuple[UUID, dict]]") -> None:
|
||||
"""Non-blocking enqueue.
|
||||
|
||||
W1/when the in-memory queue is full, the batch
|
||||
spills to ``~/.iai-mcp/.provenance-overflow/<ts>-<n>.jsonl``.
|
||||
The worker thread drains the spill dir on idle and re-enqueues
|
||||
the batches. zero drops under overload (only path that
|
||||
can drop is disk-write failure, which is itself alarmed).
|
||||
"""
|
||||
if not pairs:
|
||||
return
|
||||
try:
|
||||
self._q.put_nowait(list(pairs))
|
||||
return
|
||||
except queue.Full:
|
||||
pass
|
||||
# In-memory queue full — spill to disk. Worker will pick this
|
||||
# up on its next idle cycle. Recall hot path is unaffected
|
||||
# (this branch only fires on the WRITE side under overload).
|
||||
self._spill_to_disk(list(pairs))
|
||||
try:
|
||||
sys.stderr.write(
|
||||
'{"event":"provenance_queue_overflow_spill","n_pairs":'
|
||||
+ str(len(pairs))
|
||||
+ "}\n"
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# ---------------------------------------------------------------- spill / drain
|
||||
|
||||
def _spill_to_disk(self, pairs: list) -> None:
|
||||
"""Persist a rejected batch to ``~/.iai-mcp/.provenance-overflow/``.
|
||||
|
||||
Per-batch JSONL file: one line per (uuid_str, entry_dict) pair.
|
||||
File-level atomicity — the worker re-enqueues the entire file's
|
||||
contents in one call, then unlinks. Format:
|
||||
|
||||
{"id": "<uuid>", "entry": {...}}\n
|
||||
{"id": "<uuid>", "entry": {...}}\n
|
||||
|
||||
Failure modes:
|
||||
- Disk full / permission denied: emits structured stderr event
|
||||
``provenance_queue_spill_failed``. This is the ONLY drop path
|
||||
remaining post-07.9 W1; it's a system-level alarm condition,
|
||||
not a normal-operation outcome.
|
||||
"""
|
||||
if not pairs:
|
||||
return
|
||||
try:
|
||||
overflow_dir = Path.home() / ".iai-mcp" / OVERFLOW_DIR_NAME
|
||||
overflow_dir.mkdir(parents=True, exist_ok=True)
|
||||
ts_ms = int(time.time() * 1000)
|
||||
# Tag with the batch length and a short pid suffix so two
|
||||
# spills inside the same millisecond never collide.
|
||||
fpath = overflow_dir / f"{ts_ms}-{len(pairs)}-{id(pairs) & 0xFFFF:04x}.jsonl"
|
||||
tmp_path = fpath.with_suffix(fpath.suffix + ".tmp")
|
||||
with tmp_path.open("w", encoding="utf-8") as fh:
|
||||
for rid, entry in pairs:
|
||||
fh.write(json.dumps({"id": str(rid), "entry": entry}) + "\n")
|
||||
tmp_path.rename(fpath) # atomic rename keeps drain from
|
||||
# ever reading a half-written file.
|
||||
except Exception as exc:
|
||||
try:
|
||||
sys.stderr.write(
|
||||
'{"event":"provenance_queue_spill_failed","error":'
|
||||
+ _json_str(str(exc))
|
||||
+ ',"n_pairs":' + str(len(pairs)) + '}\n'
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _drain_overflow_dir(self) -> int:
|
||||
"""Re-enqueue any spilled batches into ``_q``.
|
||||
|
||||
Called by the worker on idle (between blocking `_q.get()` cycles).
|
||||
Per-file atomicity: re-enqueue ALL pairs from a file via a single
|
||||
``_q.put`` call, then unlink. If ``_q`` is still full, leave the
|
||||
file on disk for the next idle cycle.
|
||||
|
||||
Returns the number of pairs successfully re-enqueued in this pass.
|
||||
"""
|
||||
overflow_dir = Path.home() / ".iai-mcp" / OVERFLOW_DIR_NAME
|
||||
if not overflow_dir.exists():
|
||||
return 0
|
||||
n_re_enqueued = 0
|
||||
# sorted() so older spill files drain first (FIFO durability).
|
||||
for fpath in sorted(overflow_dir.glob("*.jsonl")):
|
||||
try:
|
||||
pairs: list = []
|
||||
with fpath.open(encoding="utf-8") as fh:
|
||||
for line in fh:
|
||||
line = line.strip()
|
||||
if not line:
|
||||
continue
|
||||
obj = json.loads(line)
|
||||
pairs.append((UUID(obj["id"]), obj["entry"]))
|
||||
if not pairs:
|
||||
fpath.unlink()
|
||||
continue
|
||||
# Short-timeout put: this is the worker thread, so
|
||||
# blocking briefly is fine, but a long block would
|
||||
# delay normal-path enqueues that arrive during drain.
|
||||
try:
|
||||
self._q.put(pairs, timeout=0.5)
|
||||
except queue.Full:
|
||||
# Queue still saturated — leave the file for the
|
||||
# next idle cycle. Don't unlink.
|
||||
return n_re_enqueued
|
||||
fpath.unlink()
|
||||
n_re_enqueued += len(pairs)
|
||||
except Exception as exc:
|
||||
# Malformed spill file: preserve evidence, do not lose data.
|
||||
try:
|
||||
failed = fpath.with_suffix(f".failed-{int(time.time())}.jsonl")
|
||||
fpath.rename(failed)
|
||||
sys.stderr.write(
|
||||
'{"event":"provenance_queue_spill_drain_failed","error":'
|
||||
+ _json_str(str(exc)) + '}\n'
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
return n_re_enqueued
|
||||
|
||||
# ------------------------------------------------------------------ internals
|
||||
|
||||
def _run(self) -> None:
|
||||
"""Worker loop.
|
||||
|
||||
W1/between blocking `_q.get()` cycles the worker
|
||||
drains any spilled overflow files at ``~/.iai-mcp/.provenance-overflow/``.
|
||||
Bounded poll: idle-timeout = ``_WORKER_IDLE_POLL_S`` so the spill
|
||||
drain runs at most once per ``_WORKER_IDLE_POLL_S`` seconds when
|
||||
the queue is empty.
|
||||
"""
|
||||
while True:
|
||||
try:
|
||||
item = self._q.get(timeout=_WORKER_IDLE_POLL_S)
|
||||
except queue.Empty:
|
||||
# Idle tick — try to drain the overflow dir back into _q.
|
||||
# Defensive: any error during drain is logged + swallowed.
|
||||
try:
|
||||
self._drain_overflow_dir()
|
||||
except Exception:
|
||||
pass
|
||||
continue
|
||||
except Exception:
|
||||
continue
|
||||
if item is _STOP:
|
||||
# Drain remaining real items before exit.
|
||||
self._drain_remaining()
|
||||
return
|
||||
if item is _FLUSH:
|
||||
# Drain everything enqueued before this sentinel.
|
||||
self._drain_remaining()
|
||||
self._flush_event.set()
|
||||
continue
|
||||
# Normal batch. Coalesce: pull more pending items until we
|
||||
# hit max_batch_pairs or a short idle window.
|
||||
pairs: list = list(item)
|
||||
while len(pairs) < self._max_batch:
|
||||
try:
|
||||
nxt = self._q.get(timeout=self._coalesce_s)
|
||||
except queue.Empty:
|
||||
break
|
||||
if nxt is _STOP:
|
||||
# Flush what we have, then exit.
|
||||
self._flush_batch(pairs)
|
||||
self._drain_remaining()
|
||||
return
|
||||
if nxt is _FLUSH:
|
||||
self._flush_batch(pairs)
|
||||
self._drain_remaining()
|
||||
self._flush_event.set()
|
||||
pairs = []
|
||||
break
|
||||
pairs.extend(nxt)
|
||||
if pairs:
|
||||
self._flush_batch(pairs)
|
||||
|
||||
def _drain_remaining(self) -> None:
|
||||
"""Pull everything currently in the queue and flush as one batch."""
|
||||
pairs: list = []
|
||||
saw_flush = False
|
||||
while True:
|
||||
try:
|
||||
item = self._q.get_nowait()
|
||||
except queue.Empty:
|
||||
break
|
||||
if item is _STOP:
|
||||
continue
|
||||
if item is _FLUSH:
|
||||
saw_flush = True
|
||||
continue
|
||||
pairs.extend(item)
|
||||
if pairs:
|
||||
self._flush_batch(pairs)
|
||||
if saw_flush:
|
||||
self._flush_event.set()
|
||||
|
||||
def _flush_batch(self, pairs: list) -> None:
|
||||
"""Call store.append_provenance_batch, swallow all exceptions (Rule 1)."""
|
||||
if not pairs:
|
||||
return
|
||||
try:
|
||||
self._store.append_provenance_batch(pairs, records_cache=None)
|
||||
except Exception as exc:
|
||||
try:
|
||||
sys.stderr.write(
|
||||
'{"event":"provenance_queue_flush_failed","n_pairs":'
|
||||
+ str(len(pairs))
|
||||
+ ',"error":'
|
||||
+ _json_str(str(exc))
|
||||
+ "}\n"
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _atexit_flush(self) -> None:
|
||||
"""atexit handler — drain and stop the worker. Never raises."""
|
||||
try:
|
||||
if self._started:
|
||||
self.flush(timeout=2.0)
|
||||
self.stop()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def _json_str(s: str) -> str:
|
||||
"""Minimal JSON string escape for stderr structured logs."""
|
||||
return '"' + s.replace("\\", "\\\\").replace('"', '\\"').replace("\n", "\\n") + '"'
|
||||
145
src/iai_mcp/quiet_window.py
Normal file
145
src/iai_mcp/quiet_window.py
Normal file
|
|
@ -0,0 +1,145 @@
|
|||
"""Phase 4 -- activity-learned quiet-window scheduler (DAEMON-03).
|
||||
|
||||
Learn the user's quiet window from their own `session_started` event history.
|
||||
48 buckets of 30-min granularity over a 7-day rolling window. Find the longest
|
||||
contiguous span where bucket activity < threshold. Min 3h, max 8h. Bootstrap
|
||||
when <7 days of data: trigger on 2h MCP idle. Re-learn every 24h.
|
||||
|
||||
Constitutional guard:
|
||||
- learned from events, NOT clock-based.
|
||||
- global-product mandate -- no Western 9-5 assumption, no baked-in
|
||||
local-time default. Respects nocturnal / shift / time-zone-mobile users.
|
||||
- C3: no LLM code, no paid-API env var reference in this module.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import Optional
|
||||
from zoneinfo import ZoneInfo
|
||||
|
||||
from iai_mcp.events import query_events
|
||||
from iai_mcp.store import MemoryStore
|
||||
|
||||
# Bucket sizing.
|
||||
BUCKET_COUNT = 48 # 30-min * 48 = 24h
|
||||
BUCKET_MINUTES = 30
|
||||
|
||||
# Window bounds.
|
||||
MIN_WINDOW_HOURS = 3 # discard spans shorter than 3h
|
||||
MAX_WINDOW_HOURS = 8 # human sleep ceiling
|
||||
|
||||
# Learning / bootstrap parameters.
|
||||
MIN_DAYS_FOR_LEARN = 7
|
||||
BOOTSTRAP_IDLE_HOURS = 2 # fallback trigger when <7d data
|
||||
|
||||
# Scheduler cadence gates (used by daemon; exported for caller convenience).
|
||||
WIND_DOWN_GATE_MINUTES_BEFORE = 30 # dual-gate: within 30min of quiet start
|
||||
DIGEST_SHOW_THRESHOLD_HOURS = 18 # morning digest gating (re-exported by daemon_state)
|
||||
|
||||
|
||||
def learn_quiet_window(
|
||||
store: MemoryStore,
|
||||
now: datetime,
|
||||
tz: ZoneInfo,
|
||||
) -> Optional[tuple[int, int]]:
|
||||
"""Learn the user's quiet window from 7-day session_started history.
|
||||
|
||||
Returns (start_bucket, duration_buckets) in LOCAL time, or None if
|
||||
insufficient data / no contiguous quiet span (caller falls back to the
|
||||
bootstrap idle rule).
|
||||
|
||||
start_bucket: 0..BUCKET_COUNT-1 index into 30-min-bucket local-time day.
|
||||
duration_buckets: number of 30-min buckets in the quiet span (3h=6, 8h=16).
|
||||
"""
|
||||
since = now - timedelta(days=MIN_DAYS_FOR_LEARN)
|
||||
events = query_events(store, kind="session_started", since=since, limit=10000)
|
||||
if not events:
|
||||
return None
|
||||
|
||||
# Count sessions per 30-min local-time bucket + track unique days seen.
|
||||
counts = [0] * BUCKET_COUNT
|
||||
days_seen: set[tuple[int, int, int]] = set()
|
||||
for e in events:
|
||||
ts = e["ts"]
|
||||
# Pandas may surface a Timestamp -- coerce to aware datetime.
|
||||
if not isinstance(ts, datetime):
|
||||
try:
|
||||
ts = ts.to_pydatetime()
|
||||
except Exception:
|
||||
continue
|
||||
if ts.tzinfo is None:
|
||||
ts = ts.replace(tzinfo=timezone.utc)
|
||||
try:
|
||||
ts_local = ts.astimezone(tz)
|
||||
except Exception:
|
||||
# DST edge: astimezone is robust on stdlib, but guard anyway.
|
||||
continue
|
||||
bucket = (ts_local.hour * 60 + ts_local.minute) // BUCKET_MINUTES
|
||||
if 0 <= bucket < BUCKET_COUNT:
|
||||
counts[bucket] += 1
|
||||
days_seen.add((ts_local.year, ts_local.month, ts_local.day))
|
||||
|
||||
if len(days_seen) < MIN_DAYS_FOR_LEARN:
|
||||
return None # bootstrap path -- caller uses 2h-idle.
|
||||
|
||||
# Low-activity threshold = 20% of peak.
|
||||
peak = max(counts)
|
||||
if peak == 0:
|
||||
return None
|
||||
threshold = max(1, int(peak * 0.2))
|
||||
|
||||
# Longest contiguous circular span of sub-threshold buckets.
|
||||
# Double-array walk to handle wrap-around across local midnight.
|
||||
doubled = counts + counts
|
||||
best_start, best_len = 0, 0
|
||||
cur_start, cur_len = None, 0
|
||||
for i, c in enumerate(doubled):
|
||||
if c < threshold:
|
||||
if cur_start is None:
|
||||
cur_start = i
|
||||
cur_len = 1
|
||||
else:
|
||||
cur_len += 1
|
||||
if cur_len > best_len:
|
||||
best_start = cur_start
|
||||
best_len = cur_len
|
||||
else:
|
||||
cur_start, cur_len = None, 0
|
||||
|
||||
min_buckets = MIN_WINDOW_HOURS * (60 // BUCKET_MINUTES) # 6
|
||||
max_buckets = MAX_WINDOW_HOURS * (60 // BUCKET_MINUTES) # 16
|
||||
if best_len < min_buckets:
|
||||
# 24/7 user with no contiguous quiet span -> fallback to idle-only.
|
||||
return None
|
||||
duration = min(best_len, max_buckets)
|
||||
# Don't allow a span longer than a full day after wrap.
|
||||
if duration > BUCKET_COUNT:
|
||||
duration = BUCKET_COUNT
|
||||
return (best_start % BUCKET_COUNT, duration)
|
||||
|
||||
|
||||
def should_relearn(last_learned_at: Optional[datetime], now: datetime) -> bool:
|
||||
"""Re-learn cadence: 24h since last learn (D-04 24h adaptation)."""
|
||||
if last_learned_at is None:
|
||||
return True
|
||||
if last_learned_at.tzinfo is None:
|
||||
last_learned_at = last_learned_at.replace(tzinfo=timezone.utc)
|
||||
if now.tzinfo is None:
|
||||
now = now.replace(tzinfo=timezone.utc)
|
||||
return (now - last_learned_at) >= timedelta(hours=24)
|
||||
|
||||
|
||||
def should_bootstrap_trigger(last_session_ts: Optional[datetime], now: datetime) -> bool:
|
||||
"""Bootstrap idle trigger: daemon fires when no MCP session for 2h.
|
||||
|
||||
Used when `learn_quiet_window` returns None (insufficient data or 24/7
|
||||
user). Also used by the daemon as the always-on idle rule in addition to
|
||||
the learned quiet window.
|
||||
"""
|
||||
if last_session_ts is None:
|
||||
return True
|
||||
if last_session_ts.tzinfo is None:
|
||||
last_session_ts = last_session_ts.replace(tzinfo=timezone.utc)
|
||||
if now.tzinfo is None:
|
||||
now = now.replace(tzinfo=timezone.utc)
|
||||
return (now - last_session_ts) >= timedelta(hours=BOOTSTRAP_IDLE_HOURS)
|
||||
439
src/iai_mcp/response_decorator.py
Normal file
439
src/iai_mcp/response_decorator.py
Normal file
|
|
@ -0,0 +1,439 @@
|
|||
"""Plan 05-03 TOK-13 / D5-04 -- server-side profile knob decorator.
|
||||
|
||||
`apply_profile(response, profile)` mutates a response dict in place based on
|
||||
the 11 sealed profile knobs. Every per-knob helper is silent-fail so a
|
||||
malformed knob value can never break the response path.
|
||||
|
||||
C3 invariant (Plan 04): this module is pure-local Python. NO paid-API SDK
|
||||
import. NO API-key env read. The static grep guard
|
||||
`test_no_api_key_in_response_decorator` enforces the invariant at CI time.
|
||||
|
||||
TOK-13 contract: knob NAMES never cross the MCP wire. They are read from
|
||||
the per-process profile state, applied to the response here, and the
|
||||
result goes back over JSON-RPC free of any knob identifiers.
|
||||
|
||||
Helper layout (10 dispatch helpers — one per AUTIST knob the decorator
|
||||
mutates; wake_depth has no helper here, see end note):
|
||||
- _apply_formality_relaxation (AUTIST-13 camouflaging_relaxation)
|
||||
- _apply_monotropic_focus (AUTIST-01 monotropism_depth)
|
||||
- _apply_literal_preservation
|
||||
- _apply_masking_off
|
||||
- _apply_task_support
|
||||
- _apply_scene_construction
|
||||
- _apply_dunn_quadrant
|
||||
- _apply_pda_tolerance (AUTIST-05 demand_avoidance_tolerance)
|
||||
- _apply_interest_boost
|
||||
- _apply_inertia_awareness
|
||||
|
||||
(Phase 07.12-02 removed the dead-knob helpers
|
||||
_apply_sensory_channel_weights / _apply_alexithymia / _apply_double_empathy
|
||||
along with the orphan helpers _apply_verbosity_level / _apply_surface_language
|
||||
that read non-sealed-knob fields.)
|
||||
|
||||
wake_depth affects the session-start payload, not the response
|
||||
shape, so it gets no helper here.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
|
||||
# Phase 07.12-03: HELPER_TO_KNOB_ID maps each apply_profile helper (and the
|
||||
# upstream-gains / session-start virtual keys) to its knob requirement ID.
|
||||
# Used by the dispatch loop to populate response['_knobs_applied'] with
|
||||
# file:symbol provenance for every helper invocation. After Phase 07.12-02
|
||||
# the table contains:
|
||||
# - 8 helper-keyed entries (the AUTIST helpers wired in apply_profile that
|
||||
# produce response-level mutations)
|
||||
# - 2 upstream-gains entries (AUTIST-03 dunn_quadrant, interest_boost)
|
||||
# — provenance strings are written by profile.py:profile_modulation_for_record;
|
||||
# the dispatch loop ignores these virtual keys (HELPER_TO_KNOB_ID.get(...)
|
||||
# returns None for them when keyed by helper name).
|
||||
# - 1 session-start entry (MCP-12 wake_depth) — provenance points into
|
||||
# session.py:assemble_session_start; written by core.dispatch.
|
||||
#
|
||||
# DO NOT re-add removed-knob keys (AUTIST-02 sensory_channel_weights,
|
||||
# event_vs_time_cue, alexithymia_accommodation,
|
||||
# double_empathy) — Plan 07.12-02 deleted them from the registry.
|
||||
HELPER_TO_KNOB_ID: dict[str, str] = {
|
||||
# --- helper-keyed entries (8) — recorded by the dispatch loop -----------
|
||||
"_apply_monotropic_focus": "AUTIST-01", # monotropism_depth
|
||||
"_apply_literal_preservation": "AUTIST-04", # literal_preservation
|
||||
"_apply_pda_tolerance": "AUTIST-05", # demand_avoidance_tolerance
|
||||
"_apply_masking_off": "AUTIST-06", # masking_off
|
||||
"_apply_task_support": "AUTIST-07", # task_support
|
||||
"_apply_inertia_awareness": "AUTIST-10", # inertia_awareness
|
||||
"_apply_formality_relaxation": "AUTIST-13", # camouflaging_relaxation
|
||||
"_apply_scene_construction": "AUTIST-14", # scene_construction_scaffold
|
||||
# --- upstream-gains entries (2) — recorded by profile.py via the kwarg --
|
||||
# These are virtual lookup keys (NOT helper names). The dispatch loop's
|
||||
# HELPER_TO_KNOB_ID.get(helper_name) returns None for the existing pass-
|
||||
# through helpers _apply_dunn_quadrant / _apply_interest_boost because
|
||||
# those helpers are NOT in this table — the AUTHORITATIVE provenance for
|
||||
# the gain is profile.py:profile_modulation_for_record:613-625, written
|
||||
# by the upstream accumulator.
|
||||
"dunn_quadrant": "AUTIST-03", # via profile.py:621-625
|
||||
"interest_boost": "AUTIST-09", # via profile.py:613-616
|
||||
# --- session-start entry (1) — recorded by core.dispatch ---------------
|
||||
# wake_depth is operator-facing; the seed entry is set in
|
||||
# core.dispatch when the session-start path runs. Provenance points
|
||||
# into session.py:373 (assemble_session_start: wake_depth = state.get(...)).
|
||||
"wake_depth": "MCP-12",
|
||||
}
|
||||
|
||||
|
||||
def apply_profile(response: dict, profile: dict) -> dict:
|
||||
"""Apply the 10 dispatch profile knobs to ``response`` in place.
|
||||
|
||||
Contract:
|
||||
- Returns the same response for chainability.
|
||||
- Never raises. Each per-knob helper has its own try/except AND the
|
||||
central dispatch wraps every helper call with an outer guard so a
|
||||
monkey-patched or mis-named helper cannot break the hot path.
|
||||
- Malformed profile state is tolerated (unexpected types, missing keys).
|
||||
- No MCP-side knob names are added to the response.
|
||||
|
||||
Phase 07.12-03 telemetry: emits response['_knobs_applied'] — a dict
|
||||
mapping knob requirement IDs (e.g., 'AUTIST-01') to deterministic
|
||||
file:symbol provenance strings. Future code-readers can audit, per
|
||||
response, which knobs actually mutated which fields. CONTEXT D-04.
|
||||
|
||||
The accumulator is preserved across upstream paths: any entries
|
||||
seeded by core.dispatch BEFORE apply_profile runs (typically the
|
||||
upstream-gains entries for / and the wake_depth
|
||||
seed for MCP-12) survive — the dispatch loop only ADDS entries via
|
||||
helper-keyed lookup, never overwrites the dict shape.
|
||||
"""
|
||||
if not isinstance(response, dict) or not isinstance(profile, dict):
|
||||
return response
|
||||
|
||||
# Phase 07.12-03 BLOCKER 3 fix: preserve any upstream-seeded entries.
|
||||
# core.dispatch seeds knobs_applied for / (via
|
||||
# profile_modulation_for_record) + wake_depth before this
|
||||
# function runs. We extend, never overwrite the dict reference held
|
||||
# by core.dispatch.
|
||||
pre_seeded = response.get("_knobs_applied")
|
||||
if isinstance(pre_seeded, dict):
|
||||
applied: dict[str, str] = pre_seeded
|
||||
else:
|
||||
applied = {}
|
||||
|
||||
# Outer guard per helper call — tolerates a helper that was monkey-patched
|
||||
# to raise (seen in test_pre_existing_keys_untouched_on_exception) or an
|
||||
# accidental helper rewrite that skips the inner try/except.
|
||||
for helper in (
|
||||
_apply_formality_relaxation,
|
||||
_apply_monotropic_focus,
|
||||
_apply_literal_preservation,
|
||||
_apply_masking_off,
|
||||
_apply_task_support,
|
||||
_apply_scene_construction,
|
||||
_apply_dunn_quadrant,
|
||||
_apply_pda_tolerance,
|
||||
_apply_interest_boost,
|
||||
_apply_inertia_awareness,
|
||||
):
|
||||
helper_raised = False
|
||||
try:
|
||||
helper(response, profile)
|
||||
except Exception:
|
||||
helper_raised = True # silent-fail per D5-04 — no audit entry
|
||||
if helper_raised:
|
||||
continue
|
||||
helper_name = helper.__name__
|
||||
knob_id = HELPER_TO_KNOB_ID.get(helper_name)
|
||||
if knob_id is None:
|
||||
# Unmapped helper (e.g., _apply_dunn_quadrant, _apply_interest_boost
|
||||
# — their provenance lives in profile.py via the upstream gains
|
||||
# accumulator). Skip rather than corrupt the audit.
|
||||
continue
|
||||
provenance = f"response_decorator.py:{helper_name}"
|
||||
# No-op markers for the three known mode-gate sites (CONTEXT D-04
|
||||
# line 167 — "consulted and chose to do nothing" vs "knob is dead").
|
||||
if helper_name == "_apply_pda_tolerance":
|
||||
mode = profile.get("demand_avoidance_tolerance", "collaborative")
|
||||
if mode == "neutral":
|
||||
provenance = f"{provenance}:no-op (mode=neutral)"
|
||||
elif helper_name == "_apply_inertia_awareness":
|
||||
if not profile.get("inertia_awareness", False):
|
||||
provenance = f"{provenance}:no-op (knob=False)"
|
||||
elif not response.get("first_turn_recall"):
|
||||
provenance = f"{provenance}:no-op (subsequent turn)"
|
||||
elif helper_name == "_apply_scene_construction":
|
||||
if not profile.get("scene_construction_scaffold", True):
|
||||
provenance = f"{provenance}:no-op (knob=False)"
|
||||
applied[knob_id] = provenance
|
||||
|
||||
response["_knobs_applied"] = applied
|
||||
# wake_depth is the operator-facing knob; it drives session-start payload
|
||||
# shape, not response content. No helper here by design (D5-04). Its
|
||||
# entry is seeded by core.dispatch before apply_profile runs.
|
||||
return response
|
||||
|
||||
|
||||
# ---------------------------------------------------------- per-knob helpers
|
||||
# Each helper MUST be wrapped in try/except Exception: pass — a malformed
|
||||
# profile knob value cannot break the hot recall path.
|
||||
|
||||
|
||||
def _apply_formality_relaxation(response: dict, profile: dict) -> None:
|
||||
"""AUTIST-13 camouflaging_relaxation > 0.5 -> rewrite surface_text toward
|
||||
informal register.
|
||||
|
||||
The transform here is intentionally minimal (just strips trailing
|
||||
"Sir"/"Madam" honorifics). The weekly pass owns the heavy lift; this
|
||||
hook ensures response-time consistency.
|
||||
"""
|
||||
try:
|
||||
level = float(profile.get("camouflaging_relaxation", 0.0))
|
||||
if level <= 0.5:
|
||||
return
|
||||
for hit in response.get("hits", []) or []:
|
||||
if not isinstance(hit, dict):
|
||||
continue
|
||||
text = hit.get("literal_surface") or hit.get("surface_text")
|
||||
if not isinstance(text, str):
|
||||
continue
|
||||
# Drop stale honorifics if present (best-effort).
|
||||
stripped = text
|
||||
for honorific in (" Sir.", " Sir,", " Madam.", " Madam,"):
|
||||
stripped = stripped.replace(honorific, ".")
|
||||
if "surface_text" in hit:
|
||||
hit["surface_text"] = stripped
|
||||
# Leave literal_surface byte-exact (C5 invariant).
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def _apply_monotropic_focus(response: dict, profile: dict) -> None:
|
||||
"""AUTIST-01 monotropism_depth per domain -> narrow top-k to dominant.
|
||||
|
||||
When any domain in monotropism_depth has depth > 0.7, hits carrying a
|
||||
non-matching domain tag are down-ranked to the tail of the list. The
|
||||
transform is conservative: we reorder, never delete.
|
||||
"""
|
||||
try:
|
||||
md = profile.get("monotropism_depth")
|
||||
if not isinstance(md, dict) or not md:
|
||||
return
|
||||
hot_domains = {d for d, depth in md.items() if _as_float(depth, 0.0) > 0.7}
|
||||
if not hot_domains:
|
||||
return
|
||||
hits = response.get("hits")
|
||||
if not isinstance(hits, list) or not hits:
|
||||
return
|
||||
def _key(h):
|
||||
if not isinstance(h, dict):
|
||||
return 1
|
||||
tags = h.get("tags") or []
|
||||
for t in tags:
|
||||
if isinstance(t, str) and t.startswith("domain:"):
|
||||
return 0 if t.split(":", 1)[1] in hot_domains else 1
|
||||
return 1
|
||||
hits.sort(key=_key)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def _apply_literal_preservation(response: dict, profile: dict) -> None:
|
||||
"""strong -> keep literal_surface byte-exact (default); loose
|
||||
-> surface_text may be summarised. C5 invariant: literal_surface is
|
||||
never mutated.
|
||||
"""
|
||||
try:
|
||||
mode = profile.get("literal_preservation", "strong")
|
||||
if mode not in ("strong", "medium", "loose"):
|
||||
return
|
||||
# No-op by design: the hook exists for future summarisation logic but
|
||||
# must never mutate literal_surface per C5.
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def _apply_masking_off(response: dict, profile: dict) -> None:
|
||||
"""masking_off True -> strip performative empathy filler."""
|
||||
try:
|
||||
if not profile.get("masking_off", True):
|
||||
return
|
||||
filler = (
|
||||
"Great question! ",
|
||||
"Certainly! ",
|
||||
"Of course! ",
|
||||
)
|
||||
for hit in response.get("hits", []) or []:
|
||||
if not isinstance(hit, dict):
|
||||
continue
|
||||
txt = hit.get("surface_text")
|
||||
if isinstance(txt, str):
|
||||
for f in filler:
|
||||
if txt.startswith(f):
|
||||
hit["surface_text"] = txt[len(f):]
|
||||
break
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def _apply_task_support(response: dict, profile: dict) -> None:
|
||||
"""cued_recognition -> adjacent_suggestions populated (no-op
|
||||
here because retrieve.recall already emits them); blank_recall -> strip
|
||||
suggestions to force free recall.
|
||||
"""
|
||||
try:
|
||||
mode = profile.get("task_support", "cued_recognition")
|
||||
if mode != "blank_recall":
|
||||
return
|
||||
for hit in response.get("hits", []) or []:
|
||||
if isinstance(hit, dict) and "adjacent_suggestions" in hit:
|
||||
hit["adjacent_suggestions"] = []
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def _apply_scene_construction(response: dict, profile: dict) -> None:
|
||||
"""scene_construction_scaffold autobiographical reconstruction
|
||||
hint (Phase 07.12-01).
|
||||
|
||||
PATTERNS.md option-3 reconciliation: the hit dict from _hit_to_json
|
||||
(core.py:712-719) does NOT carry tier/session_id/captured_at, so we drop
|
||||
the tier filter from the original design. When knob=True, attach
|
||||
_scene_hint to EVERY hit; downstream consumers ignore the hint on
|
||||
non-episodic content without harm. The 'advice' string is fixed —
|
||||
no LLM call.
|
||||
|
||||
When False: no _scene_hint key added (test asserts absence).
|
||||
"""
|
||||
try:
|
||||
if not profile.get("scene_construction_scaffold", True):
|
||||
return
|
||||
for hit in response.get("hits", []) or []:
|
||||
if not isinstance(hit, dict):
|
||||
continue
|
||||
hit["_scene_hint"] = {
|
||||
"session_id": hit.get("session_id"),
|
||||
"captured_at": hit.get("captured_at"),
|
||||
"advice": "use as scaffold for autobiographical reconstruction",
|
||||
}
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def _apply_dunn_quadrant(response: dict, profile: dict) -> None:
|
||||
"""dunn_quadrant -> HIPPEA precision is upstream; no-op here."""
|
||||
try:
|
||||
_ = profile.get("dunn_quadrant", "neutral")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def _apply_pda_tolerance(response: dict, profile: dict) -> None:
|
||||
"""demand_avoidance_tolerance lexical softener (Phase 07.12-01).
|
||||
|
||||
- collaborative (default): replace leading imperatives in each
|
||||
adjacent_suggestion entry per the frozen substitution table from
|
||||
D-02. Only first-word matches; mid-sentence
|
||||
imperatives are NOT touched (avoids false positives in code blocks).
|
||||
- avoidant: prepend 'FYI: ' to every adjacent_suggestion entry.
|
||||
- neutral: bypass.
|
||||
"""
|
||||
try:
|
||||
mode = profile.get("demand_avoidance_tolerance", "collaborative")
|
||||
if mode == "neutral":
|
||||
return
|
||||
if mode == "avoidant":
|
||||
for hit in response.get("hits", []) or []:
|
||||
if not isinstance(hit, dict):
|
||||
continue
|
||||
suggestions = hit.get("adjacent_suggestions")
|
||||
if not isinstance(suggestions, list):
|
||||
continue
|
||||
hit["adjacent_suggestions"] = [
|
||||
f"FYI: {entry}" for entry in suggestions
|
||||
]
|
||||
return
|
||||
if mode == "collaborative":
|
||||
# Frozen table per CONTEXT — DO NOT extend without a phase decision.
|
||||
substitutions: tuple[tuple[str, str], ...] = (
|
||||
("Try ", "You could try "),
|
||||
("Do ", "Consider "),
|
||||
("Use ", "Try using "),
|
||||
("Run ", "Try running "),
|
||||
)
|
||||
for hit in response.get("hits", []) or []:
|
||||
if not isinstance(hit, dict):
|
||||
continue
|
||||
suggestions = hit.get("adjacent_suggestions")
|
||||
if not isinstance(suggestions, list):
|
||||
continue
|
||||
rewritten: list = []
|
||||
for entry in suggestions:
|
||||
if not isinstance(entry, str):
|
||||
rewritten.append(entry)
|
||||
continue
|
||||
new_entry = entry
|
||||
for prefix, replacement in substitutions:
|
||||
if entry.startswith(prefix):
|
||||
new_entry = replacement + entry[len(prefix):]
|
||||
break
|
||||
rewritten.append(new_entry)
|
||||
hit["adjacent_suggestions"] = rewritten
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def _apply_interest_boost(response: dict, profile: dict) -> None:
|
||||
"""interest_boost > 0 -> amplify hits in interest domains.
|
||||
Applied during scoring, not at response rewrite time; no-op here.
|
||||
"""
|
||||
try:
|
||||
_ = profile.get("interest_boost", 0.0)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def _apply_inertia_awareness(response: dict, profile: dict) -> None:
|
||||
"""inertia_awareness session-resumption cue (Phase 07.12-01).
|
||||
|
||||
BLOCKER 1 fix (CONTEXT D-02, 2026-04-30): the live upstream hook at
|
||||
core.py:1178 sets response["first_turn_recall"] to a DICT, not a bool.
|
||||
The gate MUST be a shape-agnostic truthy check — `is True` equality
|
||||
would silent-no-op in production.
|
||||
|
||||
When knob=True AND response["first_turn_recall"] is truthy (set by
|
||||
_first_turn_recall_hook at core.py:1178 on the first turn of a
|
||||
session), prepend a one-line resumption cue to the top-1 hit's
|
||||
literal_surface. The text is fixed (not LLM-generated) for determinism.
|
||||
|
||||
CONTEXT explicitly forbids the per-recall fallback: if the
|
||||
first_turn_recall flag is unreliable, escalate via checkpoint rather
|
||||
than silently re-introducing recall-noise.
|
||||
|
||||
Subsequent turns OR knob=False → no transform; literal_surface stays
|
||||
byte-exact (C5 invariant).
|
||||
"""
|
||||
try:
|
||||
if not profile.get("inertia_awareness", False):
|
||||
return
|
||||
# Truthy presence check — shape-agnostic (works for dict OR bool).
|
||||
# core.py:1178 sets this to a dict on the first turn; the truthy
|
||||
# check covers both production (dict) and any test path (bool).
|
||||
if not response.get("first_turn_recall"):
|
||||
return
|
||||
hits = response.get("hits") or []
|
||||
if not hits:
|
||||
return
|
||||
top = hits[0]
|
||||
if not isinstance(top, dict):
|
||||
return
|
||||
literal = top.get("literal_surface")
|
||||
if not isinstance(literal, str):
|
||||
return
|
||||
top["literal_surface"] = f"Resuming from your last session: {literal}"
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
# ----------------------------------------------------------------- utilities
|
||||
def _as_float(value, default: float) -> float:
|
||||
"""Coerce ``value`` to float; return ``default`` on failure."""
|
||||
try:
|
||||
return float(value)
|
||||
except (TypeError, ValueError):
|
||||
return default
|
||||
701
src/iai_mcp/retrieve.py
Normal file
701
src/iai_mcp/retrieve.py
Normal file
|
|
@ -0,0 +1,701 @@
|
|||
"""Retrieval + reinforcement + contradiction paths.
|
||||
|
||||
- `recall`: baseline cosine top-k -- kept as a fallback for the
|
||||
empty-store case and for regression tests.
|
||||
- `build_runtime_graph`: reconstruct a MemoryGraph + CommunityAssignment +
|
||||
rich-club from LanceDB state; consumed by core.py to drive `pipeline_recall`.
|
||||
- `reinforce_edges`, `contradict`: unchanged from Plan 01.
|
||||
- `link_temporal_next`: records a `record_inserted` event
|
||||
and creates a `temporal_next` edge from the previous same-session insertion
|
||||
to the new record if that event happened within the last 5 minutes.
|
||||
|
||||
Constitutional rules enforced here:
|
||||
- every recall appends a provenance entry to every returned record.
|
||||
- reinforce boosts pairwise Hebbian edges among co-retrieved ids.
|
||||
- edge-based: contradict creates a linked record, preserves original.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import time
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from itertools import combinations
|
||||
from uuid import UUID, uuid4
|
||||
|
||||
from iai_mcp.aaak import enforce_english_raw, generate_aaak_index
|
||||
from iai_mcp.events import query_events, write_event
|
||||
from iai_mcp.store import MemoryStore
|
||||
from iai_mcp.types import (
|
||||
EMBED_DIM,
|
||||
EdgeUpdate,
|
||||
MemoryHit,
|
||||
MemoryRecord,
|
||||
RecallResponse,
|
||||
ReconsolidationReceipt,
|
||||
)
|
||||
|
||||
|
||||
# Plan 07.11-02 / structured-log handle for the graph-build
|
||||
# decrypt-failure path. Same one-liner the rest of the project uses
|
||||
# (cf. capture.py:54, pipeline.py:33-imports). Used by the
|
||||
# `graph_build_decrypt_failed` event when AES-GCM decrypt of a
|
||||
# record's literal_surface raises during build_runtime_graph.
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
# Per-process rate limit for graph_build_decrypt_failed (rid -> monotonic ts).
|
||||
_GRAPH_DECRYPT_WARN_LAST: dict[str, float] = {}
|
||||
_GRAPH_DECRYPT_WARN_INTERVAL_SEC = 300.0
|
||||
|
||||
|
||||
# temporal_next window. Records inserted within this window
|
||||
# in the same session are linked with a temporal_next edge.
|
||||
TEMPORAL_NEXT_WINDOW = timedelta(minutes=5)
|
||||
|
||||
|
||||
def recall(
|
||||
store: MemoryStore,
|
||||
cue_embedding: list[float],
|
||||
cue_text: str,
|
||||
session_id: str,
|
||||
budget_tokens: int = 1500,
|
||||
k_hits: int = 5,
|
||||
k_anti: int = 3,
|
||||
mode: str = "verbatim",
|
||||
) -> RecallResponse:
|
||||
"""Phase 1 baseline retrieval.
|
||||
|
||||
Fetches top (k_hits + k_anti) by cosine similarity; treats the top k_hits as
|
||||
excitatory hits and the bottom k_anti as a naive anti-hit stub. Plan 02 will
|
||||
replace anti-hits with real contradicts-edge + AAAK-opposition logic.
|
||||
|
||||
Every returned hit gets a provenance entry appended.
|
||||
|
||||
R7: `mode` kwarg defaults to 'verbatim'. The baseline
|
||||
is the conservative fallback path (used by core.dispatch when the runtime
|
||||
graph is unavailable / build fails / store is empty). Defaulting to
|
||||
verbatim protects the North-Star ≥99% essential variable on the degraded
|
||||
path — the user never silently lands on a schema-dominated surface even
|
||||
when the full pipeline is unreachable. Verbatim mode applies the same
|
||||
tier filter + schema exclusion as pipeline_recall verbatim mode so the
|
||||
contract on hits[] is identical regardless of which route core dispatched
|
||||
to. Concept mode preserves today's pure-cosine baseline (no filter).
|
||||
"""
|
||||
raw = store.query_similar(cue_embedding, k=k_hits + k_anti)
|
||||
|
||||
# R7: verbatim mode candidate filter on the baseline path.
|
||||
# tier='episodic' AND no pattern:* tag — same exclusion contract as
|
||||
# pipeline_recall verbatim mode (R5). Also excludes D-09
|
||||
# tier='semantic_pruned' soft-deleted schemas naturally.
|
||||
if mode == "verbatim":
|
||||
raw = [
|
||||
(rec, score) for rec, score in raw
|
||||
if rec.tier == "episodic"
|
||||
and not any(t.startswith("pattern:") for t in (rec.tags or []))
|
||||
]
|
||||
|
||||
hits: list[MemoryHit] = []
|
||||
# (D5-01 effect c fix): collect provenance entries during the
|
||||
# hit-building loop, flush via ONE store.append_provenance_batch call
|
||||
# after the loop closes. Replaces the per-hit
|
||||
# `store.append_provenance(record.id, entry)` pattern that produced the
|
||||
# 64x wall-clock blow-up and rank perturbation under memory pressure
|
||||
# (pressplay 8 GB M1, 2026-04-19). Mirrors the L-02 fix already in
|
||||
# src/iai_mcp/pipeline.py::pipeline_recall (see D-SPEED SC-6).
|
||||
provenance_pending: list[tuple[UUID, dict]] = []
|
||||
now_iso = datetime.now(timezone.utc).isoformat()
|
||||
for record, score in raw[:k_hits]:
|
||||
hits.append(
|
||||
MemoryHit(
|
||||
record_id=record.id,
|
||||
score=float(score),
|
||||
reason=f"cosine {score:.3f}",
|
||||
literal_surface=record.literal_surface,
|
||||
adjacent_suggestions=[], # Plan 03 fills per AUTIST-07
|
||||
)
|
||||
)
|
||||
# every recall appends a provenance entry; write is batched
|
||||
# end-of-loop to preserve rank stability (Plan 05-02 effect c fix).
|
||||
provenance_pending.append((
|
||||
record.id,
|
||||
{
|
||||
"ts": now_iso,
|
||||
"cue": cue_text,
|
||||
"session_id": session_id,
|
||||
},
|
||||
))
|
||||
|
||||
# flush: single merge_insert transaction replaces N read-modify-writes.
|
||||
# Diagnostic-only: never block the user's recall on a provenance-write failure
|
||||
# (Rule 1 -- matches pipeline_recall's defensive contract).
|
||||
if provenance_pending:
|
||||
try:
|
||||
store.append_provenance_batch(provenance_pending)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
anti_hits: list[MemoryHit] = []
|
||||
# Naive anti-hit stub: bottom-k of the same query. Plan 02 replaces with
|
||||
# real contradicts-edge + AAAK-opposition scoring.
|
||||
tail = raw[-k_anti:] if len(raw) >= k_anti else []
|
||||
for record, score in reversed(tail):
|
||||
anti_hits.append(
|
||||
MemoryHit(
|
||||
record_id=record.id,
|
||||
score=float(score),
|
||||
reason="low-similarity baseline anti-hit",
|
||||
literal_surface=record.literal_surface,
|
||||
adjacent_suggestions=[],
|
||||
)
|
||||
)
|
||||
|
||||
# on-read S4 viability check on the baseline recall
|
||||
# path too, so behaviour is consistent regardless of which recall route
|
||||
# core.py dispatches to.
|
||||
try:
|
||||
from iai_mcp.s4 import on_read_check
|
||||
s4_hints = on_read_check(store, hits, session_id=session_id)
|
||||
except Exception:
|
||||
s4_hints = []
|
||||
|
||||
response = RecallResponse(
|
||||
hits=hits,
|
||||
anti_hits=anti_hits,
|
||||
activation_trace=[h.record_id for h in hits],
|
||||
# ~4 chars per token heuristic; Plan 03 benchmark will use Anthropic count_tokens.
|
||||
budget_used=sum(len(h.literal_surface) for h in hits) // 4,
|
||||
hints=s4_hints,
|
||||
# surface mode on the baseline response too. The
|
||||
# baseline does not produce concept-mode patterns_observed (that's
|
||||
# the full pipeline's job — patterns_observed reflects displaced
|
||||
# candidates the rank stage would have surfaced; baseline has no
|
||||
# rank stage). Default [] is correct for both modes here.
|
||||
cue_mode=mode,
|
||||
patterns_observed=[],
|
||||
)
|
||||
|
||||
# (M2 LIVE prerequisite): emit kind='retrieval_used' so M2
|
||||
# precision@5 can be computed live from production emits, not seeded
|
||||
# events. Diagnostic-only: never block the recall path on emit failure.
|
||||
try:
|
||||
write_event(
|
||||
store,
|
||||
kind="retrieval_used",
|
||||
data={
|
||||
"hit_ids": [str(h.record_id) for h in hits],
|
||||
"query": cue_text,
|
||||
"used": len(hits) > 0,
|
||||
"budget_used": response.budget_used,
|
||||
"path": "baseline_recall",
|
||||
},
|
||||
severity="info",
|
||||
session_id=session_id,
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return response
|
||||
|
||||
|
||||
def reinforce_edges(
|
||||
store: MemoryStore, ids: list[UUID], delta: float = 0.1
|
||||
) -> EdgeUpdate:
|
||||
"""Hebbian boost on all pairwise edges among co-retrieved ids.
|
||||
|
||||
Pairwise = C(n, 2) combinations. Delta 0.1 is the Phase-1 simple-increment
|
||||
default.
|
||||
"""
|
||||
pairs: list[tuple[UUID, UUID]] = list(combinations(ids, 2))
|
||||
new_weights = store.boost_edges(pairs, delta=delta)
|
||||
# Canonical JSON-string keys (tuples are not JSON-serialisable).
|
||||
new_weights_str = {f"{a}|{b}": float(w) for (a, b), w in new_weights.items()}
|
||||
return EdgeUpdate(
|
||||
edges_boosted=len(pairs),
|
||||
pairs=pairs,
|
||||
new_weights=new_weights_str,
|
||||
)
|
||||
|
||||
|
||||
def contradict(
|
||||
store: MemoryStore,
|
||||
original_id: UUID,
|
||||
new_fact: str,
|
||||
new_embedding: list[float],
|
||||
) -> ReconsolidationReceipt:
|
||||
"""MEM-05 edge-based reconsolidation.
|
||||
|
||||
Creates a new record with `new_fact` and adds a `contradicts` edge from
|
||||
original -> new. Does NOT rewrite the original record -- full amend-in-place
|
||||
is deferred to a future version.
|
||||
"""
|
||||
original = store.get(original_id)
|
||||
if original is None:
|
||||
raise ValueError(f"unknown record {original_id}")
|
||||
# validate against the store's actual embedding dim,
|
||||
# not the legacy hardcoded EMBED_DIM. Migrations and env overrides both
|
||||
# rely on store.embed_dim as source of truth.
|
||||
target_dim = store.embed_dim
|
||||
if len(new_embedding) != target_dim:
|
||||
raise ValueError(
|
||||
f"new_embedding must be {target_dim}d, got {len(new_embedding)}"
|
||||
)
|
||||
now = datetime.now(timezone.utc)
|
||||
new_rec = MemoryRecord(
|
||||
id=uuid4(),
|
||||
tier=original.tier,
|
||||
literal_surface=new_fact,
|
||||
aaak_index="",
|
||||
embedding=list(new_embedding),
|
||||
community_id=original.community_id,
|
||||
centrality=0.0,
|
||||
detail_level=original.detail_level,
|
||||
pinned=False,
|
||||
stability=0.0,
|
||||
difficulty=0.0,
|
||||
last_reviewed=None,
|
||||
never_decay=(original.detail_level >= 3),
|
||||
never_merge=False,
|
||||
provenance=[{"ts": now.isoformat(), "cue": "contradict", "session_id": "-"}],
|
||||
created_at=now,
|
||||
updated_at=now,
|
||||
tags=["contradict"],
|
||||
# propagate the original record's language tag to the contradiction.
|
||||
# A contradiction is a linguistic amendment; it lives in the same
|
||||
# conversational register as the source.
|
||||
language=getattr(original, "language", "en") or "en",
|
||||
)
|
||||
# H-02: constitutional guard must run on EVERY write path, not just the
|
||||
# L0 seed. A Cyrillic/CJK `new_fact` without an explicit `raw:<lang>` tag
|
||||
# would otherwise land in literal_surface unguarded. Callers who intentionally
|
||||
# store non-English raw capture pre-tag the record via the MCP surface.
|
||||
#
|
||||
# note: once Task 2 ships enforce_language_tagged, call sites in
|
||||
# core.py + retrieve should migrate. For Phase-1 back-compat we keep
|
||||
# enforce_english_raw here so the H-02 Cyrillic-rejection test keeps passing.
|
||||
enforce_english_raw(new_rec)
|
||||
new_rec.aaak_index = generate_aaak_index(new_rec)
|
||||
store.insert(new_rec)
|
||||
store.add_contradicts_edge(original_id, new_rec.id)
|
||||
|
||||
# monotropic proactive check fires only in high-focus
|
||||
# domains. Hints aren't surfaced via contradict() (its signature is fixed
|
||||
# to ReconsolidationReceipt), but events land in the events table so the
|
||||
# user can inspect them via `iai-mcp contradictions` in Plan 02-04.
|
||||
try:
|
||||
from iai_mcp.s4 import monotropic_proactive_check
|
||||
# Deliberately empty profile_state: callers of contradict() don't pass
|
||||
# one; core.py can inject a fuller state via its own wrapper once the
|
||||
# profile is wired to pipeline_recall.
|
||||
monotropic_proactive_check(store, new_rec, {}, session_id="-")
|
||||
except Exception:
|
||||
pass # Rule 1: never block writes on S4 diagnostic path.
|
||||
|
||||
return ReconsolidationReceipt(
|
||||
original_id=original_id,
|
||||
new_record_id=new_rec.id,
|
||||
edge_type="contradicts",
|
||||
ts=now,
|
||||
)
|
||||
|
||||
|
||||
def link_temporal_next(
|
||||
store: MemoryStore,
|
||||
new_record: MemoryRecord,
|
||||
session_id: str,
|
||||
) -> UUID | None:
|
||||
"""create temporal_next edge + record_inserted event.
|
||||
|
||||
Reads the most recent `record_inserted` event (any record) from the events
|
||||
table. If that event happened within TEMPORAL_NEXT_WINDOW AND in the same
|
||||
session, create a `temporal_next` edge from the previous record to the new
|
||||
record.
|
||||
|
||||
Then write a fresh `record_inserted` event marking this insertion.
|
||||
|
||||
Returns the previous record UUID (the edge source) or None if no edge was
|
||||
created (either no prior insert or stale / cross-session).
|
||||
"""
|
||||
now = datetime.now(timezone.utc)
|
||||
# Look at the last ~20 record_inserted events to find the most recent match.
|
||||
prior_events = query_events(
|
||||
store, kind="record_inserted",
|
||||
since=now - TEMPORAL_NEXT_WINDOW, limit=20,
|
||||
)
|
||||
previous_id: UUID | None = None
|
||||
for ev in prior_events:
|
||||
if ev.get("session_id") != session_id:
|
||||
continue
|
||||
raw = ev["data"].get("record_id")
|
||||
if not raw:
|
||||
continue
|
||||
try:
|
||||
candidate = UUID(raw)
|
||||
except (TypeError, ValueError):
|
||||
continue
|
||||
if candidate == new_record.id:
|
||||
continue
|
||||
previous_id = candidate
|
||||
break # events are newest-first
|
||||
|
||||
if previous_id is not None:
|
||||
try:
|
||||
store.boost_edges(
|
||||
[(previous_id, new_record.id)],
|
||||
edge_type="temporal_next",
|
||||
delta=1.0,
|
||||
)
|
||||
except Exception:
|
||||
# Diagnostic only; don't block the write path on edge failure.
|
||||
pass
|
||||
|
||||
write_event(
|
||||
store,
|
||||
kind="record_inserted",
|
||||
data={
|
||||
"record_id": str(new_record.id),
|
||||
"tier": new_record.tier,
|
||||
},
|
||||
severity="info",
|
||||
session_id=session_id,
|
||||
source_ids=[new_record.id],
|
||||
)
|
||||
return previous_id
|
||||
|
||||
|
||||
def _make_graph_sync_hook(G):
|
||||
"""factory for the store -> graph mutation callback.
|
||||
|
||||
Returned callable dispatches on ``op`` (insert|update|delete) and
|
||||
mutates ``G`` (a NetworkX Graph) in-place. On unknown op or any
|
||||
payload shape error, the hook is a quiet no-op — the store's
|
||||
try/except surface turns exceptions into stderr events anyway, but
|
||||
we stay defensive here so hook-level bugs never reach the store.
|
||||
"""
|
||||
def _hook(op: str, record) -> None:
|
||||
nid = str(record.id)
|
||||
if op == "insert":
|
||||
payload = {
|
||||
"embedding": list(record.embedding),
|
||||
"surface": record.literal_surface,
|
||||
"centrality": float(record.centrality),
|
||||
"tier": record.tier,
|
||||
"pinned": bool(record.pinned),
|
||||
"tags": list(getattr(record, "tags", []) or []),
|
||||
"language": str(getattr(record, "language", "en") or "en"),
|
||||
}
|
||||
G.add_node(nid, **payload)
|
||||
elif op == "update":
|
||||
payload = {
|
||||
"embedding": list(record.embedding),
|
||||
"surface": record.literal_surface,
|
||||
"centrality": float(record.centrality),
|
||||
"tier": record.tier,
|
||||
"pinned": bool(record.pinned),
|
||||
"tags": list(getattr(record, "tags", []) or []),
|
||||
"language": str(getattr(record, "language", "en") or "en"),
|
||||
}
|
||||
if nid in G.nodes:
|
||||
G.nodes[nid].update(payload)
|
||||
else:
|
||||
G.add_node(nid, **payload)
|
||||
elif op == "delete":
|
||||
if nid in G.nodes:
|
||||
G.remove_node(nid)
|
||||
# Unknown op: silently ignore. The store writes are authoritative;
|
||||
# unknown ops will be picked up on the next full rebuild.
|
||||
return _hook
|
||||
|
||||
|
||||
def build_runtime_graph(store: MemoryStore):
|
||||
"""Reconstruct MemoryGraph + CommunityAssignment + rich-club from LanceDB.
|
||||
|
||||
Called by core.py's `memory_recall` dispatch when the store is non-empty.
|
||||
(P4.A): the expensive pieces -- Leiden community
|
||||
detection + rich-club selection -- are cached to disk in
|
||||
``runtime_graph_cache.json`` keyed on the store's (records_count,
|
||||
edges_count, schema_version, embed_dim) tuple. Cache hit skips
|
||||
~230 ms of Leiden + rich-club work. MemoryGraph itself is rebuilt
|
||||
on every call from the LanceDB rows because caching it would
|
||||
require a non-JSON format for the NetworkX object.
|
||||
|
||||
(hot-path switch): every graph node carries the record's
|
||||
payload (embedding, surface, centrality, tier, pinned) as NetworkX
|
||||
node attributes. ``pipeline._read_record_payload`` reads from these
|
||||
attributes at seed + spread stages, eliminating the per-id
|
||||
``store.get`` LanceDB round-trips that dominated at N=1k
|
||||
(737 ms -> target ~20-30 ms). A ``_graph_sync_hook`` is registered
|
||||
on the store so insert/update/delete mirror their mutations to the
|
||||
in-RAM graph; hook failures are logged, never raised (write-path
|
||||
authoritative). On cache HIT the node_payload blob rehydrates the
|
||||
NetworkX attributes directly; MISS rebuilds them from the fresh
|
||||
store.all_records() walk that was already happening for the graph.
|
||||
|
||||
Returns (graph, assignment, rich_club).
|
||||
|
||||
Local imports keep the heavy graph/community modules out of Plan-01's
|
||||
hot path (core.py module-load time stays small).
|
||||
"""
|
||||
from iai_mcp.community import CommunityAssignment, detect_communities
|
||||
from iai_mcp.graph import MemoryGraph
|
||||
from iai_mcp.richclub import rich_club_nodes
|
||||
from iai_mcp import runtime_graph_cache
|
||||
|
||||
graph = MemoryGraph()
|
||||
|
||||
# try the on-disk cache before running Leiden + rich-club.
|
||||
# Cache-first so we can consult the v2 node_payload blob for free.
|
||||
cached = runtime_graph_cache.try_load(store)
|
||||
assignment = None
|
||||
rich_club = None
|
||||
cached_node_payload: dict[str, dict] | None = None
|
||||
# R2: cached max_degree rehydrates without re-walking the
|
||||
# NetworkX graph. Used as a defensive fallback if the live degree
|
||||
# walk below fails for any reason.
|
||||
cached_max_degree: int = 0
|
||||
if cached is not None:
|
||||
assignment, rich_club, cached_node_payload, cached_max_degree = cached
|
||||
|
||||
# Build nodes. If the cache gave us a node_payload blob AND the store
|
||||
# record count matches, reuse it — skips the encrypted LanceDB scan.
|
||||
# Otherwise fall through to the full row walk so node attrs stay
|
||||
# strictly derived from the authoritative store.
|
||||
records_tbl = store.db.open_table("records")
|
||||
records_count = int(records_tbl.count_rows())
|
||||
use_cached_payload = (
|
||||
cached_node_payload is not None
|
||||
and len(cached_node_payload) == records_count
|
||||
)
|
||||
|
||||
if use_cached_payload:
|
||||
# Fast path: graph nodes + attributes come from the cache JSON.
|
||||
for nid, payload in cached_node_payload.items():
|
||||
# MemoryGraph.add_node has a fixed signature; use it for
|
||||
# topology, then pour the full payload into the NetworkX
|
||||
# node attribute dict.
|
||||
graph.add_node(
|
||||
UUID(nid),
|
||||
community_id=None,
|
||||
embedding=list(payload.get("embedding") or []),
|
||||
)
|
||||
graph._nx.nodes[nid].update({
|
||||
"embedding": list(payload.get("embedding") or []),
|
||||
"surface": payload.get("surface", ""),
|
||||
"centrality": float(payload.get("centrality") or 0.0),
|
||||
"tier": payload.get("tier", "episodic"),
|
||||
"pinned": bool(payload.get("pinned", False)),
|
||||
"tags": list(payload.get("tags") or []),
|
||||
"language": str(payload.get("language", "en") or "en"),
|
||||
})
|
||||
node_payload_for_cache = cached_node_payload
|
||||
else:
|
||||
# MISS path: walk the records table, attach payload at
|
||||
# graph.add_node time, and remember the payload so we can
|
||||
# persist it into the cache below.
|
||||
df = records_tbl.to_pandas()
|
||||
node_payload_for_cache = {}
|
||||
decrypt_fail_events = 0
|
||||
decrypt_fail_unique: set[str] = set()
|
||||
for _, row in df.iterrows():
|
||||
rid = UUID(row["id"])
|
||||
community_id = (
|
||||
UUID(row["community_id"])
|
||||
if row["community_id"]
|
||||
else None
|
||||
)
|
||||
embedding = (
|
||||
list(row["embedding"])
|
||||
if row["embedding"] is not None
|
||||
else [0.0] * EMBED_DIM
|
||||
)
|
||||
# literal_surface is AES-GCM encrypted at rest.
|
||||
# Decrypt here via the store's helper so the graph payload
|
||||
# carries plaintext the pipeline can use directly.
|
||||
literal_raw = row.get("literal_surface") or ""
|
||||
try:
|
||||
from iai_mcp.crypto import is_encrypted
|
||||
if is_encrypted(literal_raw):
|
||||
literal_raw = store._decrypt_for_record(rid, literal_raw)
|
||||
except Exception:
|
||||
# Plan 07.11-02 / (V2-03 fix): a decrypt failure here
|
||||
# used to assign ``literal_raw = ""`` and then fall through
|
||||
# to update the live NetworkX node + persist to
|
||||
# ``node_payload_for_cache``. That empty-surface payload
|
||||
# then poisoned the on-disk runtime_graph_cache, and on
|
||||
# warm-restart pipeline._read_record_payload happily
|
||||
# returned ``literal_surface=""`` claiming success —
|
||||
# silent corruption of verbatim recall.
|
||||
#
|
||||
# Skip-the-node approach (chosen over the _decrypt_failed
|
||||
# sentinel-flag because it produces the smallest disk
|
||||
# footprint and the simplest invariant: "the cache
|
||||
# contains only records whose surface successfully
|
||||
# decrypted"). The pipeline read path falls back to
|
||||
# store.get(rid) which has its own retry semantics in
|
||||
# crypto.py.
|
||||
#
|
||||
# Tail-end mandate: per-record ``graph_build_decrypt_failed``
|
||||
# warnings are rate-limited (default 300s) so wrong-key floods
|
||||
# do not spam launchd stderr; a per-build summary still fires.
|
||||
rid_s = str(rid)
|
||||
decrypt_fail_events += 1
|
||||
decrypt_fail_unique.add(rid_s)
|
||||
now_m = time.monotonic()
|
||||
last_m = _GRAPH_DECRYPT_WARN_LAST.get(rid_s, 0.0)
|
||||
if now_m - last_m >= _GRAPH_DECRYPT_WARN_INTERVAL_SEC:
|
||||
_GRAPH_DECRYPT_WARN_LAST[rid_s] = now_m
|
||||
log.warning(
|
||||
"graph_build_decrypt_failed",
|
||||
extra={"record_id": rid_s},
|
||||
)
|
||||
continue
|
||||
|
||||
tier = row.get("tier") or "episodic"
|
||||
centrality = float(row.get("centrality") or 0.0)
|
||||
pinned = bool(row.get("pinned") or False)
|
||||
# tags travel on graph nodes so the rank stage's
|
||||
# SimpleRecordView carries tags for profile_modulation_for_record
|
||||
# without needing a store.get fallback in the hot path.
|
||||
tags_raw = row.get("tags_json") or "[]"
|
||||
try:
|
||||
import json as _json
|
||||
tags_list = _json.loads(tags_raw) if isinstance(tags_raw, str) else list(tags_raw)
|
||||
if not isinstance(tags_list, list):
|
||||
tags_list = []
|
||||
except Exception:
|
||||
tags_list = []
|
||||
language = str(row.get("language") or "en")
|
||||
|
||||
graph.add_node(
|
||||
rid,
|
||||
community_id=community_id,
|
||||
embedding=embedding,
|
||||
)
|
||||
# Plan 05-12/05-13: attach record payload to the NetworkX node dict.
|
||||
graph._nx.nodes[str(rid)].update({
|
||||
"embedding": list(embedding),
|
||||
"surface": str(literal_raw),
|
||||
"centrality": centrality,
|
||||
"tier": str(tier),
|
||||
"pinned": pinned,
|
||||
"tags": list(tags_list),
|
||||
"language": language,
|
||||
})
|
||||
node_payload_for_cache[str(rid)] = {
|
||||
"embedding": list(embedding),
|
||||
"surface": str(literal_raw),
|
||||
"centrality": centrality,
|
||||
"tier": str(tier),
|
||||
"pinned": pinned,
|
||||
"tags": list(tags_list),
|
||||
"language": language,
|
||||
}
|
||||
|
||||
if decrypt_fail_events > 0:
|
||||
log.warning(
|
||||
"graph_build_decrypt_failed_summary",
|
||||
extra={
|
||||
"unique_records": len(decrypt_fail_unique),
|
||||
"total_skip_events": decrypt_fail_events,
|
||||
},
|
||||
)
|
||||
|
||||
edges_df = store.db.open_table("edges").to_pandas()
|
||||
for _, row in edges_df.iterrows():
|
||||
graph.add_edge(
|
||||
UUID(row["src"]),
|
||||
UUID(row["dst"]),
|
||||
weight=float(row["weight"]),
|
||||
edge_type=row["edge_type"],
|
||||
)
|
||||
|
||||
# R2: cache the maximum graph degree so the rank stage
|
||||
# can normalise log(1+deg) into [0,1] (sample-rank-comparable to
|
||||
# cosine; W_DEGREE * deg_norm bounded by W_DEGREE itself instead of
|
||||
# by an unbounded log term that scales with hub connectivity).
|
||||
# Computed once per build; rehydrated from disk on warm starts via
|
||||
# the runtime_graph_cache.json payload. Defensive: fall back to the
|
||||
# cached value if the live degree() walk fails for any reason — and
|
||||
# never let a bare AttributeError reach the rank stage.
|
||||
try:
|
||||
deg_values = [d for _, d in graph._nx.degree()]
|
||||
max_degree = max(deg_values) if deg_values else 0
|
||||
except Exception:
|
||||
max_degree = cached_max_degree
|
||||
if max_degree == 0 and cached_max_degree > 0:
|
||||
# Live walk produced 0 (no edges yet) but the cache held a real
|
||||
# value — prefer the cached value. Triggers when an upstream
|
||||
# path stripped edges before the rebuild reached us.
|
||||
max_degree = cached_max_degree
|
||||
graph._max_degree = int(max_degree)
|
||||
|
||||
# Run (or reuse cached) Leiden + rich-club.
|
||||
if assignment is None:
|
||||
assignment = detect_communities(graph, prior=None)
|
||||
rich_club = rich_club_nodes(graph, percent=0.10)
|
||||
|
||||
# compute betweenness centrality ONCE per build
|
||||
# and attach to every node as a NetworkX attribute so the rank stage
|
||||
# can read it O(1) instead of calling graph.centrality() on every
|
||||
# recall (the pre-05-13 hot path). Cache HIT path already rehydrated
|
||||
# centrality from node_payload into node attrs above; we only
|
||||
# (re)compute when the cache payload is absent / stale or when
|
||||
# node_payload centrality values are all-zero placeholders.
|
||||
needs_centrality = True
|
||||
if use_cached_payload and cached_node_payload is not None:
|
||||
# If the cache was written AFTER 05-13 the per-node centrality
|
||||
# floats are real (possibly non-zero). If every value is exactly
|
||||
# 0.0 the cache was written pre-05-13 shape — recompute to
|
||||
# populate the live graph, then a subsequent save() below will
|
||||
# upgrade the cache.
|
||||
any_nonzero = any(
|
||||
float(p.get("centrality") or 0.0) != 0.0
|
||||
for p in cached_node_payload.values()
|
||||
)
|
||||
needs_centrality = not any_nonzero
|
||||
if needs_centrality:
|
||||
try:
|
||||
centrality_map = graph.centrality()
|
||||
for rid, cval in centrality_map.items():
|
||||
nid_str = str(rid)
|
||||
if nid_str in graph._nx.nodes:
|
||||
graph._nx.nodes[nid_str]["centrality"] = float(cval)
|
||||
if (
|
||||
node_payload_for_cache is not None
|
||||
and nid_str in node_payload_for_cache
|
||||
):
|
||||
node_payload_for_cache[nid_str]["centrality"] = (
|
||||
float(cval)
|
||||
)
|
||||
except Exception:
|
||||
# Defensive: centrality is a ranking signal, not a
|
||||
# correctness invariant; fall back to zeros on failure.
|
||||
for nid_str in graph._nx.nodes:
|
||||
graph._nx.nodes[nid_str].setdefault("centrality", 0.0)
|
||||
|
||||
# Persist — fresh build, or cache was legacy 05-09 / 05-12 shape.
|
||||
if cached_node_payload is None or needs_centrality:
|
||||
runtime_graph_cache.save(
|
||||
store, assignment, rich_club,
|
||||
node_payload=node_payload_for_cache,
|
||||
# R2: max_degree travels with assignment + rich_club
|
||||
# so warm-start build_runtime_graph rehydrates without recompute.
|
||||
max_degree=int(getattr(graph, "_max_degree", 0) or 0),
|
||||
)
|
||||
|
||||
# register the graph-sync hook so future insert/update/
|
||||
# delete calls mutate the live graph instead of diverging. The store
|
||||
# swallows hook exceptions so a buggy hook never breaks a write.
|
||||
try:
|
||||
store.register_graph_sync_hook(_make_graph_sync_hook(graph._nx))
|
||||
except Exception:
|
||||
# Older store without register_graph_sync_hook — this is a
|
||||
# defensive upgrade path; the graph just won't stay live-sync'd.
|
||||
pass
|
||||
|
||||
# R2 belt-and-braces: every code path above sets
|
||||
# graph._max_degree, but if some future refactor short-circuits
|
||||
# before reaching the live degree walk we still want the rank
|
||||
# stage's `getattr(graph, "_max_degree", 0)` to read a real int.
|
||||
if not hasattr(graph, "_max_degree"):
|
||||
graph._max_degree = 0
|
||||
|
||||
return graph, assignment, rich_club
|
||||
35
src/iai_mcp/richclub.py
Normal file
35
src/iai_mcp/richclub.py
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
"""Rich-club pre-fetch (CONN-02).
|
||||
|
||||
Top 10% of nodes by centrality. Used by pipeline.pipeline_recall at stage 4
|
||||
(union with 2-hop spread) and by Plan 03's session-start assembler to pre-warm
|
||||
the Anthropic prompt cache with a stable global-hub set.
|
||||
|
||||
van den Heuvel & Sporns 2011 (J Neurosci 31:15775) observed that the top ~10%
|
||||
of hub nodes handle ~69% of the network's shortest-path traffic. We use the
|
||||
same percentile as the pre-fetch size.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from math import ceil
|
||||
from uuid import UUID
|
||||
|
||||
from iai_mcp.graph import MemoryGraph
|
||||
|
||||
|
||||
def rich_club_nodes(graph: MemoryGraph, percent: float = 0.10) -> list[UUID]:
|
||||
"""CONN-02: top `percent` fraction of nodes by centrality.
|
||||
|
||||
- Empty graph -> [].
|
||||
- Non-empty graph -> at least 1 node (ceil) even if percent rounds to 0.
|
||||
A rich club of zero is useless at the pipeline's Stage 4 union step.
|
||||
- Deterministic tie-break: dict.items() preserves insertion order; sort
|
||||
is stable, so equal-centrality nodes keep their insertion ordering.
|
||||
"""
|
||||
if graph.node_count() == 0:
|
||||
return []
|
||||
centrality = graph.centrality()
|
||||
if not centrality:
|
||||
return []
|
||||
k = max(1, ceil(len(centrality) * percent))
|
||||
ranked = sorted(centrality.items(), key=lambda kv: kv[1], reverse=True)
|
||||
return [node_id for node_id, _ in ranked[:k]]
|
||||
642
src/iai_mcp/runtime_graph_cache.py
Normal file
642
src/iai_mcp/runtime_graph_cache.py
Normal file
|
|
@ -0,0 +1,642 @@
|
|||
"""Plan 05-09 (P4.A): persist Leiden community assignment + rich-club
|
||||
to disk so the first ``memory_recall`` call in a fresh core process
|
||||
does not rebuild these expensive artefacts from scratch.
|
||||
|
||||
The Phase-1 ``retrieve.build_runtime_graph`` rebuilds everything on
|
||||
every call:
|
||||
|
||||
graph = MemoryGraph() # ~100 ms to construct from rows
|
||||
detect_communities(graph) # Leiden, ~200 ms at N=1k
|
||||
rich_club_nodes(graph, 0.10) # ~20 ms
|
||||
|
||||
Phase-5 P4 measured first-call cold path at ~440 ms at N=1k. Caching
|
||||
the *Leiden output* and the rich-club node list eliminates the two
|
||||
expensive computations when the store has not changed. MemoryGraph
|
||||
construction itself is cheap enough to rebuild per call; caching it
|
||||
too would require pickle (the NetworkX graph is not JSON-friendly)
|
||||
and the security-vs-speed trade-off is not worth it for ~100 ms.
|
||||
|
||||
**Invalidation** — any of these triggers a rebuild:
|
||||
|
||||
- Record count changed (user saved / consolidated / merged)
|
||||
- Edge count changed (Hebbian reinforcement or contradiction added)
|
||||
- SCHEMA_VERSION_CURRENT bumped (store migrated)
|
||||
- store.embed_dim changed (user swapped embedder; Plan 05-08)
|
||||
- CACHE_VERSION bumped (this module's on-disk format changed)
|
||||
|
||||
Any inconsistency — corrupt JSON, unreadable file, unknown keys —
|
||||
falls through to a clean rebuild. The cache is purely an optimisation;
|
||||
the authoritative graph is always the LanceDB store.
|
||||
|
||||
**Write strategy**: every ``save()`` writes a ``.tmp`` file first then
|
||||
``os.replace``s it over the real path — atomic on POSIX. A crash
|
||||
mid-write leaves either the old cache intact or no cache at all;
|
||||
never a partially written file. No flush timer; the cache refreshes
|
||||
on the next ``build_runtime_graph`` call when the key changes.
|
||||
|
||||
**Why JSON not pickle**: the cached payload is list-of-UUIDs,
|
||||
list-of-floats and scalars — all JSON-native after simple UUID→str
|
||||
conversion. JSON avoids the arbitrary-code-execution risk of pickle
|
||||
and makes the cache auditable (a user can cat the file to see what
|
||||
the brain thinks its communities are).
|
||||
|
||||
Constitutional invariants:
|
||||
|
||||
- C3 (zero API): pure local JSON + filesystem operations.
|
||||
- C6 (read-only against store): cache writes go to the cache file
|
||||
only, never to any LanceDB table.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from uuid import UUID
|
||||
|
||||
from iai_mcp.crypto import (
|
||||
CryptoKey,
|
||||
decrypt_field,
|
||||
encrypt_field,
|
||||
is_encrypted,
|
||||
)
|
||||
from iai_mcp.types import SCHEMA_VERSION_CURRENT
|
||||
|
||||
|
||||
# Bump this whenever the on-disk cache shape changes. A mismatch
|
||||
# forces every user on the old shape to rebuild -- safer than silently
|
||||
# loading a file whose key contract has drifted.
|
||||
#
|
||||
# R2: bumped to "06-02-v1" — payload now carries max_degree
|
||||
# (one int) so the rank stage can normalise log(1+deg) by log(1+max_deg)
|
||||
# without re-walking the live graph on every recall. Old caches lacking
|
||||
# the field are invalidated cleanly by the version bump and rebuild on
|
||||
# the next build_runtime_graph call.
|
||||
#
|
||||
# W3 / bumped to "07-09-v3" — cache file is now
|
||||
# AES-256-GCM-wrapped. Old "06-02-v1" caches that pre-date 07.9 are
|
||||
# treated as legacy plaintext: read once, lazily re-saved as ciphertext
|
||||
# on first warm-start under 07.9, then never read again.
|
||||
CACHE_VERSION: str = "07-09-v3"
|
||||
LEGACY_CACHE_VERSION_PLAINTEXT: str = "06-02-v1"
|
||||
|
||||
# AES-GCM associated data (AD): binds the ciphertext to this format and
|
||||
# version. A bytewise tampering attempt that swaps the file with a
|
||||
# v06-02-v1 plaintext or any other stream fails the decrypt tag check.
|
||||
_CACHE_AAD: bytes = b"runtime-graph-cache:v3"
|
||||
|
||||
CACHE_FILENAME: str = "runtime_graph_cache.json"
|
||||
|
||||
# Size cap for the on-disk cache. When the encoded payload exceeds this,
|
||||
# ``save`` drops ``node_payload`` (the large per-record embedding map) and
|
||||
# writes only ``assignment + rich_club``. Cold-start ``build_runtime_graph``
|
||||
# rehydrates the node payload from the LanceDB store on the next recall;
|
||||
# the cache remains advisory. 10 MiB holds the Leiden + rich-club artefacts
|
||||
# for a ~50k-record store comfortably while keeping cold-start load under
|
||||
# the session-start token budget.
|
||||
MAX_CACHE_BYTES: int = 10 * 1024 * 1024
|
||||
|
||||
|
||||
def _cache_path(store: Any) -> Path:
|
||||
"""Cache file lives next to the LanceDB directory so it travels with
|
||||
the store on backup / move. One cache file per MemoryStore."""
|
||||
root = getattr(store, "root", None)
|
||||
if root is None:
|
||||
root = Path.cwd()
|
||||
return Path(root) / CACHE_FILENAME
|
||||
|
||||
|
||||
def _cache_encryption_key(store: Any) -> bytes:
|
||||
"""Phase 07.9 W3 / 32-byte AES key for the runtime-graph-cache
|
||||
sidecar. Reuses the store's already-cached key whenever possible to
|
||||
avoid a second keyring round-trip. Falls back to a fresh CryptoKey
|
||||
lookup keyed on the store's user_id (or "default") when the store
|
||||
doesn't expose a cached key — the same passphrase / keyring contract
|
||||
applies, so the resolved key is identical.
|
||||
"""
|
||||
# MemoryStore caches its key after the first encryption call
|
||||
# (store.py:_key()); that's the cheapest path. Defensive getattr
|
||||
# so this module stays usable from non-store call sites in tests.
|
||||
cached_via_store = getattr(store, "_crypto_key", None)
|
||||
if isinstance(cached_via_store, (bytes, bytearray)) and len(cached_via_store) == 32:
|
||||
return bytes(cached_via_store)
|
||||
if hasattr(store, "_key") and callable(store._key):
|
||||
try:
|
||||
key = store._key()
|
||||
if isinstance(key, (bytes, bytearray)) and len(key) == 32:
|
||||
return bytes(key)
|
||||
except Exception:
|
||||
pass
|
||||
user_id = getattr(store, "user_id", "default") or "default"
|
||||
return CryptoKey(user_id=user_id).get_or_create()
|
||||
|
||||
|
||||
def _cache_key(store: Any) -> tuple:
|
||||
"""Monotonic identity for "the cached graph is still correct for this
|
||||
store state". Any change to a component invalidates the cache.
|
||||
|
||||
(records_count, edges_count, schema_version, embed_dim, cache_version)
|
||||
"""
|
||||
try:
|
||||
records_count = int(store.db.open_table("records").count_rows())
|
||||
except Exception:
|
||||
records_count = -1
|
||||
try:
|
||||
edges_count = int(store.db.open_table("edges").count_rows())
|
||||
except Exception:
|
||||
edges_count = -1
|
||||
embed_dim = int(getattr(store, "embed_dim", 0))
|
||||
return (
|
||||
records_count,
|
||||
edges_count,
|
||||
SCHEMA_VERSION_CURRENT,
|
||||
embed_dim,
|
||||
CACHE_VERSION,
|
||||
)
|
||||
|
||||
|
||||
# ------------------------------------------------------------ JSON encode/decode
|
||||
|
||||
|
||||
def _encode_assignment(assignment: Any) -> dict:
|
||||
"""Serialise CommunityAssignment to a JSON-friendly dict.
|
||||
|
||||
node_to_community and mid_regions have UUID keys; community_centroids
|
||||
is {UUID: [float]}. UUIDs are stringified; floats stay native.
|
||||
"""
|
||||
return {
|
||||
"node_to_community": {
|
||||
str(leaf): str(comm)
|
||||
for leaf, comm in getattr(assignment, "node_to_community", {}).items()
|
||||
},
|
||||
"community_centroids": {
|
||||
str(comm): list(vec)
|
||||
for comm, vec in getattr(assignment, "community_centroids", {}).items()
|
||||
},
|
||||
"modularity": float(getattr(assignment, "modularity", 0.0)),
|
||||
"backend": str(getattr(assignment, "backend", "flat")),
|
||||
"top_communities": [str(c) for c in getattr(assignment, "top_communities", [])],
|
||||
"mid_regions": {
|
||||
str(comm): [str(m) for m in members]
|
||||
for comm, members in getattr(assignment, "mid_regions", {}).items()
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def _decode_assignment(raw: dict) -> Any:
|
||||
"""Inverse of _encode_assignment. Imports CommunityAssignment lazily
|
||||
so this module does not pull in the community layer for callers that
|
||||
only want to poke the cache file."""
|
||||
from iai_mcp.community import CommunityAssignment
|
||||
|
||||
return CommunityAssignment(
|
||||
node_to_community={
|
||||
UUID(leaf): UUID(comm)
|
||||
for leaf, comm in raw.get("node_to_community", {}).items()
|
||||
},
|
||||
community_centroids={
|
||||
UUID(comm): list(vec)
|
||||
for comm, vec in raw.get("community_centroids", {}).items()
|
||||
},
|
||||
modularity=float(raw.get("modularity", 0.0)),
|
||||
backend=str(raw.get("backend", "flat")),
|
||||
top_communities=[UUID(c) for c in raw.get("top_communities", [])],
|
||||
mid_regions={
|
||||
UUID(comm): [UUID(m) for m in members]
|
||||
for comm, members in raw.get("mid_regions", {}).items()
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
def _encode_rich_club(rich_club: Any) -> list[str]:
|
||||
return [str(u) for u in (rich_club or [])]
|
||||
|
||||
|
||||
def _decode_rich_club(raw: Any) -> list[UUID]:
|
||||
return [UUID(u) for u in (raw or [])]
|
||||
|
||||
|
||||
# ----------------------------------------------------------------- size estimator
|
||||
#
|
||||
# W2 / D-07, D-08, bound peak RSS in save() by estimating
|
||||
# serialised byte cost without materialising the full JSON string.
|
||||
#
|
||||
# The legacy save() path encoded the cache payload up to 4 times -- once
|
||||
# for the initial size check and once after each progressive drop. On
|
||||
# cold-start graphs (Leiden -> ~1 community per record),
|
||||
# assignment.community_centroids balloons with len(records) * 384-dim
|
||||
# float vectors and a single encode call materialises a multi-GB
|
||||
# intermediate Python string (py-spy confirmed RSS 7.6GB on cold start).
|
||||
#
|
||||
# The estimator overshoots rather than undershoots: false-positive drops
|
||||
# are safe (cache stays advisory; cold-start rebuilds from the live store),
|
||||
# false-negative under-drops produce the very bug we are fixing. The
|
||||
# constants below are upper bounds for the JSON-encoded byte width of each
|
||||
# field shape.
|
||||
|
||||
# JSON overhead per dict entry: 4 punctuation chars (quotes, colon, comma)
|
||||
# + variable-length key + value. We track the punctuation explicitly so
|
||||
# the per-field constants below are pure VALUE budgets.
|
||||
_JSON_DICT_ENTRY_OVERHEAD: int = 4
|
||||
|
||||
# node_payload entry value width upper bound. Shape:
|
||||
# {"embedding": [<384 float>], "surface": str(<=256), "centrality": float,
|
||||
# "tier": str(<=24), "pinned": bool, "tags": [<=16 short strings],
|
||||
# "language": str(<=8)}
|
||||
# 384-dim float vector dominates: each float worst-case ~24 bytes
|
||||
# ("-1.2345678901234567,") -> 384*24 = 9216. Plus structural keys / quotes
|
||||
# ~256. Plus other fields ~512. Round to a comfortable ceiling.
|
||||
_NODE_PAYLOAD_BYTES_PER_RECORD: int = 10240
|
||||
|
||||
# community_centroids entry value width upper bound. Shape:
|
||||
# {"<UUID-36>": [<384 float>]}
|
||||
# 384-dim float same calculus as node_payload embedding -> 9216. Plus
|
||||
# 36-char UUID quoted -> 38. Plus brackets / commas -> ~16. Round up.
|
||||
_CENTROID_BYTES_PER_RECORD: int = 9472
|
||||
|
||||
# mid_regions entry value width upper bound. Shape:
|
||||
# {"<UUID-36>": ["<UUID-36>", ..., "<UUID-36>"]}
|
||||
# Variable length; bound by typical mid-region size <= 32 UUIDs * 38 bytes
|
||||
# = 1216, plus brackets / commas -> 1280.
|
||||
_MID_REGION_BYTES_PER_RECORD: int = 1280
|
||||
|
||||
# rich_club is a list of UUID strings: 38 bytes per entry.
|
||||
_RICH_CLUB_BYTES_PER_ENTRY: int = 38
|
||||
|
||||
# Top-level scaffolding (cache_version + key + saved_at + max_degree +
|
||||
# backend / modularity / top_communities / node_to_community + structural
|
||||
# JSON braces). Conservative upper bound; node_to_community at scale is
|
||||
# the variable component.
|
||||
_BASE_SCAFFOLD_BYTES: int = 4096
|
||||
|
||||
|
||||
def _estimate_serialised_bytes(data: dict) -> int:
|
||||
"""Upper-bound estimate of the encoded ``data`` dict's byte width
|
||||
without actually serialising it.
|
||||
|
||||
Walks the cache payload shape and sums per-field worst-case JSON byte
|
||||
widths. Overshoots rather than undershoots so the caller's drop loop
|
||||
is conservative (false-positive drops are safe; the cache is advisory
|
||||
and cold-start rebuilds from the live store).
|
||||
|
||||
Used by ``save`` before every iteration of the drop loop -- replaces
|
||||
the legacy len-of-encoded round-trip which materialised the full
|
||||
JSON string up to 4 times per save.
|
||||
"""
|
||||
total = _BASE_SCAFFOLD_BYTES
|
||||
|
||||
# node_payload: dict[str, dict] of per-record graph attributes.
|
||||
np_block = data.get("node_payload") or {}
|
||||
if isinstance(np_block, dict):
|
||||
total += len(np_block) * (
|
||||
_NODE_PAYLOAD_BYTES_PER_RECORD + _JSON_DICT_ENTRY_OVERHEAD + 38
|
||||
)
|
||||
|
||||
# node_to_community + community_centroids + mid_regions live under
|
||||
# data["assignment"]. Encoded shape is what _encode_assignment returns.
|
||||
assignment_block = data.get("assignment") or {}
|
||||
if isinstance(assignment_block, dict):
|
||||
ntc = assignment_block.get("node_to_community") or {}
|
||||
if isinstance(ntc, dict):
|
||||
# Each entry: "<UUID-36>": <int>; ~50 bytes worst case.
|
||||
total += len(ntc) * 50
|
||||
|
||||
centroids = assignment_block.get("community_centroids") or {}
|
||||
if isinstance(centroids, dict):
|
||||
total += len(centroids) * (
|
||||
_CENTROID_BYTES_PER_RECORD + _JSON_DICT_ENTRY_OVERHEAD
|
||||
)
|
||||
|
||||
mid = assignment_block.get("mid_regions") or {}
|
||||
if isinstance(mid, dict):
|
||||
total += len(mid) * (
|
||||
_MID_REGION_BYTES_PER_RECORD + _JSON_DICT_ENTRY_OVERHEAD
|
||||
)
|
||||
|
||||
top = assignment_block.get("top_communities") or []
|
||||
if isinstance(top, list):
|
||||
total += len(top) * 16
|
||||
|
||||
rich_club = data.get("rich_club") or []
|
||||
if isinstance(rich_club, list):
|
||||
total += len(rich_club) * _RICH_CLUB_BYTES_PER_ENTRY
|
||||
|
||||
return total
|
||||
|
||||
|
||||
# ------------------------------------------------------------ public API
|
||||
|
||||
|
||||
def try_load(store: Any) -> tuple | None:
|
||||
"""Return the cached ``(assignment, rich_club, node_payload, max_degree)``
|
||||
tuple if the on-disk file is present, readable, and keyed to the
|
||||
current store state. Return ``None`` on any mismatch or error.
|
||||
|
||||
the third element is the ``node_payload`` blob
|
||||
(``dict[str, dict]``: UUID-str -> {embedding, surface, centrality,
|
||||
tier, pinned}) so cold-start ``build_runtime_graph`` can rehydrate
|
||||
NetworkX node attributes without re-walking the encrypted records
|
||||
table.
|
||||
|
||||
R2: the fourth element is ``max_degree`` (one int — the
|
||||
maximum NetworkX degree in the live graph at save() time). Used by
|
||||
the pipeline rank stage to normalise log(1+deg) into [0,1] without
|
||||
re-walking the graph. Missing / malformed value coerces to 0 — the
|
||||
rank stage falls back to deg_norm=0.0 when max_degree==0 (cosine
|
||||
carries the recall on its own at the cold-start scale).
|
||||
|
||||
Callers treat ``None`` as "rebuild from the live graph" — never as
|
||||
an error condition. The cache is advisory.
|
||||
|
||||
W3 / file format is now AES-256-GCM-wrapped JSON.
|
||||
A pre-07.9 plaintext file (cache_version="06-02-v1") is read once
|
||||
and re-saved under the new ciphertext format on the same call —
|
||||
one-cycle lazy migration. Any decrypt failure (wrong key, tampered
|
||||
file) returns None and the caller rebuilds from store.
|
||||
"""
|
||||
path = _cache_path(store)
|
||||
if not path.exists():
|
||||
return None
|
||||
try:
|
||||
raw_text = path.read_text(encoding="utf-8")
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
legacy_v2_plaintext = False
|
||||
if is_encrypted(raw_text):
|
||||
# v3 ciphertext path.
|
||||
try:
|
||||
key = _cache_encryption_key(store)
|
||||
plaintext_json = decrypt_field(raw_text, key, _CACHE_AAD)
|
||||
data = json.loads(plaintext_json)
|
||||
except Exception as exc:
|
||||
try:
|
||||
sys.stderr.write(
|
||||
'{"event":"runtime_graph_cache_decrypt_failed","error":'
|
||||
+ json.dumps(str(exc))
|
||||
+ '}\n'
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
return None
|
||||
else:
|
||||
# Legacy plaintext path. Accept ONLY the documented v2 cache
|
||||
# version; anything else falls through to a clean rebuild
|
||||
# (the file is not necessarily ours).
|
||||
try:
|
||||
data = json.loads(raw_text)
|
||||
except Exception:
|
||||
return None
|
||||
if not isinstance(data, dict):
|
||||
return None
|
||||
if data.get("cache_version") == LEGACY_CACHE_VERSION_PLAINTEXT:
|
||||
legacy_v2_plaintext = True
|
||||
else:
|
||||
# Unknown format / version — treat as no cache.
|
||||
return None
|
||||
|
||||
if not isinstance(data, dict):
|
||||
return None
|
||||
if not legacy_v2_plaintext and data.get("cache_version") != CACHE_VERSION:
|
||||
return None
|
||||
saved_key = tuple(data.get("key", []))
|
||||
current_key = _cache_key(store)
|
||||
if legacy_v2_plaintext:
|
||||
# Legacy v2 caches embed CACHE_VERSION="06-02-v1" in the last
|
||||
# key slot; compare against an expected key that swaps the
|
||||
# current CACHE_VERSION for the legacy one. All other
|
||||
# invariants (records_count, edges_count, schema_version,
|
||||
# embed_dim) MUST still match — anything else means the cache
|
||||
# is stale and we rebuild from store.
|
||||
expected_legacy_key = tuple(
|
||||
list(current_key)[:-1] + [LEGACY_CACHE_VERSION_PLAINTEXT]
|
||||
)
|
||||
if saved_key != expected_legacy_key:
|
||||
return None
|
||||
else:
|
||||
if saved_key != current_key:
|
||||
return None
|
||||
|
||||
try:
|
||||
assignment = _decode_assignment(data["assignment"])
|
||||
rich_club = _decode_rich_club(data.get("rich_club"))
|
||||
node_payload_raw = data.get("node_payload")
|
||||
node_payload: dict[str, dict] | None
|
||||
if isinstance(node_payload_raw, dict):
|
||||
# Shallow dict-of-dicts; embedding list[float] round-trips
|
||||
# through JSON natively.
|
||||
#
|
||||
# Plan 07.11-02 / (V2-03 fix): defensively drop
|
||||
# poisoned entries on rehydrate. Even though Plan 07.11-02's
|
||||
# retrieve.py fix prevents future writes of empty-surface
|
||||
# entries, an existing on-disk cache from before this fix
|
||||
# may still contain them. Belt-and-braces: rehydrate-side
|
||||
# filter ensures a poisoned cache from any source (legacy
|
||||
# write, future regression, manual tamper) cannot leak an
|
||||
# empty/None surface into the live graph.
|
||||
#
|
||||
# Drop rule: surface in (None, "") OR _decrypt_failed=True.
|
||||
# The structured event uses the same stderr-JSON idiom as
|
||||
# the existing runtime_graph_cache_decrypt_failed emission
|
||||
# at lines 376-383 — runtime_graph_cache.py intentionally
|
||||
# bypasses logging because the logger's re-entrant import
|
||||
# path can deadlock during cache rehydrate at very-cold-start.
|
||||
node_payload = {}
|
||||
drop_count = 0
|
||||
for k, v in node_payload_raw.items():
|
||||
if not isinstance(v, dict):
|
||||
continue
|
||||
surface = v.get("surface")
|
||||
if surface in (None, "") or v.get("_decrypt_failed"):
|
||||
drop_count += 1
|
||||
continue # poisoned entry — never expose as a "valid" record
|
||||
node_payload[str(k)] = dict(v)
|
||||
if drop_count > 0:
|
||||
try:
|
||||
sys.stderr.write(
|
||||
'{"event":"runtime_graph_cache_drop_poisoned_entry","count":'
|
||||
+ str(drop_count)
|
||||
+ '}\n'
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
else:
|
||||
node_payload = None
|
||||
# R2: max_degree is one int — never participates in
|
||||
# the iterative drop path because dropping it costs nothing at
|
||||
# the JSON byte-budget level.
|
||||
try:
|
||||
max_degree = int(data.get("max_degree", 0) or 0)
|
||||
except (TypeError, ValueError):
|
||||
max_degree = 0
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
if legacy_v2_plaintext:
|
||||
# W3 / lazy migration — re-save the loaded
|
||||
# content under the new v3 encrypted format. Wrapped: a
|
||||
# migration write failure must not block the caller from
|
||||
# using the loaded values they already have in memory.
|
||||
try:
|
||||
save(
|
||||
store, assignment, rich_club,
|
||||
node_payload=node_payload, max_degree=max_degree,
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return assignment, rich_club, node_payload, max_degree
|
||||
|
||||
|
||||
def save(
|
||||
store: Any,
|
||||
assignment: Any,
|
||||
rich_club: Any,
|
||||
node_payload: "dict[str, dict] | None" = None,
|
||||
max_degree: int = 0,
|
||||
) -> bool:
|
||||
"""Persist the cache atomically. Returns True on success, False on
|
||||
any write error. Errors are swallowed — the caller has freshly
|
||||
computed values in memory either way; a failed cache write is not
|
||||
a reason to break the recall path.
|
||||
|
||||
``node_payload`` persists the per-record graph-node
|
||||
attribute map (UUID-str -> {embedding: list[float], surface: str,
|
||||
centrality: float, tier: str, pinned: bool}). Absent / None -> the
|
||||
cache still writes assignment + rich_club and next cold-start will
|
||||
rebuild node payload from the live store walk. JSON-native shape
|
||||
(no binary serialisation) keeps the cache auditable.
|
||||
|
||||
R2: ``max_degree`` (one int) is the maximum graph degree
|
||||
at save() time. Used by the rank stage to normalise log(1+deg) into
|
||||
[0,1] without re-walking the graph on every recall. Always present
|
||||
in the payload — never participates in the iterative drop path
|
||||
(one int costs nothing against MAX_CACHE_BYTES).
|
||||
"""
|
||||
path = _cache_path(store)
|
||||
tmp_path = path.with_suffix(path.suffix + ".tmp")
|
||||
# Normalise node_payload for JSON: stringify keys, list() embeddings.
|
||||
encoded_node_payload: dict[str, dict] | None = None
|
||||
if node_payload:
|
||||
encoded_node_payload = {}
|
||||
for k, v in node_payload.items():
|
||||
if not isinstance(v, dict):
|
||||
continue
|
||||
# embeddings can be numpy float32 from LanceDB
|
||||
# rows; coerce to plain Python float so json.dump does not
|
||||
# trip on "Object of type float32 is not JSON serializable".
|
||||
raw_emb = v.get("embedding") or []
|
||||
# `centrality` is now betweenness, computed once
|
||||
# during build_runtime_graph and persisted here so warm starts
|
||||
# don't recompute it. Missing/None coerces to 0.0 (legacy
|
||||
# pre-05-13 pre-compute shape). `tags`/`language` persisted
|
||||
# so SimpleRecordView surfaces the full profile_modulation
|
||||
# input set without a store.get fallback.
|
||||
raw_tags = v.get("tags") or []
|
||||
encoded_node_payload[str(k)] = {
|
||||
"embedding": [float(x) for x in raw_emb],
|
||||
"surface": str(v.get("surface", "")),
|
||||
"centrality": float(v.get("centrality") or 0.0),
|
||||
"tier": str(v.get("tier", "episodic")),
|
||||
"pinned": bool(v.get("pinned", False)),
|
||||
"tags": [str(t) for t in raw_tags if t is not None],
|
||||
"language": str(v.get("language", "en") or "en"),
|
||||
}
|
||||
|
||||
data = {
|
||||
"cache_version": CACHE_VERSION,
|
||||
"key": list(_cache_key(store)),
|
||||
"assignment": _encode_assignment(assignment),
|
||||
"rich_club": _encode_rich_club(rich_club),
|
||||
"node_payload": encoded_node_payload or {},
|
||||
# R2: max_degree is one int — survives every iterative
|
||||
# drop step below because dropping it saves no measurable bytes.
|
||||
"max_degree": int(max_degree or 0),
|
||||
"saved_at": datetime.now(timezone.utc).isoformat(),
|
||||
}
|
||||
|
||||
# Size guard: the previous single-drop path only trimmed
|
||||
# ``node_payload`` and shipped whatever remained, even when the bloat
|
||||
# lived elsewhere. On an all-isolated graph (0 edges) Leiden returns
|
||||
# one community per node and ``assignment.community_centroids`` alone
|
||||
# balloons to 70+ MiB (one 384-dim float vector per record).
|
||||
#
|
||||
# Drop candidates in decreasing marginal-value order. W2 /
|
||||
# D-07, D-08, estimate the encoded byte cost BEFORE materialising
|
||||
# the JSON string, so peak RSS during save matches the final on-disk
|
||||
# file size instead of the pre-drop full payload size. ``json.dumps``
|
||||
# is called AT MOST ONCE per ``save`` invocation, after all drop
|
||||
# decisions are made. The authoritative slim output of Leiden
|
||||
# (``node_to_community``, ``top_communities``, ``modularity``,
|
||||
# ``backend``) and the ``rich_club`` list always survive -- they are
|
||||
# cheap to encode and expensive to recompute from the live store.
|
||||
if _estimate_serialised_bytes(data) > MAX_CACHE_BYTES:
|
||||
# 1) node_payload: per-record blob, rebuildable from the live
|
||||
# store walk on the next cold start.
|
||||
data["node_payload"] = {}
|
||||
if _estimate_serialised_bytes(data) > MAX_CACHE_BYTES:
|
||||
# 2) assignment.community_centroids: {UUID: [float; embed_dim]}.
|
||||
# On sparse graphs this is the biggest single field. Leiden
|
||||
# recomputes centroids on the next build.
|
||||
if isinstance(data.get("assignment"), dict):
|
||||
data["assignment"]["community_centroids"] = {}
|
||||
if _estimate_serialised_bytes(data) > MAX_CACHE_BYTES:
|
||||
# 3) assignment.mid_regions: {UUID: [UUID, ...]}. Smaller view;
|
||||
# also recomputable.
|
||||
if isinstance(data.get("assignment"), dict):
|
||||
data["assignment"]["mid_regions"] = {}
|
||||
if _estimate_serialised_bytes(data) > MAX_CACHE_BYTES:
|
||||
# Still over the cap after dropping every advisory field. Prefer
|
||||
# a clean "give up" to shipping an oversized file; the caller
|
||||
# already has the in-memory values and the next build will
|
||||
# recompute everything from the live store.
|
||||
return False
|
||||
|
||||
# Single final encode -- AT MOST ONE json.dumps per save() per D-10.
|
||||
serialised = json.dumps(data, ensure_ascii=False)
|
||||
|
||||
# W3 / encrypt the JSON payload before writing.
|
||||
# Same AES-256-GCM machinery + key as the LanceDB literal_surface
|
||||
# column. ASCII-only ciphertext (b64 envelope) lets us keep the
|
||||
# text-mode write path; on-disk plaintext canary is provably absent.
|
||||
try:
|
||||
key = _cache_encryption_key(store)
|
||||
ciphertext = encrypt_field(serialised, key, _CACHE_AAD)
|
||||
except Exception:
|
||||
# Encryption failure: skip the cache write rather than persist
|
||||
# plaintext on disk. Cache is advisory; recall path unaffected.
|
||||
try:
|
||||
sys.stderr.write(
|
||||
'{"event":"runtime_graph_cache_encrypt_failed"}\n'
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
return False
|
||||
|
||||
try:
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
with tmp_path.open("w", encoding="ascii") as f:
|
||||
f.write(ciphertext)
|
||||
os.replace(str(tmp_path), str(path))
|
||||
return True
|
||||
except Exception:
|
||||
try:
|
||||
if tmp_path.exists():
|
||||
tmp_path.unlink()
|
||||
except Exception:
|
||||
pass
|
||||
return False
|
||||
|
||||
|
||||
def invalidate(store: Any) -> None:
|
||||
"""Delete the cache file for ``store``. Safe when the file does not
|
||||
exist. Used by explicit ``needs_refresh`` signals and by tests that
|
||||
want a clean slate."""
|
||||
path = _cache_path(store)
|
||||
try:
|
||||
if path.exists():
|
||||
path.unlink()
|
||||
except Exception:
|
||||
pass
|
||||
459
src/iai_mcp/s4.py
Normal file
459
src/iai_mcp/s4.py
Normal file
|
|
@ -0,0 +1,459 @@
|
|||
"""S4 viability -- on-read consistency + monotropic proactive checks (MEM-08, D-17).
|
||||
|
||||
D-17 constitutional:
|
||||
- (e) on-read consistency: runs inside `pipeline_recall` on top-K returned
|
||||
records. Pairwise cosine with ART vigilance ρ_s4=0.97 + `contradicts`
|
||||
edge lookup. Emits `s4_contradiction` events. Populates
|
||||
`RecallResponse.hints`.
|
||||
- (f) monotropic proactive: only fires when profile.monotropism_depth[domain]
|
||||
> 0.7 AND new_record.detail_level >= 4. Scans within-domain only.
|
||||
Performance guard: if domain > 100 records, skip with warning event.
|
||||
|
||||
Plan 03-02 CONN-07 addition:
|
||||
- `run_offline_pass(store)` -- new entry point, CALLED by the daemon /
|
||||
session_exit hook. Currently runs `sigma.compute_and_emit(store)` only;
|
||||
future plans append more offline-pass items here. Failures emit
|
||||
`kind="s4_error"` and never crash the pass.
|
||||
|
||||
Explicitly forbidden (D-17 negative assertions):
|
||||
- NO `daily_scan` function (Ashby Requisite Variety violation).
|
||||
- NO `session_exit_sweep` function (Anderson activation-based violation).
|
||||
|
||||
All detected contradictions go through `events.write_event` -- no .jsonl files
|
||||
(D-STORAGE).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from uuid import UUID
|
||||
|
||||
import numpy as np
|
||||
|
||||
from iai_mcp.events import write_event
|
||||
from iai_mcp.store import MemoryStore
|
||||
from iai_mcp.types import MemoryHit, MemoryRecord
|
||||
|
||||
|
||||
# D-17(e) vigilance: 0.97 for near-duplicate contradiction detection.
|
||||
# Stricter than write-path ρ=0.95: we only flag VERY close matches.
|
||||
S4_VIGILANCE_RHO = 0.97
|
||||
|
||||
# D-17(f) performance guard: skip when domain has > this many records.
|
||||
MONOTROPIC_MAX_PAIRWISE = 100
|
||||
|
||||
# D-17(f) monotropism-depth threshold.
|
||||
S4_MONOTROPIC_THETA = 0.7
|
||||
|
||||
|
||||
def _cosine(a: list[float], b: list[float]) -> float:
|
||||
"""Cosine similarity in [-1, 1]. Returns 0.0 on zero-norm inputs."""
|
||||
av = np.asarray(a, dtype=np.float32)
|
||||
bv = np.asarray(b, dtype=np.float32)
|
||||
na = float(np.linalg.norm(av))
|
||||
nb = float(np.linalg.norm(bv))
|
||||
if na == 0.0 or nb == 0.0:
|
||||
return 0.0
|
||||
return float(np.dot(av, bv) / (na * nb))
|
||||
|
||||
|
||||
def on_read_check(
|
||||
store: MemoryStore,
|
||||
hits: list[MemoryHit],
|
||||
session_id: str,
|
||||
) -> list[dict]:
|
||||
"""D-17(e) on-read consistency check.
|
||||
|
||||
Two detection paths, both run per-retrieval on the top-K hits:
|
||||
|
||||
1. `contradicts`-edge authoritative: any pair of hits connected by an
|
||||
existing `contradicts` edge is flagged regardless of cosine. This is
|
||||
the definitive route -- the user (or a prior S4 run) already said
|
||||
"these two disagree", so we surface it every time they co-retrieve.
|
||||
|
||||
2. Cosine + tag-polarity heuristic: pairs with cosine >= ρ_s4 (0.97) AND
|
||||
conflicting polarity tags ({positive,negative} or {asserted,retracted})
|
||||
are flagged as `info`-severity. or can replace this
|
||||
with NLI-based semantic contradiction.
|
||||
|
||||
Returns a list of hint dicts; each dict is shaped per
|
||||
RecallResponse.hints contract. Also writes one `s4_contradiction` event
|
||||
per detected pair to the LanceDB events table (D-STORAGE).
|
||||
|
||||
note: `on_read_check_batch` is the D-SPEED variant. It accepts
|
||||
an optional `records_cache` kwarg so pipeline_recall can reuse the cache
|
||||
it already built at stage 1 (zero extra store.get calls). This function
|
||||
is preserved as the back-compat / ad-hoc caller API (retrieve.recall
|
||||
still calls it; no records_cache available there).
|
||||
"""
|
||||
if len(hits) < 2:
|
||||
return []
|
||||
|
||||
hint_list: list[dict] = []
|
||||
|
||||
# Load records for the hit ids. Missing records are skipped silently -- a
|
||||
# recent store.delete could race us.
|
||||
records: dict[UUID, MemoryRecord] = {}
|
||||
for h in hits:
|
||||
rec = store.get(h.record_id)
|
||||
if rec is not None:
|
||||
records[h.record_id] = rec
|
||||
if len(records) < 2:
|
||||
return []
|
||||
|
||||
# Load contradicts edges among these records. We precompute the set of
|
||||
# (sorted src,dst) pairs so the pairwise loop below is O(1) lookup.
|
||||
contradict_pairs: set[tuple[str, str]] = set()
|
||||
try:
|
||||
edges_df = store.db.open_table("edges").to_pandas()
|
||||
except Exception:
|
||||
edges_df = None
|
||||
if edges_df is not None and not edges_df.empty:
|
||||
contradict_df = edges_df[edges_df["edge_type"] == "contradicts"]
|
||||
hit_ids = {str(h.record_id) for h in hits}
|
||||
for _, row in contradict_df.iterrows():
|
||||
src = row["src"]
|
||||
dst = row["dst"]
|
||||
if src in hit_ids and dst in hit_ids:
|
||||
contradict_pairs.add(tuple(sorted([src, dst])))
|
||||
|
||||
# Pairwise scan across hit records.
|
||||
hit_records = list(records.values())
|
||||
for i in range(len(hit_records)):
|
||||
for j in range(i + 1, len(hit_records)):
|
||||
a = hit_records[i]
|
||||
b = hit_records[j]
|
||||
key = tuple(sorted([str(a.id), str(b.id)]))
|
||||
sim = _cosine(a.embedding, b.embedding)
|
||||
|
||||
# Path 1: explicit edge is authoritative.
|
||||
if key in contradict_pairs:
|
||||
hint = {
|
||||
"kind": "s4_contradiction",
|
||||
"severity": "warning",
|
||||
"source_ids": [str(a.id), str(b.id)],
|
||||
"text": (
|
||||
f"inconsistency: records have a contradicts edge; "
|
||||
f"review {a.id}, {b.id}"
|
||||
),
|
||||
"similarity": sim,
|
||||
}
|
||||
hint_list.append(hint)
|
||||
write_event(
|
||||
store,
|
||||
kind="s4_contradiction",
|
||||
data={
|
||||
"source_ids": list(key),
|
||||
"similarity": sim,
|
||||
"mechanism": "contradicts_edge",
|
||||
},
|
||||
severity="warning",
|
||||
session_id=session_id,
|
||||
source_ids=[a.id, b.id],
|
||||
)
|
||||
continue
|
||||
|
||||
# Path 2: cosine + polarity-tag heuristic.
|
||||
if sim >= S4_VIGILANCE_RHO:
|
||||
a_tags = set(a.tags or [])
|
||||
b_tags = set(b.tags or [])
|
||||
polarity_conflict = (
|
||||
("positive" in a_tags and "negative" in b_tags)
|
||||
or ("negative" in a_tags and "positive" in b_tags)
|
||||
or ("asserted" in a_tags and "retracted" in b_tags)
|
||||
or ("retracted" in a_tags and "asserted" in b_tags)
|
||||
)
|
||||
if polarity_conflict:
|
||||
hint = {
|
||||
"kind": "s4_contradiction",
|
||||
"severity": "info",
|
||||
"source_ids": [str(a.id), str(b.id)],
|
||||
"text": (
|
||||
f"inconsistency: near-duplicate ({sim:.3f}) with "
|
||||
f"conflicting polarity tags"
|
||||
),
|
||||
"similarity": sim,
|
||||
}
|
||||
hint_list.append(hint)
|
||||
write_event(
|
||||
store,
|
||||
kind="s4_contradiction",
|
||||
data={
|
||||
"source_ids": list(key),
|
||||
"similarity": sim,
|
||||
"mechanism": "tag_polarity",
|
||||
},
|
||||
severity="info",
|
||||
session_id=session_id,
|
||||
source_ids=[a.id, b.id],
|
||||
)
|
||||
return hint_list
|
||||
|
||||
|
||||
def on_read_check_batch(
|
||||
store: MemoryStore,
|
||||
hits: list[MemoryHit],
|
||||
session_id: str,
|
||||
records_cache: "dict[UUID, MemoryRecord] | None" = None,
|
||||
) -> list[dict]:
|
||||
"""Plan 02-07 D-SPEED: batched variant of on_read_check.
|
||||
|
||||
Semantically identical to on_read_check (returns the same hint-shape list,
|
||||
emits the same events). The ONLY difference is the record-loading step:
|
||||
|
||||
- If `records_cache` is provided, use it directly. ZERO store.get calls.
|
||||
- Otherwise, do ONE `store.all_records()` call instead of N `store.get()`
|
||||
calls. ZERO per-hit round-trips either way.
|
||||
|
||||
The pairwise contradiction-detection loop, the polarity-tag heuristic, the
|
||||
vigilance threshold (S4_VIGILANCE_RHO), and the event-emission logic are
|
||||
byte-for-byte equivalent to on_read_check.
|
||||
|
||||
Why this is the perf-critical surface (D-SPEED SC-6):
|
||||
Pre-fix: pipeline_recall built records_cache at stage 1, then s4.on_read_check
|
||||
called `store.get(h.record_id)` per hit -- every call is a full
|
||||
to_pandas() scan (~140ms each at N=100 on executor hardware).
|
||||
Post-fix: pipeline_recall passes records_cache through; s4 does zero extra
|
||||
round-trips. Saves ~140ms per hit x N hits per recall.
|
||||
"""
|
||||
if len(hits) < 2:
|
||||
return []
|
||||
|
||||
hint_list: list[dict] = []
|
||||
|
||||
# Load records via cache (preferred) or one batched fallback.
|
||||
records: dict[UUID, MemoryRecord] = {}
|
||||
if records_cache is not None:
|
||||
for h in hits:
|
||||
rec = records_cache.get(h.record_id)
|
||||
if rec is not None:
|
||||
records[h.record_id] = rec
|
||||
else:
|
||||
all_recs = store.all_records()
|
||||
by_id = {r.id: r for r in all_recs}
|
||||
for h in hits:
|
||||
rec = by_id.get(h.record_id)
|
||||
if rec is not None:
|
||||
records[h.record_id] = rec
|
||||
if len(records) < 2:
|
||||
return []
|
||||
|
||||
# Load contradicts edges among these records. One edges.to_pandas() scan
|
||||
# (same as on_read_check).
|
||||
contradict_pairs: set[tuple[str, str]] = set()
|
||||
try:
|
||||
edges_df = store.db.open_table("edges").to_pandas()
|
||||
except Exception:
|
||||
edges_df = None
|
||||
if edges_df is not None and not edges_df.empty:
|
||||
contradict_df = edges_df[edges_df["edge_type"] == "contradicts"]
|
||||
hit_ids = {str(h.record_id) for h in hits}
|
||||
for _, row in contradict_df.iterrows():
|
||||
src = row["src"]
|
||||
dst = row["dst"]
|
||||
if src in hit_ids and dst in hit_ids:
|
||||
contradict_pairs.add(tuple(sorted([src, dst])))
|
||||
|
||||
# Pairwise scan -- identical logic to on_read_check.
|
||||
hit_records = list(records.values())
|
||||
for i in range(len(hit_records)):
|
||||
for j in range(i + 1, len(hit_records)):
|
||||
a = hit_records[i]
|
||||
b = hit_records[j]
|
||||
key = tuple(sorted([str(a.id), str(b.id)]))
|
||||
sim = _cosine(a.embedding, b.embedding)
|
||||
|
||||
# Path 1: explicit edge is authoritative.
|
||||
if key in contradict_pairs:
|
||||
hint = {
|
||||
"kind": "s4_contradiction",
|
||||
"severity": "warning",
|
||||
"source_ids": [str(a.id), str(b.id)],
|
||||
"text": (
|
||||
f"inconsistency: records have a contradicts edge; "
|
||||
f"review {a.id}, {b.id}"
|
||||
),
|
||||
"similarity": sim,
|
||||
}
|
||||
hint_list.append(hint)
|
||||
write_event(
|
||||
store,
|
||||
kind="s4_contradiction",
|
||||
data={
|
||||
"source_ids": list(key),
|
||||
"similarity": sim,
|
||||
"mechanism": "contradicts_edge",
|
||||
},
|
||||
severity="warning",
|
||||
session_id=session_id,
|
||||
source_ids=[a.id, b.id],
|
||||
)
|
||||
continue
|
||||
|
||||
# Path 2: cosine + polarity-tag heuristic.
|
||||
if sim >= S4_VIGILANCE_RHO:
|
||||
a_tags = set(a.tags or [])
|
||||
b_tags = set(b.tags or [])
|
||||
polarity_conflict = (
|
||||
("positive" in a_tags and "negative" in b_tags)
|
||||
or ("negative" in a_tags and "positive" in b_tags)
|
||||
or ("asserted" in a_tags and "retracted" in b_tags)
|
||||
or ("retracted" in a_tags and "asserted" in b_tags)
|
||||
)
|
||||
if polarity_conflict:
|
||||
hint = {
|
||||
"kind": "s4_contradiction",
|
||||
"severity": "info",
|
||||
"source_ids": [str(a.id), str(b.id)],
|
||||
"text": (
|
||||
f"inconsistency: near-duplicate ({sim:.3f}) with "
|
||||
f"conflicting polarity tags"
|
||||
),
|
||||
"similarity": sim,
|
||||
}
|
||||
hint_list.append(hint)
|
||||
write_event(
|
||||
store,
|
||||
kind="s4_contradiction",
|
||||
data={
|
||||
"source_ids": list(key),
|
||||
"similarity": sim,
|
||||
"mechanism": "tag_polarity",
|
||||
},
|
||||
severity="info",
|
||||
session_id=session_id,
|
||||
source_ids=[a.id, b.id],
|
||||
)
|
||||
return hint_list
|
||||
|
||||
|
||||
def monotropic_proactive_check(
|
||||
store: MemoryStore,
|
||||
new_record: MemoryRecord,
|
||||
profile_state: dict,
|
||||
session_id: str,
|
||||
) -> list[dict]:
|
||||
"""D-17(f) monotropic proactive check.
|
||||
|
||||
Three gates (all must pass):
|
||||
|
||||
1. `profile_state["monotropism_depth"][domain] > θ_deep` (0.7). The user's
|
||||
autistic profile indicates DEEP focus in this domain -- we're willing
|
||||
to spend cycles checking for near-duplicates.
|
||||
2. `new_record.detail_level >= 4`. Shallow records (detail 1-3) don't
|
||||
warrant the pairwise scan.
|
||||
3. `new_record` carries a `domain:<name>` tag. Records without a domain
|
||||
tag are excluded (nothing to compare against).
|
||||
|
||||
Performance guard: if the domain has > MONOTROPIC_MAX_PAIRWISE records,
|
||||
skip the scan and emit a `s4_monotropic_skip` warning event. The scan is
|
||||
O(N) cosine comparisons; 100 is a reasonable ceiling.
|
||||
|
||||
Rule 1 deviation: if `profile_state["monotropism_depth"]` is not a dict
|
||||
(type drift), degrade silently to empty hints (no exception).
|
||||
"""
|
||||
md = profile_state.get("monotropism_depth", {})
|
||||
if not isinstance(md, dict):
|
||||
return [] # profile_state wrongly typed -- degrade silently
|
||||
|
||||
# Locate the record's domain tag ("domain:coding", "domain:gardening", ...)
|
||||
domain_tag: str | None = next(
|
||||
(t for t in (new_record.tags or []) if t.startswith("domain:")),
|
||||
None,
|
||||
)
|
||||
if domain_tag is None:
|
||||
return []
|
||||
|
||||
# Gate 1: monotropism depth must exceed θ_deep.
|
||||
domain_name = domain_tag.split(":", 1)[1]
|
||||
depth = md.get(domain_name, 0.0)
|
||||
if depth <= S4_MONOTROPIC_THETA:
|
||||
return []
|
||||
|
||||
# Gate 2: detail_level must be >= 4.
|
||||
if new_record.detail_level < 4:
|
||||
return []
|
||||
|
||||
# Load same-domain records (excluding the new record itself).
|
||||
same_domain = [
|
||||
r for r in store.all_records()
|
||||
if (r.tags or []) and domain_tag in r.tags and r.id != new_record.id
|
||||
]
|
||||
|
||||
# Performance guard: skip + warn above ceiling.
|
||||
if len(same_domain) > MONOTROPIC_MAX_PAIRWISE:
|
||||
write_event(
|
||||
store,
|
||||
kind="s4_monotropic_skip",
|
||||
data={
|
||||
"domain": domain_tag,
|
||||
"count": len(same_domain),
|
||||
"record_id": str(new_record.id),
|
||||
},
|
||||
severity="warning",
|
||||
domain=domain_tag,
|
||||
session_id=session_id,
|
||||
)
|
||||
return []
|
||||
|
||||
hints: list[dict] = []
|
||||
for r in same_domain:
|
||||
sim = _cosine(new_record.embedding, r.embedding)
|
||||
if sim >= S4_VIGILANCE_RHO:
|
||||
hint = {
|
||||
"kind": "s4_monotropic_contradiction",
|
||||
"severity": "info",
|
||||
"source_ids": [str(new_record.id), str(r.id)],
|
||||
"text": (
|
||||
f"monotropic near-duplicate in {domain_tag}: sim={sim:.3f}"
|
||||
),
|
||||
"similarity": sim,
|
||||
}
|
||||
hints.append(hint)
|
||||
write_event(
|
||||
store,
|
||||
kind="s4_monotropic_contradiction",
|
||||
data={
|
||||
"domain": domain_tag,
|
||||
"source_ids": [str(new_record.id), str(r.id)],
|
||||
"similarity": sim,
|
||||
},
|
||||
severity="info",
|
||||
domain=domain_tag,
|
||||
session_id=session_id,
|
||||
source_ids=[new_record.id, r.id],
|
||||
)
|
||||
return hints
|
||||
|
||||
|
||||
def run_offline_pass(store: MemoryStore) -> dict:
|
||||
"""Plan 03-02 CONN-07: S4 offline-pass entry point.
|
||||
|
||||
Called by the daemon's offline cycle (or by session_exit / cron).
|
||||
Currently runs ONE check: `sigma.compute_and_emit(store)` -- which writes
|
||||
`kind=sigma_observation` (developmental / healthy / insufficient_data) OR
|
||||
`kind=sigma_drift` (mid_life_drift) and (in developmental phase) bumps the
|
||||
Hebbian rate via a `profile_updated` event.
|
||||
|
||||
Failures are caught and emitted as `kind="s4_error"`; the pass does NOT
|
||||
crash. This mirrors the diagnostic discipline of `on_read_check`:
|
||||
S4 work is observation, never blocks reads or writes.
|
||||
|
||||
Returns a dict with the per-step outcome:
|
||||
{"sigma": <snapshot dict or {"error": "..."}>}
|
||||
"""
|
||||
from iai_mcp import sigma # local import; sigma is heavy (networkx)
|
||||
|
||||
out: dict = {}
|
||||
try:
|
||||
out["sigma"] = sigma.compute_and_emit(store)
|
||||
except Exception as exc: # noqa: BLE001 - diagnostic catch-all
|
||||
try:
|
||||
write_event(
|
||||
store,
|
||||
kind="s4_error",
|
||||
data={"step": "sigma", "error": repr(exc)},
|
||||
severity="warning",
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
out["sigma"] = {"error": repr(exc)}
|
||||
return out
|
||||
417
src/iai_mcp/s5.py
Normal file
417
src/iai_mcp/s5.py
Normal file
|
|
@ -0,0 +1,417 @@
|
|||
"""S5 identity kernel -- invariant protection via M-of-N consensus (MEM-09, D-22).
|
||||
|
||||
D-22 constitutional rules enforced here:
|
||||
- ρ_identity = 0.99 (stricter than write-path ρ=0.95 and S4 ρ_s4=0.97).
|
||||
- 3-of-5 session-window consensus: an invariant update only commits after 3
|
||||
vigilance-passing proposals within the consensus window. A single-session
|
||||
attacker (e.g. prompt injection) cannot reach M by itself.
|
||||
- 48h cooldown: after a commit, any subsequent proposal on the same anchor
|
||||
is rejected for 48h. Prevents rapid sequential poisoning.
|
||||
- TRUST_THRESHOLD_IDENTITY = 0.9: records with s5_trust_score >= 0.9 are
|
||||
"invariant-tier". Direct writes bypassing propose_invariant_update are
|
||||
rejected by `check_identity_anchor_on_write`.
|
||||
- All commits emit `s5_invariant_update` events with full provenance
|
||||
(proposal history, session_ids, similarity scores).
|
||||
|
||||
Proposal events (kind=s5_invariant_proposal) are emitted for EVERY proposal
|
||||
so the M-of-N tally can be reconstructed from the events table alone -- no
|
||||
hidden in-memory state. Cooldown lookups read kind=s5_invariant_update.
|
||||
|
||||
Plan 02-05 additions (OPS-07 / gradual-drift detection):
|
||||
- `detect_drift_anomaly` reads trajectory_metric events for M4 (profile-vector
|
||||
variance). When the last `window_sessions` consecutive values have been
|
||||
monotonically increasing (was-decreasing becoming increasing), emits an
|
||||
s5_drift_alert event. User audit via `iai-mcp audit drift` surfaces these.
|
||||
- `audit_identity_events` aggregates s5_* + shield_* + s5_drift_alert events
|
||||
chronologically (newest first) for `iai-mcp audit` / `audit identity`.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from uuid import UUID, uuid4
|
||||
|
||||
import numpy as np
|
||||
|
||||
from iai_mcp.aaak import enforce_language_tagged, generate_aaak_index
|
||||
from iai_mcp.events import query_events, write_event
|
||||
from iai_mcp.store import MemoryStore
|
||||
from iai_mcp.types import MemoryRecord
|
||||
|
||||
|
||||
# ------------------------------------------------------------ constitutional constants
|
||||
|
||||
IDENTITY_VIGILANCE_RHO: float = 0.99 # strict vigilance on identity updates
|
||||
S5_CONSENSUS_M: int = 3 # 3-of-5: required agreeing proposals
|
||||
S5_CONSENSUS_N: int = 5 # 3-of-5: window size
|
||||
COOLDOWN_HOURS: int = 48 # cooldown after a commit
|
||||
TRUST_THRESHOLD_IDENTITY: float = 0.9 # score >= this => invariant-tier record
|
||||
CONSENSUS_WINDOW_HOURS: int = 24 # all M votes must land within this window
|
||||
|
||||
|
||||
# ------------------------------------------------------------ private helpers
|
||||
|
||||
|
||||
def _cosine(a: list[float], b: list[float]) -> float:
|
||||
av = np.asarray(a, dtype=np.float32)
|
||||
bv = np.asarray(b, dtype=np.float32)
|
||||
na = float(np.linalg.norm(av))
|
||||
nb = float(np.linalg.norm(bv))
|
||||
if na == 0.0 or nb == 0.0:
|
||||
return 0.0
|
||||
return float(np.dot(av, bv) / (na * nb))
|
||||
|
||||
|
||||
def _recent_proposals_for(
|
||||
store: MemoryStore, anchor_id: UUID,
|
||||
) -> list[dict]:
|
||||
"""Return all s5_invariant_proposal events for this anchor inside the
|
||||
consensus window, newest first."""
|
||||
since = datetime.now(timezone.utc) - timedelta(hours=CONSENSUS_WINDOW_HOURS)
|
||||
events = query_events(store, kind="s5_invariant_proposal", since=since, limit=100)
|
||||
return [e for e in events if e["data"].get("anchor_id") == str(anchor_id)]
|
||||
|
||||
|
||||
def _in_cooldown(store: MemoryStore, anchor_id: UUID) -> bool:
|
||||
"""True iff an s5_invariant_update for this anchor landed in the last COOLDOWN_HOURS."""
|
||||
since = datetime.now(timezone.utc) - timedelta(hours=COOLDOWN_HOURS)
|
||||
events = query_events(store, kind="s5_invariant_update", since=since, limit=10)
|
||||
for e in events:
|
||||
if e["data"].get("anchor_id") == str(anchor_id):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
# ------------------------------------------------------------ public API
|
||||
|
||||
|
||||
def propose_invariant_update(
|
||||
store: MemoryStore,
|
||||
anchor_id: UUID,
|
||||
new_fact: str,
|
||||
session_id: str,
|
||||
) -> tuple[str, UUID | None]:
|
||||
"""D-22 M-of-N voting on identity-tier updates.
|
||||
|
||||
Workflow:
|
||||
1. If the anchor is in 48h cooldown, reject (``cooldown``).
|
||||
2. If the anchor does not exist, reject (``rejected``).
|
||||
3. Encode the proposed fact; compute cosine against the anchor.
|
||||
4. Log an `s5_invariant_proposal` event regardless of vigilance outcome.
|
||||
(This is how the M-of-N tally is reconstructed on subsequent calls.)
|
||||
5. Count vigilance-passing proposals in the current consensus window.
|
||||
- If >= M (3): commit -- insert new record, create invariant_anchor
|
||||
edge, log `s5_invariant_update` event, return ("committed", new_id).
|
||||
- Else if total >= N (5) proposals in window: reject (``rejected``).
|
||||
- Else: stage (``staged``), return the proposal UUID.
|
||||
|
||||
Returns one of:
|
||||
("cooldown", None)
|
||||
("rejected", None)
|
||||
("staged", proposal_id)
|
||||
("committed", new_record_id)
|
||||
"""
|
||||
# Step 1: cooldown gate.
|
||||
if _in_cooldown(store, anchor_id):
|
||||
write_event(
|
||||
store,
|
||||
kind="s5_cooldown_block",
|
||||
data={"anchor_id": str(anchor_id), "session_id": session_id},
|
||||
severity="warning",
|
||||
session_id=session_id,
|
||||
source_ids=[anchor_id],
|
||||
)
|
||||
return "cooldown", None
|
||||
|
||||
# Step 2: anchor existence.
|
||||
anchor = store.get(anchor_id)
|
||||
if anchor is None:
|
||||
return "rejected", None
|
||||
|
||||
# Step 3: encode proposed fact + compute vigilance similarity.
|
||||
from iai_mcp.embed import embedder_for_store
|
||||
emb = embedder_for_store(store).embed(new_fact)
|
||||
sim = _cosine(anchor.embedding, emb)
|
||||
passes_vigilance = sim >= IDENTITY_VIGILANCE_RHO
|
||||
|
||||
# Step 4: log the proposal (counts toward N).
|
||||
proposal_id = uuid4()
|
||||
write_event(
|
||||
store,
|
||||
kind="s5_invariant_proposal",
|
||||
data={
|
||||
"proposal_id": str(proposal_id),
|
||||
"anchor_id": str(anchor_id),
|
||||
"new_fact": new_fact[:200], # payload size cap (T-02-02-05)
|
||||
"similarity": sim,
|
||||
"passes_vigilance": passes_vigilance,
|
||||
},
|
||||
severity="info",
|
||||
session_id=session_id,
|
||||
source_ids=[anchor_id],
|
||||
)
|
||||
|
||||
# Step 5: tally.
|
||||
recent = _recent_proposals_for(store, anchor_id)
|
||||
agree_count = sum(1 for r in recent if r["data"].get("passes_vigilance"))
|
||||
total = len(recent)
|
||||
|
||||
if agree_count >= S5_CONSENSUS_M:
|
||||
# COMMIT: create the invariant_anchor edge + log the update.
|
||||
now = datetime.now(timezone.utc)
|
||||
updated = MemoryRecord(
|
||||
id=uuid4(),
|
||||
tier=anchor.tier,
|
||||
literal_surface=new_fact,
|
||||
aaak_index="",
|
||||
embedding=emb,
|
||||
community_id=anchor.community_id,
|
||||
centrality=anchor.centrality,
|
||||
detail_level=anchor.detail_level,
|
||||
pinned=anchor.pinned,
|
||||
stability=anchor.stability,
|
||||
difficulty=anchor.difficulty,
|
||||
last_reviewed=now,
|
||||
never_decay=True,
|
||||
never_merge=True,
|
||||
provenance=[
|
||||
{
|
||||
"ts": now.isoformat(),
|
||||
"cue": "s5_consensus",
|
||||
"session_id": session_id,
|
||||
}
|
||||
],
|
||||
created_at=now,
|
||||
updated_at=now,
|
||||
tags=[*anchor.tags, "s5_consensus"],
|
||||
language=anchor.language or "en",
|
||||
s5_trust_score=min(1.0, anchor.s5_trust_score + 0.05),
|
||||
profile_modulation_gain=dict(anchor.profile_modulation_gain),
|
||||
schema_version=2,
|
||||
)
|
||||
enforce_language_tagged(updated, detect=False)
|
||||
updated.aaak_index = generate_aaak_index(updated)
|
||||
store.insert(updated)
|
||||
store.boost_edges(
|
||||
[(anchor_id, updated.id)],
|
||||
edge_type="invariant_anchor",
|
||||
delta=1.0,
|
||||
)
|
||||
write_event(
|
||||
store,
|
||||
kind="s5_invariant_update",
|
||||
data={
|
||||
"anchor_id": str(anchor_id),
|
||||
"new_record_id": str(updated.id),
|
||||
"session_ids": [r["session_id"] for r in recent],
|
||||
"agree_count": agree_count,
|
||||
"total_proposals": total,
|
||||
"similarity": sim,
|
||||
},
|
||||
severity="info",
|
||||
session_id=session_id,
|
||||
source_ids=[anchor_id, updated.id],
|
||||
)
|
||||
return "committed", updated.id
|
||||
|
||||
if total >= S5_CONSENSUS_N:
|
||||
return "rejected", None
|
||||
|
||||
return "staged", proposal_id
|
||||
|
||||
|
||||
def check_identity_anchor_on_write(
|
||||
store: MemoryStore,
|
||||
record: MemoryRecord,
|
||||
profile_state: dict,
|
||||
) -> tuple[bool, str]:
|
||||
"""Guard invoked by write paths that accept externally-originated records.
|
||||
|
||||
Records with s5_trust_score >= TRUST_THRESHOLD_IDENTITY (0.9) are
|
||||
considered invariant-tier. They may NOT be written through any path that
|
||||
bypasses propose_invariant_update (D-22 consensus requirement).
|
||||
|
||||
extension (OPS-07, D-31): the shield is evaluated in
|
||||
HARD_BLOCK tier BEFORE the consensus marker check. Any detected
|
||||
injection signal short-circuits with "shield HARD_BLOCK" -- a
|
||||
mitigation for the "direct override" branch of the threat model.
|
||||
|
||||
cross-lingual warning: an identity update whose
|
||||
language differs from the existing pinned identity anchor(s) emits a
|
||||
`identity_cross_lingual_warning` event but does NOT block -- multi-lingual
|
||||
identity refinement is a design goal of the global-product roadmap. The
|
||||
warning surfaces via `iai-mcp audit identity` for user review.
|
||||
|
||||
We distinguish between:
|
||||
- DIRECT identity writes (reject): s5_trust_score >= 0.9 and no
|
||||
`s5_consensus` tag -- attacker trying to plant an invariant.
|
||||
- CONSENSUS-PROMOTED writes (accept): s5_trust_score >= 0.9 and
|
||||
`s5_consensus` tag present -- output of propose_invariant_update's
|
||||
own store.insert call.
|
||||
- NORMAL writes (accept): s5_trust_score < 0.9 -- below identity tier.
|
||||
"""
|
||||
if record.s5_trust_score < TRUST_THRESHOLD_IDENTITY:
|
||||
return True, ""
|
||||
|
||||
# shield HARD_BLOCK pre-check on identity-tier writes.
|
||||
from iai_mcp.shield import ShieldTier, evaluate_injection_risk
|
||||
|
||||
shield_verdict = evaluate_injection_risk(
|
||||
record.literal_surface or "",
|
||||
ShieldTier.HARD_BLOCK,
|
||||
target_language=record.language or None,
|
||||
)
|
||||
if shield_verdict.action == "reject":
|
||||
return (
|
||||
False,
|
||||
f"shield HARD_BLOCK: {shield_verdict.reason}",
|
||||
)
|
||||
|
||||
if "s5_consensus" not in (record.tags or []):
|
||||
return (
|
||||
False,
|
||||
"identity-tier write (s5_trust_score >= 0.9) requires "
|
||||
"propose_invariant_update consensus; direct inserts forbidden "
|
||||
"(D-22).",
|
||||
)
|
||||
|
||||
# cross-lingual warning. Non-fatal: emit an event and
|
||||
# continue. Inspect the existing pinned identity anchors for a language
|
||||
# mismatch with the incoming record.
|
||||
try:
|
||||
anchors_with_other_lang = [
|
||||
r for r in store.all_records()
|
||||
if r.pinned
|
||||
and r.s5_trust_score >= TRUST_THRESHOLD_IDENTITY
|
||||
and (r.language or "") != ""
|
||||
and (r.language or "") != (record.language or "")
|
||||
]
|
||||
except Exception:
|
||||
anchors_with_other_lang = []
|
||||
if anchors_with_other_lang:
|
||||
anchor_langs = sorted({
|
||||
r.language for r in anchors_with_other_lang if r.language
|
||||
})
|
||||
write_event(
|
||||
store,
|
||||
kind="identity_cross_lingual_warning",
|
||||
data={
|
||||
"record_id": str(record.id),
|
||||
"record_language": record.language,
|
||||
"existing_anchor_languages": anchor_langs,
|
||||
},
|
||||
severity="warning",
|
||||
session_id="-",
|
||||
source_ids=[record.id],
|
||||
)
|
||||
|
||||
return True, ""
|
||||
|
||||
|
||||
# ---------------------------------------------------------- drift detection
|
||||
|
||||
# Relevant kinds for user audit surface. aggregates these under
|
||||
# `iai-mcp audit`.
|
||||
AUDIT_EVENT_KINDS: tuple[str, ...] = (
|
||||
"s5_invariant_update",
|
||||
"s5_invariant_proposal",
|
||||
"s5_cooldown_block",
|
||||
"s5_drift_alert",
|
||||
"shield_rejection",
|
||||
"shield_flag",
|
||||
"identity_cross_lingual_warning",
|
||||
)
|
||||
|
||||
|
||||
def detect_drift_anomaly(
|
||||
store: MemoryStore,
|
||||
window_sessions: int = 5,
|
||||
) -> list[dict]:
|
||||
"""D-30 gradual-drift detection via trajectory M4 reversal.
|
||||
|
||||
Reads trajectory_metric events filtered to metric=m4 (profile-vector
|
||||
variance). The expected direction is DECREASING (the profile is
|
||||
converging as the user is learnt over time). When the last
|
||||
`window_sessions` values are monotonically INCREASING or mostly so
|
||||
(at least window_sessions - 2 adjacent pairs increase), emits an
|
||||
s5_drift_alert event and returns the alert payload in a list.
|
||||
|
||||
Returns [] on insufficient data or no drift.
|
||||
"""
|
||||
events = query_events(store, kind="trajectory_metric", limit=1000)
|
||||
m4: list[tuple] = []
|
||||
for e in events:
|
||||
data = e.get("data") or {}
|
||||
if data.get("metric") != "m4":
|
||||
continue
|
||||
try:
|
||||
v = float(data.get("value", 0.0))
|
||||
except (TypeError, ValueError):
|
||||
continue
|
||||
ts = e.get("ts")
|
||||
m4.append((ts, v))
|
||||
|
||||
if len(m4) < window_sessions:
|
||||
return []
|
||||
|
||||
# Sort ascending (oldest first) so "recent" slice is the tail.
|
||||
try:
|
||||
m4.sort(key=lambda x: x[0])
|
||||
except TypeError:
|
||||
# Fallback: if ts objects are not comparable, keep insertion order.
|
||||
pass
|
||||
recent = m4[-window_sessions:]
|
||||
|
||||
increases = 0
|
||||
for i in range(1, len(recent)):
|
||||
if recent[i][1] > recent[i - 1][1]:
|
||||
increases += 1
|
||||
|
||||
# Drift signature: most of the window-1 adjacent steps are increasing.
|
||||
# For window_sessions=5, require increases >= 3 (at least 3 of 4 steps up).
|
||||
# For window_sessions=3, require increases >= 1 (at least 1 of 2 steps up).
|
||||
threshold = max(1, window_sessions - 2)
|
||||
if increases < threshold:
|
||||
return []
|
||||
|
||||
alert = {
|
||||
"kind": "s5_drift_alert",
|
||||
"severity": "warning",
|
||||
"window_sessions": window_sessions,
|
||||
"increases": increases,
|
||||
"first_value": float(recent[0][1]),
|
||||
"last_value": float(recent[-1][1]),
|
||||
}
|
||||
write_event(
|
||||
store,
|
||||
kind="s5_drift_alert",
|
||||
data={
|
||||
"window_sessions": window_sessions,
|
||||
"increases": increases,
|
||||
"first_value": alert["first_value"],
|
||||
"last_value": alert["last_value"],
|
||||
},
|
||||
severity="warning",
|
||||
)
|
||||
return [alert]
|
||||
|
||||
|
||||
def audit_identity_events(
|
||||
store: MemoryStore,
|
||||
since: datetime | None = None,
|
||||
kinds: tuple[str, ...] = AUDIT_EVENT_KINDS,
|
||||
) -> list[dict]:
|
||||
"""Aggregate identity-relevant events chronologically (newest first).
|
||||
|
||||
Used by `iai-mcp audit` + `audit identity` / `audit shield` / `audit drift`
|
||||
CLI subcommands. By default returns the full set of audit kinds; callers
|
||||
may pass a subset (e.g. only s5_* for `audit identity`).
|
||||
"""
|
||||
out: list[dict] = []
|
||||
for kind in kinds:
|
||||
out.extend(query_events(store, kind=kind, since=since, limit=500))
|
||||
# Newest first by ts; coerce to comparable form (fallback to id-based).
|
||||
try:
|
||||
out.sort(key=lambda e: e.get("ts"), reverse=True)
|
||||
except TypeError:
|
||||
pass
|
||||
return out
|
||||
551
src/iai_mcp/schema.py
Normal file
551
src/iai_mcp/schema.py
Normal file
|
|
@ -0,0 +1,551 @@
|
|||
"""Schema induction (LEARN-03, D-18, D-21) -- Task 3.
|
||||
|
||||
D-18 (scheduling): dual-path schema surfacing.
|
||||
- Primary: batch induction inside the heavy sleep cycle. Tier-1 Haiku
|
||||
extraction when `should_call_llm` permits, Tier-0 cooccurrence + TF-IDF
|
||||
fallback otherwise.
|
||||
- Secondary: entropy-gated provisional schemas surfaced during
|
||||
`pipeline_recall` when score distribution entropy > 0.8 bits AND the
|
||||
cohesive community has >= 2 shared tags.
|
||||
|
||||
D-21 (thresholds, autism-aware):
|
||||
- Auto-induct when co_occurrence >= 5 AND confidence >= 0.85.
|
||||
- User-approval flag at co_occurrence in [3, 5) AND confidence in [0.65, 0.85).
|
||||
- Below: discard.
|
||||
- Exceptions preserved as first-class records (never absorbed).
|
||||
- Abstraction level: concrete (Dawson-Mottron Raven's preference).
|
||||
|
||||
Schema records are first-class hubs:
|
||||
- tier="semantic", detail_level=3 -> never_decay=True.
|
||||
- schema_instance_of edges from evidence -> schema never decay.
|
||||
- pipeline routing can prioritise schema records when pattern
|
||||
matches.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
from collections import Counter
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from typing import Iterable
|
||||
from uuid import UUID, uuid4
|
||||
|
||||
from iai_mcp.events import write_event
|
||||
from iai_mcp.guard import BudgetLedger, RateLimitLedger, should_call_llm
|
||||
from iai_mcp.store import MemoryStore
|
||||
from iai_mcp.types import MemoryRecord, SCHEMA_VERSION_CURRENT
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- constants
|
||||
|
||||
AUTO_INDUCT_COOCCURRENCE: int = 5
|
||||
AUTO_INDUCT_CONFIDENCE: float = 0.85
|
||||
USER_APPROVAL_COOCCURRENCE: int = 3
|
||||
USER_APPROVAL_CONFIDENCE: float = 0.65
|
||||
MAX_EVIDENCE_PER_SCHEMA: int = 50
|
||||
PROVISIONAL_ENTROPY_MIN: float = 0.8
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- candidate
|
||||
|
||||
|
||||
@dataclass
|
||||
class SchemaCandidate:
|
||||
"""One schema candidate surfaced by induce_schemas_*."""
|
||||
|
||||
pattern: str
|
||||
confidence: float
|
||||
evidence_count: int
|
||||
evidence_ids: list[UUID] = field(default_factory=list)
|
||||
domain: str | None = None
|
||||
exceptions: list[UUID] = field(default_factory=list)
|
||||
status: str = "auto" # "auto" | "pending_user_approval"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- Tier-0 induction
|
||||
|
||||
|
||||
def _tag_cooccurrence(records: Iterable) -> dict:
|
||||
"""Bucket records by tag-pair frequency. Returns {frozenset(pair): [record_ids]}.
|
||||
|
||||
Phase 07.7-04 D-26-A: accepts either ``list[MemoryRecord]`` (back-compat;
|
||||
used by external callers passing dataclass instances) or an iterable of
|
||||
projected ``dict`` rows from ``store.iter_record_columns(["id", "tags_json"])``.
|
||||
|
||||
Dispatch is duck-typed: items with a ``.tags`` attribute are treated as
|
||||
MemoryRecord; items without are treated as dict rows. This keeps both
|
||||
surfaces alive while migrating the production path off ``all_records()``.
|
||||
|
||||
For dict rows, ``tags_json`` is parsed defensively (mirrors the W3
|
||||
pattern in ``sleep._tier0_schema_surfacing`` — corrupted rows contribute
|
||||
zero counts but do not crash). The ``id`` field arrives as a string from
|
||||
LanceDB and is converted to ``UUID`` here so callers always see
|
||||
``list[UUID]`` evidence_ids regardless of which input shape was passed.
|
||||
"""
|
||||
pairs: dict = {}
|
||||
for r in records:
|
||||
# Dispatch on duck-typing: MemoryRecord has .tags + .id attributes;
|
||||
# dict rows have ["tags_json"] + ["id"] keys.
|
||||
if hasattr(r, "tags"):
|
||||
# MemoryRecord path (back-compat for external/test callers).
|
||||
raw_tags = r.tags or []
|
||||
rid = r.id
|
||||
else:
|
||||
# Dict-row path (D-26-A migrated production path). Defensive parse:
|
||||
# malformed tags_json contributes zero pairs but does not raise.
|
||||
tags_raw = r.get("tags_json") or "[]"
|
||||
try:
|
||||
raw_tags = json.loads(tags_raw) if tags_raw else []
|
||||
except (TypeError, json.JSONDecodeError):
|
||||
raw_tags = []
|
||||
id_raw = r.get("id")
|
||||
if id_raw is None:
|
||||
continue
|
||||
# iter_record_columns yields id as a string; convert to UUID at
|
||||
# the boundary so SchemaCandidate.evidence_ids stays list[UUID].
|
||||
try:
|
||||
rid = UUID(id_raw) if isinstance(id_raw, str) else id_raw
|
||||
except (ValueError, AttributeError):
|
||||
continue
|
||||
|
||||
tags = [
|
||||
t for t in raw_tags
|
||||
if not t.startswith("raw:") and not t.startswith("domain:")
|
||||
]
|
||||
for i in range(len(tags)):
|
||||
for j in range(i + 1, len(tags)):
|
||||
key = frozenset([tags[i], tags[j]])
|
||||
pairs.setdefault(key, []).append(rid)
|
||||
return pairs
|
||||
|
||||
|
||||
def induce_schemas_tier0(store: MemoryStore) -> list[SchemaCandidate]:
|
||||
"""D-18 Tier-0 path: tag cooccurrence + TF-IDF; no LLM.
|
||||
|
||||
Returns a list of SchemaCandidate. Each candidate passes the gate:
|
||||
- status="auto" -> count >= 5 AND confidence >= 0.85
|
||||
- status="pending_user_approval" -> count in [3,5) AND confidence in [0.65, 0.85)
|
||||
|
||||
Phase 07.7-04 D-26-A: streams via ``store.iter_record_columns(
|
||||
["id", "tags_json"], batch_size=1024)`` instead of ``store.all_records()``.
|
||||
Encrypted columns (literal_surface, provenance_json,
|
||||
profile_modulation_gain_json) are NEVER read on this path; the W5 cipher
|
||||
cache is short-circuited entirely. On the 8105-record production store
|
||||
this saves ~16210 AES-GCM operations + ~14.5 MB literal_surface
|
||||
materialisation per ``run_heavy_consolidation`` invocation, and unblocks
|
||||
the W4 ≤1 ``all_records()`` invariant on the heavy cycle.
|
||||
|
||||
Single-pass record-count tally: count_total is incremented inside the
|
||||
iterator loop and the ``< CLUSTER_MIN_SIZE`` floor is checked afterwards.
|
||||
Mirrors the pattern in ``sleep._tier0_schema_surfacing`` (Plan 07.7-03 W3).
|
||||
"""
|
||||
rows = list(store.iter_record_columns(["id", "tags_json"], batch_size=1024))
|
||||
if len(rows) < 3:
|
||||
return []
|
||||
|
||||
pair_counts = _tag_cooccurrence(rows)
|
||||
candidates: list[SchemaCandidate] = []
|
||||
for pair, evidence in pair_counts.items():
|
||||
count = len(evidence)
|
||||
# Heuristic confidence: saturates toward 1.0 at 10+ evidence records.
|
||||
confidence = min(1.0, count / 10.0)
|
||||
pattern = f"tags:{'+'.join(sorted(pair))}"
|
||||
if count >= AUTO_INDUCT_COOCCURRENCE and confidence >= AUTO_INDUCT_CONFIDENCE:
|
||||
status = "auto"
|
||||
elif (
|
||||
USER_APPROVAL_COOCCURRENCE <= count < AUTO_INDUCT_COOCCURRENCE
|
||||
and confidence >= USER_APPROVAL_CONFIDENCE
|
||||
):
|
||||
status = "pending_user_approval"
|
||||
else:
|
||||
continue
|
||||
candidates.append(
|
||||
SchemaCandidate(
|
||||
pattern=pattern,
|
||||
confidence=confidence,
|
||||
evidence_count=count,
|
||||
evidence_ids=list(evidence[:MAX_EVIDENCE_PER_SCHEMA]),
|
||||
status=status,
|
||||
)
|
||||
)
|
||||
return candidates
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- Tier-1 w/ D-GUARD
|
||||
|
||||
|
||||
def induce_schemas_tier1(
|
||||
store: MemoryStore,
|
||||
budget: BudgetLedger,
|
||||
rate: RateLimitLedger,
|
||||
llm_enabled: bool = True,
|
||||
) -> list[SchemaCandidate]:
|
||||
"""D-18 Tier-1 path: Haiku extraction gated by D-GUARD ladder.
|
||||
|
||||
When should_call_llm returns False (any ladder step), emit an
|
||||
llm_health event and delegate to `induce_schemas_tier0`.
|
||||
|
||||
scope: the Tier-1 branch is reserved; wires the
|
||||
actual anthropic.batches.create call. This function's contract is: on
|
||||
allow, call budget.record_spend and emit llm_health; then fall back to
|
||||
tier0 (because real Batch output is a deliverable). The
|
||||
effective_tier in the event is "tier0" regardless until Plan 02-04.
|
||||
"""
|
||||
has_key = bool(os.environ.get("ANTHROPIC_API_KEY"))
|
||||
ok, reason = should_call_llm(
|
||||
budget=budget, rate=rate,
|
||||
llm_enabled=llm_enabled, has_api_key=has_key,
|
||||
estimated_usd=0.005,
|
||||
)
|
||||
if not ok:
|
||||
write_event(
|
||||
store,
|
||||
kind="llm_health",
|
||||
data={
|
||||
"component": "schema_induction",
|
||||
"tier": "fallback",
|
||||
"reason": reason,
|
||||
},
|
||||
severity="warning",
|
||||
)
|
||||
return induce_schemas_tier0(store)
|
||||
|
||||
# Tier-1 eligible -- scaffold only (Plan 02-04 wires real Batch API).
|
||||
try:
|
||||
import anthropic # noqa: F401 -- lazy import, raise-only if missing
|
||||
budget.record_spend(0.002, kind="schema_induction")
|
||||
write_event(
|
||||
store,
|
||||
kind="llm_health",
|
||||
data={
|
||||
"component": "schema_induction",
|
||||
"tier": "haiku",
|
||||
"note": "Plan 02-04 wires real Batch API; 02-03 scaffolds only",
|
||||
},
|
||||
severity="info",
|
||||
)
|
||||
except Exception as e:
|
||||
write_event(
|
||||
store,
|
||||
kind="llm_health",
|
||||
data={"component": "schema_induction", "error": str(e)},
|
||||
severity="critical",
|
||||
)
|
||||
return induce_schemas_tier0(store)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- persist
|
||||
|
||||
|
||||
def _majority_language(evidence_ids: list[UUID], store: MemoryStore) -> str:
|
||||
"""Return the plurality ISO-639-1 language tag among evidence records.
|
||||
|
||||
fix (D-08a constitutional): schema hubs must carry the
|
||||
language of their source evidence, not a hardcoded 'en'. A user whose
|
||||
records are Russian would otherwise get schemas tagged 'en' and fail
|
||||
their own language='ru' filter at retrieval.
|
||||
|
||||
Algorithm:
|
||||
- Fetch each evidence record via store.get (skip missing/deleted ones).
|
||||
- Collect their language fields (skip empty/None).
|
||||
- Return max(set(langs), key=langs.count). Tie-break is deterministic
|
||||
given a stable input list order: max with key=list.count returns
|
||||
the first element from the set iteration whose count is the
|
||||
maximum, and Python's set iteration on strings follows insertion
|
||||
order in CPython >= 3.7 for the distinct-values pattern used here
|
||||
because we build the distinct set from a list iteration.
|
||||
- Fallback 'en' when evidence is empty or all records are missing.
|
||||
|
||||
Tie-break policy: when two languages are tied, the one whose first
|
||||
occurrence appears EARLIEST in evidence_ids wins. Matches Phase 1
|
||||
default 'en' when no signal is available (least-surprise).
|
||||
"""
|
||||
langs: list[str] = []
|
||||
for eid in evidence_ids:
|
||||
rec = store.get(eid)
|
||||
if rec is None:
|
||||
continue
|
||||
if rec.language:
|
||||
langs.append(rec.language)
|
||||
if not langs:
|
||||
return "en"
|
||||
# Deterministic tie-break: iterate langs in order, pick the first whose
|
||||
# count is the max. max(set(langs), key=langs.count) is undefined for
|
||||
# set ordering, so we use a hand-rolled pass instead.
|
||||
best = langs[0]
|
||||
best_count = langs.count(best)
|
||||
seen: set[str] = {best}
|
||||
for lang in langs[1:]:
|
||||
if lang in seen:
|
||||
continue
|
||||
seen.add(lang)
|
||||
c = langs.count(lang)
|
||||
if c > best_count:
|
||||
best = lang
|
||||
best_count = c
|
||||
return best
|
||||
|
||||
|
||||
def persist_schema(
|
||||
store: MemoryStore,
|
||||
candidate: SchemaCandidate,
|
||||
) -> UUID:
|
||||
"""Insert a schema record + schema_instance_of edges to evidence.
|
||||
|
||||
Schema records carry:
|
||||
- tier="semantic", detail_level=3 (never_decay auto-true)
|
||||
- tags=["schema", <status>, f"pattern:{pattern}"]
|
||||
- s5_trust_score=0.5 (neutral prior; LEARN-06 may raise over time)
|
||||
- schema_version=2
|
||||
"""
|
||||
from iai_mcp.aaak import enforce_language_tagged, generate_aaak_index
|
||||
from iai_mcp.embed import embedder_for_store
|
||||
|
||||
summary = (
|
||||
f"Schema: {candidate.pattern} (confidence={candidate.confidence:.2f})"
|
||||
)
|
||||
|
||||
# R1 (D-09 + D-10): pattern dedup. Search for an existing
|
||||
# schema record carrying the tag `pattern:{candidate.pattern}` in the
|
||||
# semantic tier. If found, reinforce schema_instance_of edges from new
|
||||
# evidence onto the existing keeper, emit `schema_reinforced`, and
|
||||
# return the existing schema_id. If not found, fall through to the
|
||||
# original insert path. Closes the chain-induction bleed: every sleep
|
||||
# cycle would otherwise insert a fresh tier="semantic", never_decay
|
||||
# row for the same pattern (live store accumulated 7+ duplicates per
|
||||
# pattern with degree-bonus shouldering verbatim records out of hits[]).
|
||||
pattern_tag = f"pattern:{candidate.pattern}"
|
||||
# Phase 07.7-04 D-26-B: keeper scan migrated from store.all_records() to
|
||||
# store.iter_record_columns(["id", "tier", "tags_json"], batch_size=1024).
|
||||
# Projection skips encrypted columns (literal_surface, provenance_json,
|
||||
# profile_modulation_gain_json) entirely — the W5 cipher cache is
|
||||
# short-circuited on this path. Early-exit (`break`) semantics preserved.
|
||||
# The matching row's id arrives as a string from LanceDB; we convert to
|
||||
# UUID at the boundary so downstream code sees the same type contract as
|
||||
# the pre-D-26 ``existing_keeper.id`` access pattern.
|
||||
existing_keeper_id: UUID | None = None
|
||||
try:
|
||||
for row in store.iter_record_columns(
|
||||
["id", "tier", "tags_json"], batch_size=1024
|
||||
):
|
||||
if row.get("tier") != "semantic":
|
||||
continue
|
||||
tags_raw = row.get("tags_json") or "[]"
|
||||
try:
|
||||
tags = json.loads(tags_raw) if tags_raw else []
|
||||
except (TypeError, json.JSONDecodeError):
|
||||
tags = []
|
||||
if pattern_tag in tags:
|
||||
id_raw = row.get("id")
|
||||
if id_raw is None:
|
||||
continue
|
||||
try:
|
||||
existing_keeper_id = (
|
||||
UUID(id_raw) if isinstance(id_raw, str) else id_raw
|
||||
)
|
||||
except (ValueError, AttributeError):
|
||||
continue
|
||||
break
|
||||
except Exception:
|
||||
# Defensive: if the scan fails, fall through to the insert path so
|
||||
# we never silently lose a schema. Mirrors the diagnostic-write
|
||||
# contract used in pipeline.py provenance batching.
|
||||
existing_keeper_id = None
|
||||
|
||||
if existing_keeper_id is not None:
|
||||
from iai_mcp.store import EDGES_TABLE
|
||||
|
||||
# Reinforce schema_instance_of edges from each new evidence record
|
||||
# onto the existing keeper. Reuses the same delta formula as the
|
||||
# insert path (max(0.1, candidate.confidence)) for symmetry.
|
||||
delta = max(0.1, candidate.confidence)
|
||||
new_pairs = [(ev_id, existing_keeper_id) for ev_id in candidate.evidence_ids]
|
||||
if new_pairs:
|
||||
store.boost_edges(
|
||||
new_pairs,
|
||||
edge_type="schema_instance_of",
|
||||
delta=delta,
|
||||
)
|
||||
|
||||
# Compute total_evidence after reinforcement: count
|
||||
# `schema_instance_of` edges incident on the keeper. Read via the
|
||||
# edges table to avoid trusting any in-memory cache.
|
||||
# Note: store.boost_edges canonicalises (src, dst) to a sorted
|
||||
# tuple, so the keeper appears in EITHER column depending on the
|
||||
# string ordering of the paired evidence UUID. OR-counting both
|
||||
# columns gives the true edge-incidence count (no double-count
|
||||
# since each edge row has the keeper in exactly one column).
|
||||
try:
|
||||
edges_df = store.db.open_table(EDGES_TABLE).to_pandas()
|
||||
keeper_str = str(existing_keeper_id)
|
||||
total_evidence = int(
|
||||
((edges_df["edge_type"] == "schema_instance_of")
|
||||
& ((edges_df["dst"] == keeper_str)
|
||||
| (edges_df["src"] == keeper_str))).sum()
|
||||
)
|
||||
except Exception:
|
||||
total_evidence = len(candidate.evidence_ids)
|
||||
|
||||
write_event(
|
||||
store,
|
||||
kind="schema_reinforced",
|
||||
data={
|
||||
"schema_id": str(existing_keeper_id),
|
||||
"pattern": candidate.pattern,
|
||||
"evidence_added": len(candidate.evidence_ids),
|
||||
"total_evidence": total_evidence,
|
||||
},
|
||||
severity="info",
|
||||
source_ids=[existing_keeper_id, *candidate.evidence_ids[:5]],
|
||||
)
|
||||
return existing_keeper_id
|
||||
|
||||
emb = embedder_for_store(store).embed(summary)
|
||||
now = datetime.now(timezone.utc)
|
||||
schema_id = uuid4()
|
||||
# fix: derive language from the plurality language
|
||||
# of the evidence records, not a hardcoded 'en'. Schema hubs for Russian /
|
||||
# Japanese / Arabic clusters now carry the correct ISO-639-1 tag so
|
||||
# language-filtered retrieval surfaces them as expected.
|
||||
derived_language = _majority_language(candidate.evidence_ids, store)
|
||||
schema_rec = MemoryRecord(
|
||||
id=schema_id,
|
||||
tier="semantic",
|
||||
literal_surface=summary,
|
||||
aaak_index="",
|
||||
embedding=emb,
|
||||
community_id=None,
|
||||
centrality=0.0,
|
||||
detail_level=3,
|
||||
pinned=False,
|
||||
stability=0.7,
|
||||
difficulty=0.3,
|
||||
last_reviewed=now,
|
||||
never_decay=True,
|
||||
never_merge=False,
|
||||
provenance=[
|
||||
{
|
||||
"ts": now.isoformat(),
|
||||
"cue": "schema_induction",
|
||||
"session_id": "system",
|
||||
}
|
||||
],
|
||||
created_at=now,
|
||||
updated_at=now,
|
||||
tags=[
|
||||
"schema",
|
||||
candidate.status,
|
||||
f"pattern:{candidate.pattern}",
|
||||
],
|
||||
language=derived_language,
|
||||
s5_trust_score=0.5,
|
||||
profile_modulation_gain={},
|
||||
schema_version=SCHEMA_VERSION_CURRENT,
|
||||
)
|
||||
enforce_language_tagged(schema_rec)
|
||||
schema_rec.aaak_index = generate_aaak_index(schema_rec)
|
||||
store.insert(schema_rec)
|
||||
|
||||
# R3: batch the schema_instance_of edges into ONE boost_edges
|
||||
# call (one merge_insert + one tbl.add at most). Previously this loop
|
||||
# issued N Lance versions on edges.lance for an N-evidence schema.
|
||||
instance_pairs = [(ev_id, schema_id) for ev_id in candidate.evidence_ids]
|
||||
if instance_pairs:
|
||||
store.boost_edges(
|
||||
instance_pairs,
|
||||
edge_type="schema_instance_of",
|
||||
delta=max(0.1, candidate.confidence),
|
||||
)
|
||||
|
||||
write_event(
|
||||
store,
|
||||
kind="schema_induction_run",
|
||||
data={
|
||||
"schema_id": str(schema_id),
|
||||
"pattern": candidate.pattern,
|
||||
"confidence": candidate.confidence,
|
||||
"evidence_count": candidate.evidence_count,
|
||||
"status": candidate.status,
|
||||
},
|
||||
severity="info",
|
||||
source_ids=[schema_id, *candidate.evidence_ids[:5]],
|
||||
)
|
||||
return schema_id
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- provisional
|
||||
|
||||
|
||||
def provisional_schemas_for_recall(
|
||||
store: MemoryStore,
|
||||
hits: list,
|
||||
entropy_bits: float,
|
||||
records_cache: "dict | None" = None,
|
||||
) -> list[dict]:
|
||||
"""D-18 secondary path: surface provisional schema hints on high-entropy recalls.
|
||||
|
||||
Returns a list of hint dicts compatible with RecallResponse.hints, one per
|
||||
cohesive tag appearing in >= 2 of the top hits.
|
||||
|
||||
perf: batched all_records() fetch replaces N+1 store.get()
|
||||
calls. A single to_pandas() call is still O(total_records) but constant
|
||||
per recall, not per-hit. This was a major D-SPEED bottleneck at N=50.
|
||||
|
||||
perf (Rule 1 auto-fix): accept optional `records_cache` so
|
||||
pipeline_recall can pass its already-built cache through -- avoids a
|
||||
second `store.all_records()` scan per recall (~40ms at N=100). Falls
|
||||
back to all_records() if no cache provided (preserves back-compat for
|
||||
ad-hoc callers; tests without pipeline_recall still work).
|
||||
"""
|
||||
if entropy_bits < PROVISIONAL_ENTROPY_MIN or len(hits) < 3:
|
||||
return []
|
||||
|
||||
# Batch-fetch all records once; hits are typically <=5 so the cost of
|
||||
# filtering in-memory dominates over 5 separate store.get() round-trips.
|
||||
hit_ids = {h.record_id for h in hits}
|
||||
if records_cache is not None:
|
||||
# Reuse the cache built at pipeline_recall stage 1. Zero scans.
|
||||
by_id = {
|
||||
rid: rec for rid, rec in records_cache.items() if rid in hit_ids
|
||||
}
|
||||
else:
|
||||
try:
|
||||
all_recs = store.all_records()
|
||||
except Exception:
|
||||
return []
|
||||
by_id = {r.id: r for r in all_recs if r.id in hit_ids}
|
||||
|
||||
tag_count: Counter = Counter()
|
||||
for h in hits:
|
||||
rec = by_id.get(h.record_id)
|
||||
if rec is None:
|
||||
continue
|
||||
for t in (rec.tags or []):
|
||||
if t.startswith("raw:") or t.startswith("domain:"):
|
||||
continue
|
||||
tag_count[t] += 1
|
||||
|
||||
provisional: list[dict] = []
|
||||
for tag, cnt in tag_count.most_common(3):
|
||||
if cnt >= 2:
|
||||
source_ids: list[str] = []
|
||||
for h in hits:
|
||||
rec = by_id.get(h.record_id)
|
||||
if rec is None:
|
||||
continue
|
||||
if tag in (rec.tags or []):
|
||||
source_ids.append(str(h.record_id))
|
||||
if len(source_ids) >= 5:
|
||||
break
|
||||
provisional.append(
|
||||
{
|
||||
"kind": "provisional_schema",
|
||||
"severity": "info",
|
||||
"source_ids": source_ids,
|
||||
"text": f"Potential schema: tag={tag} cnt={cnt}",
|
||||
"provisional": True,
|
||||
"entropy": entropy_bits,
|
||||
}
|
||||
)
|
||||
return provisional
|
||||
486
src/iai_mcp/session.py
Normal file
486
src/iai_mcp/session.py
Normal file
|
|
@ -0,0 +1,486 @@
|
|||
"""Session-start assembler (D-10 budget, OPS-01, continuity).
|
||||
|
||||
Produces the 4-segment cached prefix that Claude's MCP wrapper places in front
|
||||
of every request under Anthropic 1h-TTL prompt caching:
|
||||
|
||||
L0 -- pinned identity kernel (always includes the user's L0 record)
|
||||
L1 -- critical-facts block (pinned + high-detail records)
|
||||
L2[...] -- Yeo-like community summaries (top MAX_TOP_COMMUNITIES=7)
|
||||
rich_club -- global hub prefetch (CONN-02 rich-club nodes)
|
||||
|
||||
Plan 03-02 (M6 LIVE prerequisite): assemble_session_start emits
|
||||
``kind='session_started'`` with a deterministic ``session_state_hash`` so
|
||||
M6 context-repeat-rate can be computed live from production emits.
|
||||
|
||||
Budget breakdown:
|
||||
L0_BUDGET_TOKENS = 80
|
||||
L1_BUDGET_TOKENS = 200
|
||||
L2_PER_COMMUNITY_TOKENS = 50 (cap of 7 -> L2 totals ~350 tok)
|
||||
RICH_CLUB_BUDGET_TOKENS = 1500
|
||||
TOTAL_CACHED_BUDGET = 2000
|
||||
(plus ~1000 tok dynamic tail per -> steady-state <= 3000)
|
||||
|
||||
Tokens are counted via a local `_approx_tokens(text) = max(1, len(text) // 4)`
|
||||
heuristic that matches Anthropic's documented rough ratio; bench/tokens.py
|
||||
cross-validates with the real `count_tokens` API when ANTHROPIC_API_KEY is
|
||||
available.
|
||||
|
||||
OPS-05 observable: `payload.l0` always contains the substring "IAI-MCP" when the
|
||||
pinned L0 record is present, so the verifier can assert identity continuity
|
||||
on a fresh session open.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from uuid import UUID
|
||||
|
||||
from iai_mcp.aaak import generate_aaak_index
|
||||
from iai_mcp.community import CommunityAssignment
|
||||
from iai_mcp.handle import decode_compact_handle, encode_compact_handle
|
||||
from iai_mcp.store import MemoryStore
|
||||
from iai_mcp.types import MemoryRecord
|
||||
|
||||
|
||||
# ------------------------------------------------------------- budgets
|
||||
L0_BUDGET_TOKENS = 80
|
||||
L1_BUDGET_TOKENS = 200
|
||||
L2_PER_COMMUNITY_TOKENS = 50
|
||||
L2_COMMUNITY_CAP = 7 # CONN-01 Yeo-like cap
|
||||
RICH_CLUB_BUDGET_TOKENS = 1500
|
||||
TOTAL_CACHED_BUDGET = 2000 # L0 + L1 + L2 + rich_club <= this
|
||||
DYNAMIC_TAIL_TOKENS = 1000 # reserve for per-turn tool results
|
||||
|
||||
# Pinned L0 UUID (D-14, matches core._seed_l0_identity).
|
||||
L0_RECORD_UUID = UUID("00000000-0000-0000-0000-000000000001")
|
||||
|
||||
|
||||
# --------------------------------------------------------------- data shape
|
||||
|
||||
|
||||
@dataclass
|
||||
class SessionStartPayload:
|
||||
"""Cached prefix + metadata (D-10 + TOK-11 lazy fields).
|
||||
|
||||
`breakpoint_marker` is where the TS wrapper splits stable vs volatile
|
||||
content before applying Anthropic `cache_control` (TOK-01). The Python
|
||||
side never inserts it into the segment strings -- it's just a sentinel
|
||||
string the TS side recognises.
|
||||
|
||||
D5-02: three new pointer fields populated at
|
||||
`wake_depth=minimal` (the new default); legacy l0/l1/l2/rich_club left
|
||||
empty at minimal mode. `wake_depth` is echoed so the client knows
|
||||
which mode produced the payload.
|
||||
"""
|
||||
|
||||
l0: str = ""
|
||||
l1: str = ""
|
||||
l2: list[str] = field(default_factory=list)
|
||||
rich_club: str = ""
|
||||
total_cached_tokens: int = 0
|
||||
total_dynamic_tokens: int = 0
|
||||
breakpoint_marker: str = "--<cache-breakpoint>--"
|
||||
# D5-02 — lazy session-start fields (<=30 raw tok combined).
|
||||
identity_pointer: str = "" # "<id:{8-hex-of-L0-uuid}>" (~8 tok)
|
||||
brain_handle: str = "" # "<sess:{8-hex} pend:{N}>" (~12 tok)
|
||||
topic_cluster_hint: str = "" # "<topic:{community_label}>" (~8 tok)
|
||||
# — single compact handle, ≤16 raw tok target. At
|
||||
# `wake_depth=minimal` this supersedes the three legacy pointers above
|
||||
# (they are left empty to keep the budget tight); `standard`/`deep`
|
||||
# populate BOTH the compact handle and the legacy fields for back-compat.
|
||||
compact_handle: str = "" # "<iai:{16-hex-blake2s}>" (~6-10 raw tok)
|
||||
wake_depth: str = "minimal" # echoed for introspection
|
||||
|
||||
|
||||
# ---------------------------------------------------------- token counting
|
||||
|
||||
|
||||
def _approx_tokens(text: str) -> int:
|
||||
"""~4 chars per token heuristic (Anthropic documentation ballpark).
|
||||
|
||||
Minimum 1 for any non-empty text so callers don't divide-by-zero.
|
||||
"""
|
||||
if not text:
|
||||
return 0
|
||||
return max(1, len(text) // 4)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------- helpers
|
||||
|
||||
|
||||
def _resolve_compact_handle_to_pointers(handle: str) -> tuple[str, str, str] | None:
|
||||
"""Rebuild the legacy (identity_pointer, brain_handle, topic_cluster_hint)
|
||||
triple from a compact ``<iai:HHHHHHHHHHHHHHHH>`` handle minted earlier in
|
||||
this process.
|
||||
|
||||
no-info-loss proof: everything the 3-field shape conveyed is
|
||||
recoverable from the compact handle via the LRU in ``iai_mcp.handle`` ---
|
||||
identity prefix, session prefix, topic label and pending count. Returns
|
||||
``None`` when the handle is malformed OR the LRU has evicted the record,
|
||||
mirroring ``decode_compact_handle``'s contract: callers that need strict
|
||||
resolution should keep the legacy fields available under
|
||||
``wake_depth=standard`` / ``deep`` as fallback.
|
||||
"""
|
||||
parts = decode_compact_handle(handle)
|
||||
if parts is None:
|
||||
return None
|
||||
identity_pointer = f"<id:{parts[0]}>" if parts[0] else ""
|
||||
brain_handle = f"<sess:{parts[1]} pend:{parts[3]}>"
|
||||
topic_cluster_hint = f"<topic:{parts[2]}>"
|
||||
return identity_pointer, brain_handle, topic_cluster_hint
|
||||
|
||||
|
||||
def _fetch_record(store: MemoryStore, uid: UUID) -> MemoryRecord | None:
|
||||
try:
|
||||
return store.get(uid)
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
# ----------------------------------------------------------- segment builders
|
||||
|
||||
|
||||
def _l0_segment(store: MemoryStore) -> str:
|
||||
"""OPS-05 identity kernel -- the pinned L0 record by fixed UUID.
|
||||
|
||||
Returned string shape: "<aaak_index>\n<literal_surface[:200]>". Empty when
|
||||
the L0 record hasn't been seeded yet (fresh stores before first core boot).
|
||||
"""
|
||||
rec = _fetch_record(store, L0_RECORD_UUID)
|
||||
if rec is None:
|
||||
return ""
|
||||
aaak = rec.aaak_index or generate_aaak_index(rec)
|
||||
# Truncate literal to 200 chars -- the L0 budget is ~80 tok (~320 chars);
|
||||
# leave slack for the aaak line + newline.
|
||||
return f"{aaak}\n{rec.literal_surface[:200]}"
|
||||
|
||||
|
||||
def _l1_segment(store: MemoryStore, max_records: int = 10) -> str:
|
||||
"""L1 critical-facts block -- pinned records with detail_level >= 4.
|
||||
|
||||
Excludes the L0 record (duplicated in L0 segment). Lines formatted as
|
||||
"- <literal_surface[:100]>" so they fit in ~25 tokens each; 10 of them
|
||||
saturate the L1_BUDGET_TOKENS ~= 200 tok budget.
|
||||
"""
|
||||
try:
|
||||
records = store.all_records()
|
||||
except Exception:
|
||||
return ""
|
||||
pinned_hi_detail = [
|
||||
r for r in records
|
||||
if r.pinned and r.detail_level >= 4 and r.id != L0_RECORD_UUID
|
||||
]
|
||||
# Deterministic ordering: by detail_level desc, then by created_at asc.
|
||||
pinned_hi_detail.sort(
|
||||
key=lambda r: (-r.detail_level, r.created_at)
|
||||
)
|
||||
pinned_hi_detail = pinned_hi_detail[:max_records]
|
||||
if not pinned_hi_detail:
|
||||
return ""
|
||||
lines = [f"- {r.literal_surface[:100]}" for r in pinned_hi_detail]
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def _l2_segments(
|
||||
store: MemoryStore,
|
||||
assignment: CommunityAssignment,
|
||||
) -> list[str]:
|
||||
"""Up to L2_COMMUNITY_CAP (7) Yeo-like community summary lines.
|
||||
|
||||
Each summary samples up to 3 member records from the community's
|
||||
mid_regions list and joins them with `|`. Budget guardrail: each line
|
||||
is capped at approximately L2_PER_COMMUNITY_TOKENS * 4 chars (=200 chars).
|
||||
|
||||
Empty list when the assignment has no top_communities (fresh/flat case).
|
||||
"""
|
||||
top = list(assignment.top_communities)[:L2_COMMUNITY_CAP]
|
||||
if not top:
|
||||
return []
|
||||
|
||||
# records_cache: keep the single all_records() call hot (same trick
|
||||
# pipeline.py uses -- avoids N+1 store.get scans).
|
||||
try:
|
||||
records = store.all_records()
|
||||
except Exception:
|
||||
return []
|
||||
by_uuid = {r.id: r for r in records}
|
||||
|
||||
summaries: list[str] = []
|
||||
max_chars = L2_PER_COMMUNITY_TOKENS * 4 # ~200 chars budget per line
|
||||
for cid in top:
|
||||
members = assignment.mid_regions.get(cid, [])[:3]
|
||||
parts: list[str] = []
|
||||
for mid in members:
|
||||
rec = by_uuid.get(mid)
|
||||
if rec is None:
|
||||
continue
|
||||
# Per-member snippet: AAAK-shortened wing tag + first 40 chars.
|
||||
wing = rec.aaak_index.split("/")[0] if rec.aaak_index else "W:?"
|
||||
parts.append(f"{wing}/{rec.literal_surface[:40]}")
|
||||
if not parts:
|
||||
continue
|
||||
body = " | ".join(parts)
|
||||
line = f"[community {str(cid)[:8]}] {body}"
|
||||
if len(line) > max_chars:
|
||||
line = line[:max_chars]
|
||||
# LLMLingua-2 compression on L2 community
|
||||
# descriptors. Passthrough when package absent (see compress.py).
|
||||
try:
|
||||
from iai_mcp.compress import compress_l2_descriptor
|
||||
line = compress_l2_descriptor(line, store=store)
|
||||
except Exception:
|
||||
pass
|
||||
summaries.append(line)
|
||||
return summaries
|
||||
|
||||
|
||||
def _rich_club_segment(store: MemoryStore, rich_club: list[UUID]) -> str:
|
||||
"""Global rich-club summary, truncated to RICH_CLUB_BUDGET_TOKENS.
|
||||
|
||||
Each rich-club node contributes one line "<aaak_index>: <literal_surface[:60]>".
|
||||
Lines are added until the running token count would exceed the budget.
|
||||
"""
|
||||
return _rich_club_segment_with_budget(store, rich_club, budget=RICH_CLUB_BUDGET_TOKENS)
|
||||
|
||||
|
||||
def _rich_club_segment_with_budget(
|
||||
store: MemoryStore,
|
||||
rich_club: list[UUID],
|
||||
*,
|
||||
budget: int,
|
||||
) -> str:
|
||||
"""Rich-club summary with an explicit budget (Plan 05-03 deep mode).
|
||||
|
||||
Same rendering as `_rich_club_segment`; `budget` replaces the default cap
|
||||
so wake_depth=deep can lift the rich_club allotment to ~2000 tok.
|
||||
"""
|
||||
if not rich_club:
|
||||
return ""
|
||||
try:
|
||||
records = store.all_records()
|
||||
except Exception:
|
||||
return ""
|
||||
by_uuid = {r.id: r for r in records}
|
||||
|
||||
lines: list[str] = []
|
||||
running = 0
|
||||
for uid in rich_club:
|
||||
rec = by_uuid.get(uid)
|
||||
if rec is None:
|
||||
continue
|
||||
aaak = rec.aaak_index or generate_aaak_index(rec)
|
||||
line = f"{aaak}: {rec.literal_surface[:60]}"
|
||||
cost = _approx_tokens(line)
|
||||
# Respect running budget -- +1 accounts for the join newline.
|
||||
if running + cost + 1 > budget:
|
||||
break
|
||||
lines.append(line)
|
||||
running += cost + 1
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
# ------------------------------------------------------------------ public
|
||||
|
||||
|
||||
def _session_state_hash(payload: SessionStartPayload) -> str:
|
||||
"""Plan 03-02 M6: deterministic SHA-256 over the 4-segment cached prefix.
|
||||
|
||||
Two sessions whose L0 + L1 + L2 + rich_club segments are byte-identical
|
||||
produce the SAME session_state_hash -- which is exactly the
|
||||
"context-repeat" signal M6 measures.
|
||||
"""
|
||||
import hashlib
|
||||
h = hashlib.sha256()
|
||||
h.update(payload.l0.encode("utf-8"))
|
||||
h.update(b"\x1f") # ASCII unit separator
|
||||
h.update(payload.l1.encode("utf-8"))
|
||||
h.update(b"\x1f")
|
||||
h.update("\n".join(payload.l2).encode("utf-8"))
|
||||
h.update(b"\x1f")
|
||||
h.update(payload.rich_club.encode("utf-8"))
|
||||
return h.hexdigest()
|
||||
|
||||
|
||||
def _dominant_community_label(assignment: CommunityAssignment) -> str:
|
||||
"""Plan 05-03 D5-02: short (<=8 char) label for the largest community.
|
||||
|
||||
Returns 'none' when no communities exist (fresh or flat assignment). The
|
||||
label is the first 8 hex of the dominant community UUID — a stable handle
|
||||
that fits in ~3-4 tokens.
|
||||
"""
|
||||
try:
|
||||
top = list(assignment.top_communities)
|
||||
if not top:
|
||||
return "none"
|
||||
# top_communities is already ordered by member count (CONN-01 L1).
|
||||
return str(top[0])[:8]
|
||||
except Exception:
|
||||
return "none"
|
||||
|
||||
|
||||
def _count_pending_first_turn(store: MemoryStore) -> int:
|
||||
"""Plan 05-03 D5-02: count open first_turn_pending sessions in daemon_state.
|
||||
|
||||
Returns 0 if daemon_state is missing or malformed (silent fallback). This
|
||||
is only cosmetic input to the brain_handle pointer; the minimal payload
|
||||
must survive a missing daemon gracefully.
|
||||
"""
|
||||
try:
|
||||
from iai_mcp.daemon_state import load_state
|
||||
state = load_state()
|
||||
pending = state.get("first_turn_pending", {})
|
||||
if isinstance(pending, dict):
|
||||
return sum(1 for v in pending.values() if v)
|
||||
return 0
|
||||
except Exception:
|
||||
return 0
|
||||
|
||||
|
||||
def assemble_session_start(
|
||||
store: MemoryStore,
|
||||
assignment: CommunityAssignment,
|
||||
rich_club: list[UUID],
|
||||
*,
|
||||
session_id: str = "-",
|
||||
profile_state: dict | None = None,
|
||||
) -> SessionStartPayload:
|
||||
"""Assemble the session-start cached prefix.
|
||||
|
||||
TOK-11 / D5-02 / D5-10: branches on the `wake_depth` profile
|
||||
knob (15th sealed knob, MCP-12):
|
||||
|
||||
- ``minimal`` (default): produce a ≤30 raw-tok pointer handle (identity,
|
||||
brain session, topic cluster). Legacy l0/l1/l2/rich_club emitted empty
|
||||
for back-compat with existing TS-wrapper callers.
|
||||
- ``standard``: reproduce the Phase-1 1388-tok eager dump — l0/l1/l2/
|
||||
rich_club populated via `_l0_segment`, `_l1_segment`, `_l2_segments`,
|
||||
`_rich_club_segment`. New fields emitted empty.
|
||||
- ``deep``: same shape as standard but rich_club budget lifted to 2000.
|
||||
Populates both the legacy segments and the new pointers.
|
||||
|
||||
(M6 LIVE prerequisite): emits ``kind='session_started'`` with
|
||||
a deterministic ``session_state_hash`` over the cached prefix. Two
|
||||
consecutive sessions whose cached prefix is identical produce the same
|
||||
hash -- exactly the context-repeat signal M6 measures.
|
||||
|
||||
Pitfall 1 (Anthropic cache threshold reality per 05-RESEARCH lines
|
||||
447-469): at `wake_depth=minimal` the payload is ≤30 raw tok which is
|
||||
BELOW the Sonnet 4.6 / Opus 4.7 cache minimum (2048 / 4096). DO NOT add
|
||||
``cache_control`` to the minimal branch prefix — it would be silently
|
||||
ignored by the Anthropic API and waste a breakpoint slot.
|
||||
"""
|
||||
from iai_mcp.profile import default_state
|
||||
state = profile_state if isinstance(profile_state, dict) else default_state()
|
||||
wake_depth = state.get("wake_depth", "minimal")
|
||||
if wake_depth not in ("minimal", "standard", "deep"):
|
||||
wake_depth = "minimal" # D5-10 silent fallback
|
||||
|
||||
if wake_depth == "minimal":
|
||||
# Pitfall 1 guard: payload will not be Anthropic-cached
|
||||
# (<=30 raw tok < Sonnet 4.6 min 2048). DO NOT set cache_control.
|
||||
#
|
||||
# collapse the three legacy pointers
|
||||
# (identity_pointer + brain_handle + topic_cluster_hint, ~24 raw tok
|
||||
# together) into a single `<iai:HHHHHHHHHHHHHHHH>` handle (~6-10 raw
|
||||
# tok). The LRU inside `iai_mcp.handle` retains the reverse mapping
|
||||
# so downstream code can resolve the handle to its triple.
|
||||
#
|
||||
# Back-compat contract: the 3 legacy fields stay populated on the
|
||||
# dataclass so callers reading the old shape keep working; only
|
||||
# ``total_cached_tokens`` is charged for the compact handle (the
|
||||
# wire prefix at wake_depth=minimal is the compact handle alone).
|
||||
l0_rec = _fetch_record(store, L0_RECORD_UUID)
|
||||
identity_short = str(L0_RECORD_UUID)[:8] if l0_rec is not None else ""
|
||||
identity_pointer = f"<id:{identity_short}>" if identity_short else ""
|
||||
pending = _count_pending_first_turn(store)
|
||||
session_short = str(session_id)[:8]
|
||||
brain_handle = f"<sess:{session_short} pend:{pending}>"
|
||||
topic_label = _dominant_community_label(assignment)
|
||||
topic_cluster_hint = f"<topic:{topic_label}>"
|
||||
compact_handle = encode_compact_handle(
|
||||
identity_short, session_short, topic_label, pending
|
||||
)
|
||||
cached = _approx_tokens(compact_handle)
|
||||
payload = SessionStartPayload(
|
||||
l0="",
|
||||
l1="",
|
||||
l2=[],
|
||||
rich_club="",
|
||||
total_cached_tokens=cached,
|
||||
total_dynamic_tokens=DYNAMIC_TAIL_TOKENS,
|
||||
identity_pointer=identity_pointer,
|
||||
brain_handle=brain_handle,
|
||||
topic_cluster_hint=topic_cluster_hint,
|
||||
compact_handle=compact_handle,
|
||||
wake_depth="minimal",
|
||||
)
|
||||
else:
|
||||
# standard and deep share the Phase-1 eager assembly path; deep lifts
|
||||
# the rich_club budget by re-running the segment with a larger cap.
|
||||
l0 = _l0_segment(store)
|
||||
l1 = _l1_segment(store)
|
||||
l2 = _l2_segments(store, assignment)
|
||||
if wake_depth == "deep":
|
||||
rc = _rich_club_segment_with_budget(store, rich_club, budget=2000)
|
||||
else:
|
||||
rc = _rich_club_segment(store, rich_club)
|
||||
|
||||
cached = (
|
||||
_approx_tokens(l0)
|
||||
+ _approx_tokens(l1)
|
||||
+ sum(_approx_tokens(s) for s in l2)
|
||||
+ _approx_tokens(rc)
|
||||
)
|
||||
|
||||
# New pointers also populated under standard/deep so downstream callers
|
||||
# can use them alongside legacy segments if they want. Plan 05-06:
|
||||
# the compact handle is ALSO minted here so a consumer can opt in to
|
||||
# the short form without requiring a wake_depth mode switch.
|
||||
l0_rec = _fetch_record(store, L0_RECORD_UUID)
|
||||
identity_short = str(L0_RECORD_UUID)[:8] if l0_rec is not None else ""
|
||||
identity_pointer = f"<id:{identity_short}>" if identity_short else ""
|
||||
pending = _count_pending_first_turn(store)
|
||||
session_short = str(session_id)[:8]
|
||||
brain_handle = f"<sess:{session_short} pend:{pending}>"
|
||||
topic_label = _dominant_community_label(assignment)
|
||||
topic_cluster_hint = f"<topic:{topic_label}>"
|
||||
compact_handle = encode_compact_handle(
|
||||
identity_short, session_short, topic_label, pending
|
||||
)
|
||||
|
||||
payload = SessionStartPayload(
|
||||
l0=l0,
|
||||
l1=l1,
|
||||
l2=l2,
|
||||
rich_club=rc,
|
||||
total_cached_tokens=cached,
|
||||
total_dynamic_tokens=DYNAMIC_TAIL_TOKENS,
|
||||
identity_pointer=identity_pointer,
|
||||
brain_handle=brain_handle,
|
||||
topic_cluster_hint=topic_cluster_hint,
|
||||
compact_handle=compact_handle,
|
||||
wake_depth=wake_depth,
|
||||
)
|
||||
|
||||
# (M6 LIVE prerequisite): emit kind='session_started' with
|
||||
# session_state_hash for trajectory.m6_context_repeat_rate_live.
|
||||
# Diagnostic-only: never block session start on emit failure.
|
||||
try:
|
||||
from datetime import datetime, timezone
|
||||
from iai_mcp.events import write_event
|
||||
write_event(
|
||||
store,
|
||||
kind="session_started",
|
||||
data={
|
||||
"session_id": session_id,
|
||||
"session_state_hash": _session_state_hash(payload),
|
||||
"total_cached_tokens": cached,
|
||||
"wake_depth": wake_depth,
|
||||
"timestamp": datetime.now(timezone.utc).isoformat(),
|
||||
},
|
||||
severity="info",
|
||||
session_id=session_id,
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return payload
|
||||
308
src/iai_mcp/shield.py
Normal file
308
src/iai_mcp/shield.py
Normal file
|
|
@ -0,0 +1,308 @@
|
|||
"""OPS-07 prompt-injection shield (D-30, D-31) -- Plan 02-05.
|
||||
|
||||
Three-tier deployment per D-31:
|
||||
HARD_BLOCK -> L0 identity + S5 invariant writes (reject on detection)
|
||||
FLAG_FOR_REVIEW -> profile updates (flag + warn, write proceeds)
|
||||
LOG_ONLY -> content records (log only, allow)
|
||||
|
||||
D-30 threat model (three severities):
|
||||
- Direct override (e.g. "forget X, now Y") -> HARD BLOCK via signal words
|
||||
- Gradual drift (subtle lies over weeks) -> DETECT via trajectory M4 anomaly
|
||||
(see s5.detect_drift_anomaly)
|
||||
- Data poisoning (intentional false write) -> MITIGATE via ART vigilance
|
||||
+ user-approval UX
|
||||
|
||||
Global-product mandate: signal words cover 7+ languages
|
||||
(en + ru + ja + ar + de + fr + es + zh) at minimum. The module exports
|
||||
`SHIELD_LANGUAGES_SUPPORTED` as the authoritative set; downstream acceptance
|
||||
tests grep against it.
|
||||
|
||||
The shield is a PURE LOCAL filter: no LLM call, no network. Detection uses
|
||||
case-insensitive substring matching against curated signal-word lists. The
|
||||
tier policy is additive: warning signals escalate to critical in the
|
||||
HARD_BLOCK tier (L0 is sacred).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from enum import Enum
|
||||
from typing import Any
|
||||
from uuid import UUID
|
||||
|
||||
from iai_mcp.events import write_event
|
||||
|
||||
|
||||
# ------------------------------------------------------------ constitutional constants
|
||||
|
||||
# Confidence thresholds for the shield verdict. Confidence is a simple signal:
|
||||
# matched_count / TOTAL_BASELINE -- used for downstream analytics, not the
|
||||
# tier-policy gate. The tier enum + match count drives the action.
|
||||
SHIELD_SIGNAL_WORDS_MAX_CONFIDENCE: float = 0.9 # upper bound reported on any match
|
||||
SHIELD_FLAG_CONFIDENCE: float = 0.6 # reported when matches are warning-only
|
||||
|
||||
# global-product mandate: 7+ languages supported.
|
||||
SHIELD_LANGUAGES_SUPPORTED: frozenset[str] = frozenset({
|
||||
"en", "ru", "ja", "ar", "de", "fr", "es", "zh",
|
||||
})
|
||||
|
||||
# gradual-drift detection threshold -- used by s5.detect_drift_anomaly
|
||||
# but declared here so the single authoritative constant sits alongside the
|
||||
# other shield thresholds (downstream greps one file).
|
||||
DRIFT_M4_ANOMALY_SIGMA: float = 3.0
|
||||
|
||||
|
||||
# ------------------------------------------------------------ signal-word catalogues
|
||||
|
||||
# English critical signal words: classic prompt-injection imperatives.
|
||||
SIGNAL_WORDS_CRITICAL_EN: list[str] = [
|
||||
"forget", "override", "ignore previous", "you are now",
|
||||
"from now on", "system:", "admin:", "instruction:",
|
||||
"disregard", "new instructions", "ignore previous instructions",
|
||||
]
|
||||
|
||||
# English warning signals: softer but still suspicious rephrasings.
|
||||
SIGNAL_WORDS_WARNING_EN: list[str] = [
|
||||
"different", "instead", "actually", "update",
|
||||
]
|
||||
|
||||
# Per-language critical signal words (D-02a mandate).
|
||||
# Keys are ISO-639-1 codes; values are minimal strictly-imperative tokens.
|
||||
# Conservative by design: false positives on legitimate non-English chatter are
|
||||
# worse than false negatives at this tier (users have multiple layers of
|
||||
# defence; the shield is one slice of defence-in-depth).
|
||||
SIGNAL_WORDS_CRITICAL_BY_LANG: dict[str, list[str]] = {
|
||||
"ru": [
|
||||
"забудь", "забыть", "игнорируй",
|
||||
"отмени", "сбрось", "новые инструкции",
|
||||
"теперь ты", "с этого момента",
|
||||
],
|
||||
"ja": [
|
||||
"忘れて", "無視", "リセット",
|
||||
"新しい指示", "これから", "今から",
|
||||
],
|
||||
"ar": [
|
||||
"انسى", "تجاهل",
|
||||
"إعادة تعيين", "تعليمات جديدة", "أنت الآن",
|
||||
],
|
||||
"de": [
|
||||
"vergiss", "ignoriere", "überschreibe",
|
||||
"neue anweisungen", "ab jetzt",
|
||||
],
|
||||
"fr": [
|
||||
"oublie", "ignore",
|
||||
"remplace", "nouvelles instructions",
|
||||
],
|
||||
"es": [
|
||||
"olvida", "ignora",
|
||||
"sobrescribe", "nuevas instrucciones",
|
||||
],
|
||||
"zh": [
|
||||
"忘记", "忽略", "重置",
|
||||
"新指令", "从现在开始",
|
||||
],
|
||||
}
|
||||
|
||||
|
||||
# ------------------------------------------------------------ enums + types
|
||||
|
||||
|
||||
class ShieldTier(str, Enum):
|
||||
"""D-31 three-tier deployment."""
|
||||
|
||||
HARD_BLOCK = "hard_block" # L0 identity + S5 invariants
|
||||
FLAG_FOR_REVIEW = "flag" # profile updates
|
||||
LOG_ONLY = "log" # content records
|
||||
|
||||
|
||||
@dataclass
|
||||
class ShieldVerdict:
|
||||
"""Result of evaluating injection risk for a single text blob."""
|
||||
|
||||
tier: ShieldTier
|
||||
detected: bool
|
||||
matched_patterns: list[str] = field(default_factory=list)
|
||||
severity: str = "info" # "info" | "warning" | "critical"
|
||||
action: str = "log_allow" # "reject" | "flag" | "log_allow"
|
||||
reason: str = ""
|
||||
language: str | None = None
|
||||
confidence: float = 0.0
|
||||
|
||||
|
||||
# ------------------------------------------------------------ private helpers
|
||||
|
||||
|
||||
def _signal_lists_for_language(
|
||||
lang: str | None,
|
||||
) -> tuple[list[str], list[str]]:
|
||||
"""Return (critical, warning) lists for the given language.
|
||||
|
||||
English signals are ALWAYS included (prompt-injection attempts are often
|
||||
copy-pasted English regardless of the user's native language). When a
|
||||
`lang` is given AND supported, its per-language critical list is appended.
|
||||
"""
|
||||
critical = list(SIGNAL_WORDS_CRITICAL_EN)
|
||||
warning = list(SIGNAL_WORDS_WARNING_EN)
|
||||
if lang and lang in SIGNAL_WORDS_CRITICAL_BY_LANG:
|
||||
critical.extend(SIGNAL_WORDS_CRITICAL_BY_LANG[lang])
|
||||
return critical, warning
|
||||
|
||||
|
||||
def _match_patterns(text: str, patterns: list[str]) -> list[str]:
|
||||
"""Return the subset of patterns present in the (lowercased) text.
|
||||
|
||||
For Latin-script patterns we lowercase both sides. For non-ASCII scripts
|
||||
(Cyrillic, Hiragana, CJK, Arabic) lowercasing is either identity-preserving
|
||||
(CJK has no case) or handled uniformly by str.lower() which is safe for
|
||||
our lists.
|
||||
"""
|
||||
t = (text or "").lower()
|
||||
out: list[str] = []
|
||||
for p in patterns:
|
||||
if p.lower() in t:
|
||||
out.append(p)
|
||||
return out
|
||||
|
||||
|
||||
# ------------------------------------------------------------ public API
|
||||
|
||||
|
||||
def evaluate_injection_risk(
|
||||
text: str,
|
||||
tier: ShieldTier,
|
||||
target_language: str | None = None,
|
||||
) -> ShieldVerdict:
|
||||
"""Core shield detection (pure function, no side effects).
|
||||
|
||||
Tier escalation policy:
|
||||
HARD_BLOCK -- any critical OR warning match -> reject (severity critical)
|
||||
FLAG_FOR_REVIEW -- any match -> flag (severity warning)
|
||||
LOG_ONLY -- any match -> log_allow (severity info)
|
||||
no match -- detected=False, action=log_allow
|
||||
"""
|
||||
critical_list, warning_list = _signal_lists_for_language(target_language)
|
||||
matched_critical = _match_patterns(text, critical_list)
|
||||
matched_warning = _match_patterns(text, warning_list)
|
||||
all_matched = matched_critical + matched_warning
|
||||
|
||||
if not all_matched:
|
||||
return ShieldVerdict(
|
||||
tier=tier,
|
||||
detected=False,
|
||||
matched_patterns=[],
|
||||
severity="info",
|
||||
action="log_allow",
|
||||
reason="no signal patterns detected",
|
||||
language=target_language,
|
||||
confidence=0.0,
|
||||
)
|
||||
|
||||
# Confidence: 0.9 when any critical match, 0.6 when warning-only.
|
||||
confidence = (
|
||||
SHIELD_SIGNAL_WORDS_MAX_CONFIDENCE
|
||||
if matched_critical
|
||||
else SHIELD_FLAG_CONFIDENCE
|
||||
)
|
||||
|
||||
if tier == ShieldTier.HARD_BLOCK:
|
||||
return ShieldVerdict(
|
||||
tier=tier,
|
||||
detected=True,
|
||||
matched_patterns=all_matched,
|
||||
severity="critical",
|
||||
action="reject",
|
||||
reason=(
|
||||
f"injection signals detected in HARD_BLOCK tier: {all_matched}"
|
||||
),
|
||||
language=target_language,
|
||||
confidence=confidence,
|
||||
)
|
||||
if tier == ShieldTier.FLAG_FOR_REVIEW:
|
||||
return ShieldVerdict(
|
||||
tier=tier,
|
||||
detected=True,
|
||||
matched_patterns=all_matched,
|
||||
severity="warning",
|
||||
action="flag",
|
||||
reason=f"injection signals detected in FLAG tier: {all_matched}",
|
||||
language=target_language,
|
||||
confidence=confidence,
|
||||
)
|
||||
# LOG_ONLY
|
||||
return ShieldVerdict(
|
||||
tier=tier,
|
||||
detected=True,
|
||||
matched_patterns=all_matched,
|
||||
severity="info",
|
||||
action="log_allow",
|
||||
reason=f"injection signals detected in LOG tier: {all_matched}",
|
||||
language=target_language,
|
||||
confidence=confidence,
|
||||
)
|
||||
|
||||
|
||||
def apply_shield(
|
||||
store: Any, # MemoryStore
|
||||
record: Any, # MemoryRecord (avoids import cycle with types)
|
||||
tier: ShieldTier,
|
||||
session_id: str = "-",
|
||||
) -> ShieldVerdict:
|
||||
"""Evaluate + emit event (side-effectful wrapper).
|
||||
|
||||
Event kind is determined by the tier policy:
|
||||
- reject -> kind="shield_rejection" (severity critical)
|
||||
- flag -> kind="shield_flag" (severity warning)
|
||||
- log_allow -> kind="shield_log" (severity info, ONLY on detection)
|
||||
|
||||
No event is emitted when the verdict is "not detected" -- no signal, no
|
||||
noise in the events table.
|
||||
"""
|
||||
verdict = evaluate_injection_risk(
|
||||
record.literal_surface or "",
|
||||
tier,
|
||||
target_language=record.language or None,
|
||||
)
|
||||
if verdict.detected:
|
||||
kind_map = {
|
||||
"reject": "shield_rejection",
|
||||
"flag": "shield_flag",
|
||||
"log_allow": "shield_log",
|
||||
}
|
||||
event_kind = kind_map.get(verdict.action, "shield_log")
|
||||
# Clip matched patterns payload so the events table does not grow
|
||||
# unbounded on adversarial input.
|
||||
matched_clipped = [str(p)[:80] for p in verdict.matched_patterns[:10]]
|
||||
record_id = record.id
|
||||
source_ids: list[UUID] = []
|
||||
if isinstance(record_id, UUID):
|
||||
source_ids = [record_id]
|
||||
write_event(
|
||||
store,
|
||||
kind=event_kind,
|
||||
data={
|
||||
"record_id": str(record_id) if record_id is not None else None,
|
||||
"tier": verdict.tier.value,
|
||||
"matched": matched_clipped,
|
||||
"language": record.language,
|
||||
"action": verdict.action,
|
||||
"confidence": verdict.confidence,
|
||||
},
|
||||
severity=verdict.severity,
|
||||
session_id=session_id,
|
||||
source_ids=source_ids,
|
||||
)
|
||||
return verdict
|
||||
|
||||
|
||||
__all__ = [
|
||||
"DRIFT_M4_ANOMALY_SIGMA",
|
||||
"SHIELD_FLAG_CONFIDENCE",
|
||||
"SHIELD_LANGUAGES_SUPPORTED",
|
||||
"SHIELD_SIGNAL_WORDS_MAX_CONFIDENCE",
|
||||
"SIGNAL_WORDS_CRITICAL_BY_LANG",
|
||||
"SIGNAL_WORDS_CRITICAL_EN",
|
||||
"SIGNAL_WORDS_WARNING_EN",
|
||||
"ShieldTier",
|
||||
"ShieldVerdict",
|
||||
"apply_shield",
|
||||
"evaluate_injection_risk",
|
||||
]
|
||||
374
src/iai_mcp/sigma.py
Normal file
374
src/iai_mcp/sigma.py
Normal file
|
|
@ -0,0 +1,374 @@
|
|||
"""Plan 03-02 CONN-07: small-world sigma as Ashby ultrastability diagnostic.
|
||||
|
||||
Ground-truth reference: Humphries MD, Gurney K (2008) "Network 'small-world-ness':
|
||||
a quantitative method for determining canonical network equivalence."
|
||||
|
||||
Constitutional anchor:
|
||||
- sigma is a CYBERNETIC DIAGNOSTIC (Ashby ultrastability), not a "RAG fallback".
|
||||
- Cold-start sigma<1 at N<500 is a DEVELOPMENTAL phase, not pathological.
|
||||
Emit kind=sigma_observation phase=developmental + boost Hebbian rate.
|
||||
- Mid-life drift sigma<1 at N>=500 emits kind=sigma_drift as an S4 event.
|
||||
- sigma trajectory is published as a deep-time metric, NEVER a routing
|
||||
decision. No code path in this module switches retrieval modes on sigma.
|
||||
|
||||
Design discipline:
|
||||
- DO NOT use NetworkX's built-in small-worldness function. NetworkX 3.6.1's
|
||||
built-in (niter=100, nrand=10) is empirically unusable at N>=200 (timed out
|
||||
at 60s+ during research session).
|
||||
- Custom `fast_sigma` follows Humphries-Gurney 2008 directly with a small
|
||||
number of single-reference Erdos-Renyi random graphs (G(n, m), same edge
|
||||
count). Validated 0.05s @ N=200, 0.34s @ N=500, 1.28s @ N=1000.
|
||||
|
||||
Module-level constants:
|
||||
- SIGMA_N_FLOOR = 200 -- D-SIGMA-01 floor (imports semantically from
|
||||
community.SMALL_N_FLAT -- same Humphries-Gurney 2008 floor).
|
||||
- SIGMA_MID_LIFE_THRESHOLD = 500 -- D-SIGMA-03 mid-life regime threshold
|
||||
(imports semantically from community.MID_N_LEIDEN).
|
||||
|
||||
Public API:
|
||||
- compute_sigma(graph, *, seed=42) -> Optional[float]
|
||||
- fast_sigma(graph, *, n_random=3, seed=42) -> tuple[float, float, float, float, float]
|
||||
- classify_regime(N, sigma) -> str
|
||||
- compute_topology_snapshot(graph) -> dict
|
||||
- compute_and_emit(store) -> dict
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
from datetime import datetime, timezone
|
||||
from typing import Optional, TYPE_CHECKING
|
||||
|
||||
import networkx as nx
|
||||
|
||||
from iai_mcp.events import write_event
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from iai_mcp.store import MemoryStore
|
||||
|
||||
|
||||
# D-SIGMA-01: sigma is undefined below N=200 (Humphries-Gurney 2008 floor).
|
||||
# Aliased semantically from community.SMALL_N_FLAT -- same constitutional floor.
|
||||
SIGMA_N_FLOOR: int = 200
|
||||
|
||||
# D-SIGMA-03: mid-life vs developmental boundary (community.MID_N_LEIDEN).
|
||||
SIGMA_MID_LIFE_THRESHOLD: int = 500
|
||||
|
||||
# Event kinds emitted by this module. Naming follows the snake_case
|
||||
# noun_verb shape established in s4.py / s5.py.
|
||||
SIGMA_OBSERVATION_KIND: str = "sigma_observation"
|
||||
SIGMA_DRIFT_KIND: str = "sigma_drift"
|
||||
|
||||
# Hebbian rate boost applied during developmental phase (D-SIGMA-02).
|
||||
HEBBIAN_DEVELOPMENTAL_BOOST_FACTOR: float = 1.3
|
||||
HEBBIAN_DEVELOPMENTAL_BOOST_TTL_SESSIONS: int = 5
|
||||
|
||||
# Knob name we tag in profile_updated events when boosting the Hebbian rate
|
||||
# during developmental phase. The 11-knob registry is NOT modified -- this is
|
||||
# a transient operational tag, not an AUTIST kernel knob.
|
||||
HEBBIAN_RATE_KNOB: str = "hebbian_rate"
|
||||
|
||||
|
||||
def _largest_cc(graph: "nx.Graph") -> "nx.Graph":
|
||||
"""Return the largest connected component as a copy.
|
||||
|
||||
NetworkX raises on disconnected inputs to ``average_shortest_path_length``;
|
||||
take the largest CC up front so the rest of fast_sigma can stay simple.
|
||||
"""
|
||||
if graph.number_of_nodes() == 0:
|
||||
return graph
|
||||
if nx.is_connected(graph):
|
||||
return graph
|
||||
largest = max(nx.connected_components(graph), key=len)
|
||||
return graph.subgraph(largest).copy()
|
||||
|
||||
|
||||
def fast_sigma(
|
||||
graph: "nx.Graph",
|
||||
*,
|
||||
n_random: int = 3,
|
||||
seed: int = 42,
|
||||
) -> tuple[float, float, float, float, float]:
|
||||
"""Humphries-Gurney 2008 sigma via single-reference random graph(s).
|
||||
|
||||
Returns ``(sigma, C, L, Cr, Lr)`` where:
|
||||
- sigma = (C / Cr) / (L / Lr)
|
||||
- C / L : clustering / characteristic path length on the input graph
|
||||
- Cr / Lr : same metrics averaged over ``n_random`` Erdos-Renyi G(n, m)
|
||||
reference graphs.
|
||||
|
||||
DO NOT use NetworkX's built-in small-worldness function -- it is
|
||||
empirically unusable at N>=200 (>60s timeout).
|
||||
This implementation builds ONE G(n, m) reference per seed and averages
|
||||
the C and L values, NOT the library's full edge-rewiring loop.
|
||||
|
||||
Pre-processing: when the input graph is disconnected, the largest
|
||||
connected component is taken first. NetworkX raises on disconnected
|
||||
inputs to ``average_shortest_path_length``.
|
||||
|
||||
Notes
|
||||
-----
|
||||
- Returns NaN sigma when Cr or Lr collapses to zero (degenerate reference;
|
||||
shouldn't happen at our N>=200 floor but defensive).
|
||||
- Deterministic per ``seed`` -- the n_random reference graphs use
|
||||
``seed, seed+1, ..., seed+n_random-1``.
|
||||
"""
|
||||
g = _largest_cc(graph)
|
||||
n = g.number_of_nodes()
|
||||
m = g.number_of_edges()
|
||||
if n < 2 or m == 0:
|
||||
return (float("nan"), 0.0, 0.0, 0.0, 0.0)
|
||||
|
||||
C = float(nx.average_clustering(g))
|
||||
L = float(nx.average_shortest_path_length(g))
|
||||
|
||||
Cs: list[float] = []
|
||||
Ls: list[float] = []
|
||||
for k in range(max(1, n_random)):
|
||||
gr_full = nx.gnm_random_graph(n, m, seed=seed + k)
|
||||
# Same disconnected-graph guard for the reference.
|
||||
if not nx.is_connected(gr_full):
|
||||
largest = max(nx.connected_components(gr_full), key=len)
|
||||
gr = gr_full.subgraph(largest).copy()
|
||||
else:
|
||||
gr = gr_full
|
||||
if gr.number_of_nodes() < 2 or gr.number_of_edges() == 0:
|
||||
continue
|
||||
Cs.append(float(nx.average_clustering(gr)))
|
||||
Ls.append(float(nx.average_shortest_path_length(gr)))
|
||||
|
||||
if not Cs or not Ls:
|
||||
return (float("nan"), C, L, 0.0, 0.0)
|
||||
|
||||
Cr = sum(Cs) / len(Cs)
|
||||
Lr = sum(Ls) / len(Ls)
|
||||
if Cr <= 0 or Lr <= 0 or L <= 0:
|
||||
return (float("nan"), C, L, Cr, Lr)
|
||||
|
||||
sigma_val = (C / Cr) / (L / Lr)
|
||||
return (sigma_val, C, L, Cr, Lr)
|
||||
|
||||
|
||||
def compute_sigma(graph: "nx.Graph", *, seed: int = 42) -> Optional[float]:
|
||||
"""D-SIGMA-01: sigma at N>=SIGMA_N_FLOOR; otherwise None.
|
||||
|
||||
Returns None for graphs with fewer than SIGMA_N_FLOOR nodes -- below
|
||||
that threshold, the random-graph baselines are too noisy to interpret
|
||||
(Humphries-Gurney 2008).
|
||||
"""
|
||||
if graph.number_of_nodes() < SIGMA_N_FLOOR:
|
||||
return None
|
||||
sigma_val, *_ = fast_sigma(graph, seed=seed)
|
||||
if isinstance(sigma_val, float) and math.isnan(sigma_val):
|
||||
return None
|
||||
return float(sigma_val)
|
||||
|
||||
|
||||
def classify_regime(N: int, sigma: Optional[float]) -> str:
|
||||
"""Four-cell regime truth table (D-SIGMA-02 / D-SIGMA-03).
|
||||
|
||||
Returns one of:
|
||||
- "insufficient_data" : sigma is None (N < SIGMA_N_FLOOR)
|
||||
- "developmental" : N < SIGMA_MID_LIFE_THRESHOLD AND sigma < 1
|
||||
- "mid_life_drift" : N >= SIGMA_MID_LIFE_THRESHOLD AND sigma < 1
|
||||
- "healthy" : sigma >= 1 (any N >= floor)
|
||||
"""
|
||||
if sigma is None:
|
||||
return "insufficient_data"
|
||||
if isinstance(sigma, float) and math.isnan(sigma):
|
||||
return "insufficient_data"
|
||||
if sigma < 1.0:
|
||||
if N < SIGMA_MID_LIFE_THRESHOLD:
|
||||
return "developmental"
|
||||
return "mid_life_drift"
|
||||
return "healthy"
|
||||
|
||||
|
||||
def _coerce_to_nx_graph(graph_or_wrapper) -> "nx.Graph":
|
||||
"""Accept either a raw nx.Graph or our MemoryGraph wrapper.
|
||||
|
||||
MemoryGraph (src/iai_mcp/graph.py) carries the underlying nx.Graph as
|
||||
``_nx``. The CLI passes a MemoryGraph; tests / fast_sigma also accept raw
|
||||
nx.Graph for portability.
|
||||
"""
|
||||
if isinstance(graph_or_wrapper, nx.Graph):
|
||||
return graph_or_wrapper
|
||||
underlying = getattr(graph_or_wrapper, "_nx", None)
|
||||
if isinstance(underlying, nx.Graph):
|
||||
return underlying
|
||||
raise TypeError(
|
||||
f"expected nx.Graph or MemoryGraph wrapper, got {type(graph_or_wrapper).__name__}"
|
||||
)
|
||||
|
||||
|
||||
def compute_topology_snapshot(graph) -> dict:
|
||||
"""Snapshot dict consumed by the topology CLI subcommand.
|
||||
|
||||
Returns: ``{C, L, sigma, community_count, rich_club_ratio, N, regime}``.
|
||||
|
||||
- C : average clustering on the largest connected component.
|
||||
- L : average shortest path length on the largest CC.
|
||||
- sigma : compute_sigma(graph) (None if N < SIGMA_N_FLOOR).
|
||||
- community_count : Leiden community count (CONN-01 reuse via
|
||||
community.detect_communities); uses an isolated MemoryGraph wrapper.
|
||||
- rich_club_ratio : len(rich_club_nodes) / N (CONN-02 reuse).
|
||||
- N : node count.
|
||||
- regime : classify_regime(N, sigma).
|
||||
"""
|
||||
nx_g = _coerce_to_nx_graph(graph)
|
||||
N = int(nx_g.number_of_nodes())
|
||||
|
||||
if N == 0:
|
||||
return {
|
||||
"C": 0.0, "L": 0.0, "sigma": None,
|
||||
"community_count": 0, "rich_club_ratio": 0.0,
|
||||
"N": 0, "regime": "insufficient_data",
|
||||
}
|
||||
|
||||
g_cc = _largest_cc(nx_g)
|
||||
try:
|
||||
C = float(nx.average_clustering(g_cc)) if g_cc.number_of_nodes() else 0.0
|
||||
except Exception:
|
||||
C = 0.0
|
||||
try:
|
||||
L = (
|
||||
float(nx.average_shortest_path_length(g_cc))
|
||||
if g_cc.number_of_nodes() >= 2 and g_cc.number_of_edges() > 0
|
||||
else 0.0
|
||||
)
|
||||
except Exception:
|
||||
L = 0.0
|
||||
|
||||
sigma_val = compute_sigma(nx_g)
|
||||
|
||||
# community_count + rich_club_ratio require the MemoryGraph wrapper.
|
||||
community_count = 0
|
||||
rich_club_ratio = 0.0
|
||||
try:
|
||||
from iai_mcp.community import detect_communities
|
||||
from iai_mcp.graph import MemoryGraph
|
||||
from iai_mcp.richclub import rich_club_nodes
|
||||
if isinstance(graph, MemoryGraph):
|
||||
mg = graph
|
||||
else:
|
||||
mg = None
|
||||
if mg is not None:
|
||||
try:
|
||||
assignment = detect_communities(mg, prior=None)
|
||||
community_count = int(len(assignment.community_centroids))
|
||||
except Exception:
|
||||
community_count = 0
|
||||
try:
|
||||
rc = rich_club_nodes(mg, percent=0.10)
|
||||
rich_club_ratio = (len(rc) / N) if N > 0 else 0.0
|
||||
except Exception:
|
||||
rich_club_ratio = 0.0
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
regime = classify_regime(N, sigma_val)
|
||||
return {
|
||||
"C": C,
|
||||
"L": L,
|
||||
"sigma": sigma_val,
|
||||
"community_count": community_count,
|
||||
"rich_club_ratio": rich_club_ratio,
|
||||
"N": N,
|
||||
"regime": regime,
|
||||
}
|
||||
|
||||
|
||||
def _bump_hebbian_rate_developmental(store: "MemoryStore", N: int) -> None:
|
||||
"""Emit a profile_updated event marking the Hebbian-rate boost.
|
||||
|
||||
Per D-SIGMA-02 the developmental phase warrants a temporary
|
||||
Hebbian-rate boost. Rather than mutating the 10-knob AUTIST profile
|
||||
registry (which would violate len(PROFILE_KNOBS)==11), we record the
|
||||
intent as a profile_updated event with knob='hebbian_rate'. Downstream
|
||||
Hebbian write paths can read the most recent value and apply it.
|
||||
"""
|
||||
write_event(
|
||||
store,
|
||||
kind="profile_updated",
|
||||
data={
|
||||
"knob": HEBBIAN_RATE_KNOB,
|
||||
"old": 1.0,
|
||||
"new": HEBBIAN_DEVELOPMENTAL_BOOST_FACTOR,
|
||||
"ttl_sessions": HEBBIAN_DEVELOPMENTAL_BOOST_TTL_SESSIONS,
|
||||
"reason": "sigma_developmental_phase",
|
||||
"N": N,
|
||||
"timestamp": datetime.now(timezone.utc).isoformat(),
|
||||
},
|
||||
severity="info",
|
||||
)
|
||||
|
||||
|
||||
def compute_and_emit(store: "MemoryStore") -> dict:
|
||||
"""S4 offline-pass entry point: build runtime graph, snapshot, emit event.
|
||||
|
||||
Routes to the correct event kind based on the regime classification:
|
||||
- "developmental" -> kind=sigma_observation, data.phase="developmental",
|
||||
AND a profile_updated event for hebbian_rate boost.
|
||||
- "mid_life_drift" -> kind=sigma_drift, data with full snapshot.
|
||||
- "healthy" -> kind=sigma_observation, data.phase="healthy".
|
||||
- "insufficient_data" -> kind=sigma_observation, data.phase="insufficient_data".
|
||||
|
||||
NEVER toggles retrieval modes (constitutional guard).
|
||||
"""
|
||||
from iai_mcp import retrieve
|
||||
|
||||
graph_bundle = retrieve.build_runtime_graph(store)
|
||||
# build_runtime_graph returns (graph, assignment, rich_club).
|
||||
if isinstance(graph_bundle, tuple):
|
||||
graph = graph_bundle[0]
|
||||
else:
|
||||
graph = graph_bundle
|
||||
|
||||
snap = compute_topology_snapshot(graph)
|
||||
regime = snap.get("regime", "insufficient_data")
|
||||
|
||||
base_data = {
|
||||
"sigma": snap.get("sigma"),
|
||||
"N": snap.get("N", 0),
|
||||
"C": snap.get("C", 0.0),
|
||||
"L": snap.get("L", 0.0),
|
||||
"community_count": snap.get("community_count", 0),
|
||||
"rich_club_ratio": snap.get("rich_club_ratio", 0.0),
|
||||
"regime": regime,
|
||||
}
|
||||
|
||||
if regime == "mid_life_drift":
|
||||
write_event(
|
||||
store,
|
||||
kind=SIGMA_DRIFT_KIND,
|
||||
data={**base_data, "phase": "mid_life_drift"},
|
||||
severity="warning",
|
||||
)
|
||||
elif regime == "developmental":
|
||||
write_event(
|
||||
store,
|
||||
kind=SIGMA_OBSERVATION_KIND,
|
||||
data={**base_data, "phase": "developmental"},
|
||||
severity="info",
|
||||
)
|
||||
try:
|
||||
_bump_hebbian_rate_developmental(store, int(snap.get("N", 0)))
|
||||
except Exception:
|
||||
# Diagnostic only: never block the sigma observation on the
|
||||
# follow-up Hebbian boost.
|
||||
pass
|
||||
elif regime == "healthy":
|
||||
write_event(
|
||||
store,
|
||||
kind=SIGMA_OBSERVATION_KIND,
|
||||
data={**base_data, "phase": "healthy"},
|
||||
severity="info",
|
||||
)
|
||||
else: # insufficient_data
|
||||
write_event(
|
||||
store,
|
||||
kind=SIGMA_OBSERVATION_KIND,
|
||||
data={**base_data, "phase": "insufficient_data"},
|
||||
severity="info",
|
||||
)
|
||||
|
||||
return snap
|
||||
610
src/iai_mcp/sleep.py
Normal file
610
src/iai_mcp/sleep.py
Normal file
|
|
@ -0,0 +1,610 @@
|
|||
"""CLS sleep-cycle replay (MEM-07, D-16, D-19, D-29).
|
||||
|
||||
Two phases (dual-tier per D-16):
|
||||
|
||||
- `run_light_consolidation` -- runs at every session_exit. Pure-local. NO LLM.
|
||||
FSRS tick on recently-recalled records. Sub-second. Always on.
|
||||
|
||||
- `run_heavy_consolidation` -- runs inside quiet window OR via MANUAL trigger
|
||||
(memory_consolidate MCP tool). D-GUARD ladder gates any Tier-1 LLM path via
|
||||
`should_call_llm`; Tier-0 fallback is ALWAYS present (TF-IDF + cooccurrence
|
||||
summarisation). Creates `consolidated_from` edges linking semantic summary
|
||||
records to their source episodes. Runs FSRS edge decay sweep. Logs
|
||||
`cls_consolidation_run` event with mode=heavy, tier=tier0|tier1.
|
||||
|
||||
D-16 scheduler (`should_run_heavy`):
|
||||
- ACTIVITY (default): idle>=30min AND local time in quiet_window.
|
||||
- TIME: strict cron at hour==3 local.
|
||||
- MANUAL: never fires automatically.
|
||||
- 48h max defer: if idle >= max_defer_hours, force-run regardless of window.
|
||||
|
||||
D-19 decay sweep (`_decay_edges`):
|
||||
- Only hebbian edges are decayed. contradicts / invariant_anchor /
|
||||
consolidated_from / schema_instance_of / temporal_next / curiosity_bridge /
|
||||
profile_modulates all survive forever (by design).
|
||||
- Edges > 90d stale: weight *= 0.9 ** (days - 90); prune if < ε (default 0.01).
|
||||
|
||||
D-29 unification: heavy cycle drives FSRS decay + CLS summarisation +
|
||||
schema-candidate surfacing in a single pass -- no duplicated IO.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from enum import Enum
|
||||
from itertools import combinations
|
||||
from uuid import UUID, uuid4
|
||||
from zoneinfo import ZoneInfo
|
||||
|
||||
from iai_mcp.aaak import enforce_language_tagged, generate_aaak_index
|
||||
from iai_mcp.events import write_event
|
||||
from iai_mcp.guard import BudgetLedger, RateLimitLedger, should_call_llm
|
||||
from iai_mcp.store import EDGES_TABLE, MemoryStore, _uuid_literal
|
||||
from iai_mcp.types import MemoryRecord
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- constants
|
||||
|
||||
|
||||
class SleepMode(str, Enum):
|
||||
"""D-16 trigger mode for heavy consolidation."""
|
||||
|
||||
ACTIVITY = "activity" # Idle-triggered (default). 30min idle + quiet window.
|
||||
TIME = "time" # Strict cron at hour==3 local.
|
||||
MANUAL = "manual" # Only via memory_consolidate tool.
|
||||
|
||||
|
||||
@dataclass
|
||||
class SleepConfig:
|
||||
"""User-configurable sleep-cycle schedule knobs (D-16)."""
|
||||
|
||||
mode: SleepMode = SleepMode.ACTIVITY
|
||||
quiet_window: tuple[int, int] = (22, 6) # local-hour start..end (wrap-around)
|
||||
require_idle_minutes: int = 30
|
||||
max_defer_hours: int = 48
|
||||
on_user_resume: str = "defer_remaining"
|
||||
light_on_exit: bool = True
|
||||
llm_enabled: bool = False # Tier 0 default -- D-GUARD ladder step 1
|
||||
llm_tier: int = 1 # 1=Haiku-Batch, 2=Sonnet/Opus
|
||||
|
||||
|
||||
DECAY_EPSILON: float = 0.01 # prune threshold
|
||||
DECAY_GRACE_DAYS: int = 90 # no decay for edges <=90d old
|
||||
DECAY_BASE: float = 0.9 # weight *= 0.9^(days-90)
|
||||
FSRS_STABILITY_BOOST: float = 0.2 # simple per-recall linear boost
|
||||
CLUSTER_MIN_SIZE: int = 3 # CLS cluster threshold
|
||||
# H-03: Hebbian LTP increment applied to existing edges between
|
||||
# co-cluster members during heavy consolidation. Mirrors the LTD side (DECAY_*)
|
||||
# so the graph strengthens frequently-co-retrieved associations during sleep,
|
||||
# not only during explicit user-session pipeline_recall. Conservative delta --
|
||||
# 10 consolidations bring a fresh edge from 0.05 to ~0.5 stable.
|
||||
HEAVY_LTP_DELTA: float = 0.05
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- scheduler
|
||||
|
||||
|
||||
def should_run_heavy(
|
||||
now_utc: datetime,
|
||||
last_activity_utc: datetime,
|
||||
config: SleepConfig,
|
||||
tz: ZoneInfo,
|
||||
) -> tuple[bool, str]:
|
||||
"""D-16 trigger evaluator.
|
||||
|
||||
Returns (ok, reason). reason is "" on success, a short diagnostic otherwise.
|
||||
|
||||
The 48h deadline (config.max_defer_hours) overrides MANUAL, TIME, and
|
||||
ACTIVITY path-gates -- if the user has ignored the brain for 48h, we MUST
|
||||
consolidate before the next session starts. This is a cybernetic S4
|
||||
viability requirement (Beer VSM + Ashby ultrastability).
|
||||
"""
|
||||
idle_minutes = (now_utc - last_activity_utc).total_seconds() / 60.0
|
||||
|
||||
# 48h force-run. Precedes MANUAL so a stuck manual-only deployment still
|
||||
# gets periodic consolidation.
|
||||
if idle_minutes >= config.max_defer_hours * 60:
|
||||
return True, f"max_defer_hours ({config.max_defer_hours}h) exceeded"
|
||||
|
||||
if config.mode == SleepMode.MANUAL:
|
||||
return False, "manual-only mode"
|
||||
|
||||
if config.mode == SleepMode.TIME:
|
||||
local = now_utc.astimezone(tz)
|
||||
ok = local.hour == 3
|
||||
return ok, f"TIME mode, local hour={local.hour}"
|
||||
|
||||
# ACTIVITY mode from here on.
|
||||
if idle_minutes < config.require_idle_minutes:
|
||||
return False, f"idle < {config.require_idle_minutes}min"
|
||||
|
||||
local = now_utc.astimezone(tz)
|
||||
start_h, end_h = config.quiet_window
|
||||
# Wrap-around window support: (22, 6) means 22-23 OR 0-5.
|
||||
if start_h > end_h:
|
||||
in_window = (local.hour >= start_h) or (local.hour < end_h)
|
||||
else:
|
||||
in_window = start_h <= local.hour < end_h
|
||||
if not in_window:
|
||||
return False, (
|
||||
f"outside quiet window {config.quiet_window}, "
|
||||
f"local hour={local.hour}"
|
||||
)
|
||||
return True, ""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- FSRS bits
|
||||
|
||||
|
||||
def _apply_fsrs(record: MemoryRecord, now: datetime) -> MemoryRecord:
|
||||
"""Simple FSRS-inspired stability boost for recently-recalled records.
|
||||
|
||||
scope: linear +0.2 per recall, capped at 1.0. Full FSRS (Woz et al
|
||||
2022) with per-difficulty retrievability modelling is Phase 3.
|
||||
"""
|
||||
if record.never_decay:
|
||||
return record
|
||||
record.stability = min(1.0, record.stability + FSRS_STABILITY_BOOST)
|
||||
record.last_reviewed = now
|
||||
return record
|
||||
|
||||
|
||||
def _decay_edges(
|
||||
store: MemoryStore, epsilon: float = DECAY_EPSILON,
|
||||
) -> dict:
|
||||
"""D-19 nightly sweep: decay stale hebbian + hebbian_structure edges, prune below e.
|
||||
|
||||
CONN-05 D-TEM-04 extension: structure-edge LTP from
|
||||
hebbian_structure.strengthen_structure_edge decays under the SAME formula
|
||||
and grace period as content-edge hebbian (constitutional contract: FSRS
|
||||
decay on structure edges is IDENTICAL to record-edge decay).
|
||||
|
||||
Other edge types (contradicts, invariant_anchor, consolidated_from,
|
||||
schema_instance_of, temporal_next, curiosity_bridge, profile_modulates)
|
||||
survive forever.
|
||||
"""
|
||||
tbl = store.db.open_table(EDGES_TABLE)
|
||||
df = tbl.to_pandas()
|
||||
if df.empty:
|
||||
return {"decayed": 0, "pruned": 0}
|
||||
|
||||
now = datetime.now(timezone.utc)
|
||||
decayed = 0
|
||||
pruned = 0
|
||||
|
||||
# include hebbian_structure in the sweep with identical formula.
|
||||
decayable_kinds = ("hebbian", "hebbian_structure")
|
||||
hebbian = df[df["edge_type"].isin(decayable_kinds)]
|
||||
for _, row in hebbian.iterrows():
|
||||
# CR-01: per-row try/except ValueError so one poisoned row
|
||||
# cannot kill the entire sweep. _uuid_literal raises ValueError on any
|
||||
# non-RFC-4122 UUID string, preventing SQL predicate injection via a
|
||||
# corrupt or adversarial `src`/`dst` value.
|
||||
try:
|
||||
last = row["updated_at"]
|
||||
if last is None:
|
||||
continue
|
||||
# Coerce naive -> UTC; pandas may drop tz on some backends.
|
||||
try:
|
||||
py = last.to_pydatetime() if hasattr(last, "to_pydatetime") else last
|
||||
except Exception:
|
||||
py = last
|
||||
if getattr(py, "tzinfo", None) is None:
|
||||
py = py.replace(tzinfo=timezone.utc)
|
||||
|
||||
days = (now - py).total_seconds() / 86400.0
|
||||
if days <= DECAY_GRACE_DAYS:
|
||||
continue
|
||||
|
||||
new_weight = float(row["weight"]) * (DECAY_BASE ** (days - DECAY_GRACE_DAYS))
|
||||
|
||||
# CR-01 fix: reject non-canonical UUID values BEFORE interpolation.
|
||||
src_lit = _uuid_literal(row["src"])
|
||||
dst_lit = _uuid_literal(row["dst"])
|
||||
edge_kind = str(row["edge_type"])
|
||||
if edge_kind not in decayable_kinds:
|
||||
# Belt-and-braces: should not happen given the .isin() above.
|
||||
continue
|
||||
if new_weight < epsilon:
|
||||
tbl.delete(
|
||||
f"src = '{src_lit}' AND dst = '{dst_lit}' "
|
||||
f"AND edge_type = '{edge_kind}'"
|
||||
)
|
||||
pruned += 1
|
||||
else:
|
||||
tbl.update(
|
||||
where=(
|
||||
f"src = '{src_lit}' AND dst = '{dst_lit}' "
|
||||
f"AND edge_type = '{edge_kind}'"
|
||||
),
|
||||
values={
|
||||
"weight": float(new_weight),
|
||||
"updated_at": now,
|
||||
},
|
||||
)
|
||||
decayed += 1
|
||||
except ValueError:
|
||||
# Poisoned UUID shape -- skip this row, continue the sweep.
|
||||
continue
|
||||
|
||||
return {"decayed": decayed, "pruned": pruned}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- light phase
|
||||
|
||||
|
||||
def run_light_consolidation(
|
||||
store: MemoryStore, session_id: str,
|
||||
) -> dict:
|
||||
"""D-16 light phase -- always on, pure local, no LLM.
|
||||
|
||||
Runs at every session_exit. Nudges FSRS stability on records that were
|
||||
recalled in this session (identified by fresh provenance entry within the
|
||||
last hour). Writes one `cls_consolidation_run` event with mode=light.
|
||||
"""
|
||||
now = datetime.now(timezone.utc)
|
||||
records = store.all_records()
|
||||
fsrs_ticked = 0
|
||||
|
||||
for r in records:
|
||||
if r.never_decay:
|
||||
continue
|
||||
if not r.provenance:
|
||||
continue
|
||||
last_prov = r.provenance[-1]
|
||||
try:
|
||||
ts_str = last_prov.get("ts", "")
|
||||
prov_ts = datetime.fromisoformat(ts_str.replace("Z", "+00:00"))
|
||||
if prov_ts.tzinfo is None:
|
||||
prov_ts = prov_ts.replace(tzinfo=timezone.utc)
|
||||
# Only tick records recalled within the last hour.
|
||||
if (now - prov_ts).total_seconds() < 3600:
|
||||
_apply_fsrs(r, now)
|
||||
# H-01 fix: persist the FSRS mutation so stability
|
||||
# and last_reviewed survive process restart. update_record
|
||||
# rewrites only the FSRS-relevant columns -- embedding,
|
||||
# provenance, tags etc. are left intact.
|
||||
store.update_record(r)
|
||||
fsrs_ticked += 1
|
||||
except Exception:
|
||||
# Provenance ts malformed -- ignore that record, don't fail the sweep.
|
||||
continue
|
||||
|
||||
write_event(
|
||||
store,
|
||||
kind="cls_consolidation_run",
|
||||
data={
|
||||
"mode": "light",
|
||||
"fsrs_ticked": fsrs_ticked,
|
||||
"record_count": len(records),
|
||||
},
|
||||
severity="info",
|
||||
session_id=session_id,
|
||||
)
|
||||
return {
|
||||
"mode": "light",
|
||||
"fsrs_ticked": fsrs_ticked,
|
||||
"cooccurrence_updates": 0, # populates real cooccurrence counts.
|
||||
}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- heavy phase
|
||||
|
||||
|
||||
def _build_hebbian_clusters(store: MemoryStore) -> list[list[UUID]]:
|
||||
"""Find connected components in the hebbian edge graph with size >= CLUSTER_MIN_SIZE."""
|
||||
edges_df = store.db.open_table(EDGES_TABLE).to_pandas()
|
||||
if edges_df.empty:
|
||||
return []
|
||||
hebbian = edges_df[edges_df["edge_type"] == "hebbian"]
|
||||
if hebbian.empty:
|
||||
return []
|
||||
|
||||
adj: dict[UUID, set[UUID]] = {}
|
||||
for _, row in hebbian.iterrows():
|
||||
src = UUID(row["src"])
|
||||
dst = UUID(row["dst"])
|
||||
adj.setdefault(src, set()).add(dst)
|
||||
adj.setdefault(dst, set()).add(src)
|
||||
|
||||
visited: set[UUID] = set()
|
||||
clusters: list[list[UUID]] = []
|
||||
for node in list(adj.keys()):
|
||||
if node in visited:
|
||||
continue
|
||||
stack = [node]
|
||||
component: list[UUID] = []
|
||||
while stack:
|
||||
cur = stack.pop()
|
||||
if cur in visited:
|
||||
continue
|
||||
visited.add(cur)
|
||||
component.append(cur)
|
||||
for neigh in adj.get(cur, set()):
|
||||
if neigh not in visited:
|
||||
stack.append(neigh)
|
||||
if len(component) >= CLUSTER_MIN_SIZE:
|
||||
clusters.append(component)
|
||||
return clusters
|
||||
|
||||
|
||||
def _tier0_schema_surfacing(store: MemoryStore) -> list[dict]:
|
||||
"""Tier-0 fallback schema candidate surfacing: tags appearing in >=3 records.
|
||||
|
||||
Plan 02-03's LEARN-03 schema induction consumes these candidates.
|
||||
|
||||
W3: rewritten on ``store.iter_record_columns(["tags_json"])``.
|
||||
No more full-store load + full-record decrypt -- only the ``tags_json`` column
|
||||
is read from disk; encrypted columns (literal_surface, provenance_json,
|
||||
profile_modulation_gain_json) are NEVER touched on this path. Saves ~16210
|
||||
AES-GCM operations + ~14.5 MB literal_surface materialisation + ~2.4 MB
|
||||
provenance_json materialisation + ~11.9 MB embedding materialisation per
|
||||
invocation on a production-scale store.
|
||||
"""
|
||||
tag_counts: dict[str, int] = {}
|
||||
record_count = 0
|
||||
for row in store.iter_record_columns(["tags_json"], batch_size=1024):
|
||||
record_count += 1
|
||||
tags_raw = row.get("tags_json") or "[]"
|
||||
try:
|
||||
tags = json.loads(tags_raw) if tags_raw else []
|
||||
except (TypeError, json.JSONDecodeError):
|
||||
tags = []
|
||||
for t in tags:
|
||||
# Skip language-qualifying raw:* and domain:* tags -- those are
|
||||
# classification metadata, not schema-candidate signals.
|
||||
if t.startswith("raw:") or t.startswith("domain:"):
|
||||
continue
|
||||
tag_counts[t] = tag_counts.get(t, 0) + 1
|
||||
if record_count < CLUSTER_MIN_SIZE:
|
||||
return []
|
||||
candidates: list[dict] = []
|
||||
for tag, count in tag_counts.items():
|
||||
if count >= 3:
|
||||
candidates.append(
|
||||
{
|
||||
"pattern": f"tag:{tag}",
|
||||
"confidence": min(1.0, count / 10.0),
|
||||
"evidence_count": count,
|
||||
}
|
||||
)
|
||||
return candidates
|
||||
|
||||
|
||||
def _create_semantic_summary(
|
||||
store: MemoryStore,
|
||||
cluster: list[MemoryRecord],
|
||||
summary_text: str,
|
||||
language: str,
|
||||
) -> UUID:
|
||||
"""Insert one semantic summary record + a consolidated_from edge to each source.
|
||||
|
||||
summary inherits dominant language of the source cluster.
|
||||
detail_level=3 -> never_decay=True (auto-enforced by __post_init__).
|
||||
"""
|
||||
# Lazy import -- embedder load is heavy; only needed when we actually summarise.
|
||||
from iai_mcp.embed import embedder_for_store
|
||||
|
||||
emb = embedder_for_store(store).embed(summary_text)
|
||||
now = datetime.now(timezone.utc)
|
||||
summary_id = uuid4()
|
||||
summary = MemoryRecord(
|
||||
id=summary_id,
|
||||
tier="semantic",
|
||||
literal_surface=summary_text,
|
||||
aaak_index="",
|
||||
embedding=emb,
|
||||
community_id=None,
|
||||
centrality=0.0,
|
||||
detail_level=3, # semantic summaries protected from decay
|
||||
pinned=False,
|
||||
stability=0.5,
|
||||
difficulty=0.3,
|
||||
last_reviewed=now,
|
||||
never_decay=True,
|
||||
never_merge=False,
|
||||
provenance=[
|
||||
{
|
||||
"ts": now.isoformat(),
|
||||
"cue": "cls_consolidation",
|
||||
"session_id": "system",
|
||||
}
|
||||
],
|
||||
created_at=now,
|
||||
updated_at=now,
|
||||
tags=["semantic", "cls_summary"],
|
||||
language=language,
|
||||
)
|
||||
enforce_language_tagged(summary, detect=False)
|
||||
summary.aaak_index = generate_aaak_index(summary)
|
||||
store.insert(summary)
|
||||
|
||||
# R3: batch all consolidated_from edges into a single
|
||||
# boost_edges call (one merge_insert + one tbl.add at most). Previously
|
||||
# this loop emitted N Lance versions on edges.lance for an N-source
|
||||
# cluster.
|
||||
pairs = [(summary_id, source.id) for source in cluster]
|
||||
if pairs:
|
||||
store.boost_edges(
|
||||
pairs,
|
||||
edge_type="consolidated_from",
|
||||
delta=1.0,
|
||||
)
|
||||
return summary_id
|
||||
|
||||
|
||||
def run_heavy_consolidation(
|
||||
store: MemoryStore,
|
||||
session_id: str,
|
||||
config: SleepConfig,
|
||||
budget: BudgetLedger,
|
||||
rate: RateLimitLedger,
|
||||
has_api_key: bool = False,
|
||||
) -> dict:
|
||||
"""D-16 heavy phase -- cluster-find, summarise, decay-sweep, schema-surface.
|
||||
|
||||
D-GUARD: the Tier-1 gate is consulted at the top of the function. If
|
||||
`should_call_llm` returns False for any reason (llm_enabled=false, no API
|
||||
key, budget exceeded, ratelimit cooldown), the entire cycle falls back to
|
||||
Tier 0 -- local heuristic summarisation, zero network I/O. This is the
|
||||
constitutional guarantee (D-GUARD): every LLM-dependent path
|
||||
must degrade gracefully.
|
||||
|
||||
Returns a dict with:
|
||||
mode: "heavy"
|
||||
tier: "tier0" | "tier1"
|
||||
summaries_created: int
|
||||
decay_result: {"decayed": int, "pruned": int}
|
||||
schema_candidates: list[dict]
|
||||
"""
|
||||
now = datetime.now(timezone.utc)
|
||||
|
||||
# Step 1: FSRS edge decay sweep (runs regardless of tier).
|
||||
decay_result = _decay_edges(store)
|
||||
|
||||
# Step 2: Decide Tier 0 vs Tier 1. This is consulted BEFORE any API call;
|
||||
# even if Tier 1 is allowed, Plan 02-02's scope is Tier 0 summarisation
|
||||
# only. adds the actual Haiku Batch API call. The gate is here
|
||||
# so the event log reflects what WOULD have happened had Tier 1 been
|
||||
# implemented.
|
||||
llm_ok, _llm_reason = should_call_llm(
|
||||
budget=budget,
|
||||
rate=rate,
|
||||
llm_enabled=config.llm_enabled,
|
||||
has_api_key=has_api_key,
|
||||
)
|
||||
tier = "tier1" if llm_ok else "tier0"
|
||||
# flips the Tier-1 switch by wiring the Batch API. The
|
||||
# gate is re-checked inside batch.submit_batch_consolidation so event
|
||||
# ordering matches prior plans. Tier-0 fallback remains unchanged.
|
||||
effective_tier = "tier0"
|
||||
batch_submitted = False
|
||||
if llm_ok:
|
||||
try:
|
||||
from iai_mcp.batch import submit_batch_consolidation
|
||||
|
||||
# Summarise the workload before submission. scope:
|
||||
# the real cluster/schema task payload is populated post-hoc by
|
||||
# Phase 3; for now we submit placeholder tasks so the D-GUARD
|
||||
# side-effects (budget spend + events) fire on the correct path.
|
||||
tasks: list[dict] = [
|
||||
{
|
||||
"task_id": f"sleep_cycle:{session_id}",
|
||||
"prompt": "CLS consolidation batch",
|
||||
"prompt_tok": 500,
|
||||
"output_tok": 200,
|
||||
}
|
||||
]
|
||||
ok_batch, _reason_batch, _results = submit_batch_consolidation(
|
||||
store, tasks, budget, rate,
|
||||
llm_enabled=config.llm_enabled,
|
||||
)
|
||||
if ok_batch:
|
||||
effective_tier = "tier1"
|
||||
batch_submitted = True
|
||||
except Exception as _exc:
|
||||
# Never block the Tier-0 fallback on batch errors.
|
||||
effective_tier = "tier0"
|
||||
|
||||
# Step 3: cluster-find + summarise.
|
||||
clusters = _build_hebbian_clusters(store)
|
||||
# Phase 07.7-04 W4 (D-13/D-14/D-20 + amendment): single-materialisation
|
||||
# invariant. After Plan 07.7-03 W3 rewrites _tier0_schema_surfacing on
|
||||
# iter_record_columns and Plan 07.7-04 D-26-A/B migrate schema.py
|
||||
# induce_schemas_tier0 + persist_schema to iter_record_columns, this is
|
||||
# the ONLY all_records() call left inside run_heavy_consolidation. The
|
||||
# cluster-lookup primitive choice (switch this site to iter_records or
|
||||
# per-id store.get) is DEFERRED to with the rest of W6
|
||||
# (D-20 deferred). Regression test:
|
||||
# tests/test_sleep_consolidation_streaming.py
|
||||
# ::test_run_heavy_consolidation_calls_all_records_at_most_once
|
||||
records_by_id = {r.id: r for r in store.all_records()}
|
||||
summaries_created = 0
|
||||
for cluster_ids in clusters:
|
||||
cluster_recs = [records_by_id[i] for i in cluster_ids if i in records_by_id]
|
||||
if len(cluster_recs) < CLUSTER_MIN_SIZE:
|
||||
continue
|
||||
# Dominant language vote among cluster members.
|
||||
langs = [r.language for r in cluster_recs if r.language]
|
||||
dom_lang = max(set(langs), key=langs.count) if langs else "en"
|
||||
# Tier-0 summary format: concatenated prefixes of cluster literals,
|
||||
# capped at 80 chars each + 5 members -- keeps the summary short and
|
||||
# keeps promises clean (summary is NEW content, sources intact).
|
||||
summary_text = (
|
||||
f"Cluster summary ({len(cluster_recs)} records, lang={dom_lang}): "
|
||||
+ "; ".join(r.literal_surface[:80] for r in cluster_recs[:5])
|
||||
)
|
||||
_create_semantic_summary(store, cluster_recs, summary_text, dom_lang)
|
||||
summaries_created += 1
|
||||
|
||||
# H-03: Hebbian LTP -- strengthen existing hebbian edges
|
||||
# between co-cluster members. Mirrors the LTD (_decay_edges) side so
|
||||
# the graph is not one-sided. Matches Woz 2022 SRS reinforcement on
|
||||
# co-retrieval. O(k^2) per cluster where k = cluster size; bounded by
|
||||
# the connected-components partition of hebbian adjacency.
|
||||
pairs_to_boost = list(combinations(cluster_ids, 2))
|
||||
if pairs_to_boost:
|
||||
store.boost_edges(
|
||||
pairs_to_boost,
|
||||
delta=HEAVY_LTP_DELTA,
|
||||
edge_type="hebbian",
|
||||
)
|
||||
|
||||
# Step 4: Tier-0 schema candidate surfacing.
|
||||
schemas = _tier0_schema_surfacing(store)
|
||||
|
||||
# Step 4b (Plan 02-03 LEARN-03 primary): schema induction batch run.
|
||||
# Tier-1 attempts the Haiku path via D-GUARD ladder; falls back to tier0.
|
||||
# auto-status candidates are persisted (creating schema_instance_of edges).
|
||||
schemas_induced = 0
|
||||
try:
|
||||
from iai_mcp.schema import (
|
||||
induce_schemas_tier1,
|
||||
persist_schema,
|
||||
)
|
||||
|
||||
candidates = induce_schemas_tier1(
|
||||
store, budget=budget, rate=rate,
|
||||
llm_enabled=config.llm_enabled,
|
||||
)
|
||||
for cand in candidates:
|
||||
if cand.status == "auto":
|
||||
persist_schema(store, cand)
|
||||
schemas_induced += 1
|
||||
# pending_user_approval candidates are only logged (via
|
||||
# induce_schemas_tier1's llm_health emission path).
|
||||
except Exception as exc:
|
||||
write_event(
|
||||
store,
|
||||
kind="schema_induction_run",
|
||||
data={"error": str(exc), "status": "failed"},
|
||||
severity="warning",
|
||||
session_id=session_id,
|
||||
)
|
||||
|
||||
write_event(
|
||||
store,
|
||||
kind="cls_consolidation_run",
|
||||
data={
|
||||
"mode": "heavy",
|
||||
"tier": effective_tier,
|
||||
"tier_eligible": tier,
|
||||
"summaries_created": summaries_created,
|
||||
"decay_result": decay_result,
|
||||
"schema_candidates": len(schemas),
|
||||
"schemas_induced": schemas_induced,
|
||||
"batch_submitted": batch_submitted,
|
||||
},
|
||||
severity="info",
|
||||
session_id=session_id,
|
||||
)
|
||||
|
||||
return {
|
||||
"mode": "heavy",
|
||||
"tier": effective_tier,
|
||||
"summaries_created": summaries_created,
|
||||
"decay_result": decay_result,
|
||||
"schema_candidates": schemas,
|
||||
"schemas_induced": schemas_induced,
|
||||
}
|
||||
819
src/iai_mcp/sleep_pipeline.py
Normal file
819
src/iai_mcp/sleep_pipeline.py
Normal file
|
|
@ -0,0 +1,819 @@
|
|||
"""Phase 10.3 — Sleep cycle pipeline + L3 failure grammar.
|
||||
|
||||
Five ordered atomic steps run only inside the SLEEP lifecycle state:
|
||||
1. SCHEMA_MINE — extract schemas from episodic
|
||||
2. KNOB_TUNE — recompute procedural knobs
|
||||
3. DREAM_DECAY — Hebbian decay + edge prune
|
||||
4. OPTIMIZE_LANCE — table-level optimize(cleanup_older_than)
|
||||
5. COMPACT_RECORDS — final records.lance compaction
|
||||
|
||||
Design invariants:
|
||||
|
||||
* Each step is **transactional** — Lance optimize is itself transactional;
|
||||
schema_mine / knob_tune / dream_decay write their own atomic temp+swap
|
||||
semantics through the modules they delegate to. The pipeline never
|
||||
modifies `MemoryRecord.literal_surface` (verbatim-recall invariant
|
||||
carried forward from / Plan 5/6).
|
||||
|
||||
* On exception mid-step N, `lifecycle_state.json.sleep_cycle_progress`
|
||||
records `{last_completed_step: N-1, attempt: K, last_error: "..."}`
|
||||
via the same atomic-replace path as `lifecycle_state.save_state`.
|
||||
|
||||
* **3-strike → 24h auto-quarantine**: three consecutive failures of
|
||||
the SAME step (attempt ≥ 3 for that step) triggers quarantine. While
|
||||
quarantined, `run()` short-circuits with `quarantine_triggered=True`.
|
||||
Auto-recovery once `now >= until_ts`; manual recovery via
|
||||
`reset_quarantine()` or `iai-mcp maintenance sleep-cycle --reset-quarantine`.
|
||||
|
||||
* **Bounded deferral** (≤2 sec target via ≤10 sec checkpoint chunks):
|
||||
a callable `interrupt_check` is checked between chunks. If True, the
|
||||
current chunk completes, progress is persisted, and `run()` returns
|
||||
with `completed_steps` so far. The state machine then transitions to
|
||||
WAKE; the next SLEEP cycle resumes from the same chunk.
|
||||
|
||||
This module's heavy lifting **delegates to existing functions** —
|
||||
schema mining (`schema.induce_schemas_tier0`), Hebbian decay
|
||||
(`sleep._decay_edges`), table optimize (`maintenance.optimize_lance_storage`),
|
||||
records compaction (Phase 07.14-01 `optimize_lance_storage(retention=0d)`).
|
||||
The pipeline is orchestration only.
|
||||
|
||||
Daemon main-loop integration (Phase 10.4/10.5) and yield-gate removal
|
||||
(Phase 10.6) are shipped. ``continuous_audit`` (identity_audit.py) and
|
||||
``_hippea_cascade_loop`` (daemon.py) remain as background tasks
|
||||
running alongside the sleep-cycle pipeline; ``dream_daemon`` was
|
||||
removed in Phase 10.6.
|
||||
|
||||
Constitutional guards
|
||||
---------------------
|
||||
* C1 HUMAN-FIRST: pipeline runs only in SLEEP state, so MCP traffic
|
||||
cannot collide. The legacy ``_should_yield_to_mcp`` gate was removed
|
||||
in — SLEEP-state isolation is the sole guarantor.
|
||||
* C3 ZERO paid-API cost: no reference to ANTHROPIC_API_KEY anywhere.
|
||||
Schema induction stays Tier-0 (llm_enabled=False is the only path
|
||||
this pipeline exercises).
|
||||
* C5 / verbatim preservation: the pipeline does NOT touch
|
||||
`MemoryRecord.literal_surface`. Every delegated function is a
|
||||
metadata mutator (FSRS state, edge weights, schema candidates,
|
||||
Lance manifests, profile knobs).
|
||||
* C6 read-only audit: schema mining is MVCC reads against records;
|
||||
decay is metadata-only on edges; optimize is Lance-internal.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import time
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable, TypedDict
|
||||
|
||||
from iai_mcp.lifecycle_event_log import LifecycleEventLog
|
||||
from iai_mcp.lifecycle_state import (
|
||||
LIFECYCLE_STATE_PATH,
|
||||
LifecycleStateRecord,
|
||||
Quarantine,
|
||||
SleepCycleProgress,
|
||||
load_state,
|
||||
save_state,
|
||||
)
|
||||
|
||||
|
||||
# Quarantine TTL configurable via env (default 24h).
|
||||
# Read ONCE at import time so tests that monkeypatch the env var must
|
||||
# also patch the module attribute (`sleep_pipeline.QUARANTINE_TTL_HOURS_DEFAULT`)
|
||||
# — same discipline as `maintenance.LANCE_OPTIMIZE_INTERVAL_SEC`.
|
||||
QUARANTINE_TTL_HOURS_DEFAULT: float = float(
|
||||
os.environ.get("IAI_MCP_SLEEP_QUARANTINE_TTL_HOURS", "24"),
|
||||
)
|
||||
|
||||
|
||||
class SleepStep(Enum):
|
||||
"""Five ordered atomic steps of the sleep pipeline.
|
||||
|
||||
Numeric values are stable: `lifecycle_state.json.sleep_cycle_progress
|
||||
.last_completed_step` persists the integer, and resume-from-step-N
|
||||
relies on the integer ordering. Re-ordering or renumbering is a
|
||||
schema migration (do NOT change without bumping the field).
|
||||
"""
|
||||
|
||||
SCHEMA_MINE = 1
|
||||
KNOB_TUNE = 2
|
||||
DREAM_DECAY = 3
|
||||
OPTIMIZE_LANCE = 4
|
||||
COMPACT_RECORDS = 5
|
||||
|
||||
|
||||
class SleepPipelineResult(TypedDict, total=False):
|
||||
"""Return shape from `SleepPipeline.run()` / `force_run()`.
|
||||
|
||||
`completed_steps`: list of `SleepStep` values that finished cleanly
|
||||
in this invocation (NOT cumulative across resumes; only this run).
|
||||
`failed_step`: the step that raised, if any. None on full success or
|
||||
on bounded-deferral early-return.
|
||||
`error`: stringified exception (truncated to 500 chars) or None.
|
||||
`duration_sec`: wall-clock for the invocation.
|
||||
`quarantine_triggered`: True iff quarantine was entered DURING this
|
||||
run (3rd-strike) OR was already active when run() was called.
|
||||
`interrupted`: True iff bounded-deferral interrupt_check fired and
|
||||
we returned early. None / absent means a natural completion or
|
||||
failure terminated the run.
|
||||
"""
|
||||
|
||||
completed_steps: list[SleepStep]
|
||||
failed_step: SleepStep | None
|
||||
error: str | None
|
||||
duration_sec: float
|
||||
quarantine_triggered: bool
|
||||
interrupted: bool
|
||||
|
||||
|
||||
def _utc_now() -> datetime:
|
||||
"""Single point of `datetime.now(UTC)` — patchable in tests."""
|
||||
return datetime.now(timezone.utc)
|
||||
|
||||
|
||||
def _utc_now_iso() -> str:
|
||||
"""Return ISO-8601 UTC timestamp (matches lifecycle_state convention)."""
|
||||
return _utc_now().isoformat()
|
||||
|
||||
|
||||
class SleepPipeline:
|
||||
"""Orchestrates the 5-step sleep cycle with resume + quarantine.
|
||||
|
||||
Construction is cheap: opens no LanceDB tables, performs no I/O
|
||||
beyond reading `lifecycle_state.json`. The actual heavy work
|
||||
happens inside `run()` / `force_run()` step bodies.
|
||||
|
||||
Concurrency note: the pipeline is single-threaded by design. The
|
||||
caller (state machine in Phase 10.4/10.5; CLI in this phase) must
|
||||
ensure no overlapping invocations — typically by holding the
|
||||
SLEEP-state guard. There is no internal lock; running two
|
||||
`SleepPipeline` instances against the same `lifecycle_state_path`
|
||||
simultaneously is undefined behaviour.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
store: Any,
|
||||
lifecycle_state_path: Path | None = None,
|
||||
event_log: LifecycleEventLog | None = None,
|
||||
quarantine_ttl_hours: float | None = None,
|
||||
) -> None:
|
||||
self._store = store
|
||||
self._lifecycle_state_path = (
|
||||
lifecycle_state_path
|
||||
if lifecycle_state_path is not None
|
||||
else LIFECYCLE_STATE_PATH
|
||||
)
|
||||
# Default to a fresh LifecycleEventLog rooted at the conventional
|
||||
# `~/.iai-mcp/logs/` directory. Tests inject a tmp_path-rooted log.
|
||||
self._event_log = (
|
||||
event_log if event_log is not None else LifecycleEventLog()
|
||||
)
|
||||
self._quarantine_ttl_hours = (
|
||||
float(quarantine_ttl_hours)
|
||||
if quarantine_ttl_hours is not None
|
||||
else QUARANTINE_TTL_HOURS_DEFAULT
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Quarantine state (lifecycle_state.json.quarantine)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _load_state_record(self) -> LifecycleStateRecord:
|
||||
"""Read the current lifecycle state record (with self-heal)."""
|
||||
return load_state(self._lifecycle_state_path)
|
||||
|
||||
def _save_state_record(self, record: LifecycleStateRecord) -> None:
|
||||
"""Atomic-replace persist of the lifecycle state record."""
|
||||
save_state(record, self._lifecycle_state_path)
|
||||
|
||||
def _load_quarantine(self) -> Quarantine | None:
|
||||
"""Return the current quarantine sub-record or None."""
|
||||
return self._load_state_record().get("quarantine")
|
||||
|
||||
def _set_quarantine(self, reason: str) -> Quarantine:
|
||||
"""Set quarantine until now + ttl_hours; persist; emit event.
|
||||
|
||||
Returns the quarantine record we just persisted so callers can
|
||||
include `until_ts` in their result dict.
|
||||
"""
|
||||
now = _utc_now()
|
||||
until = now + timedelta(hours=self._quarantine_ttl_hours)
|
||||
quarantine: Quarantine = {
|
||||
"until_ts": until.isoformat(),
|
||||
"reason": reason,
|
||||
"since_ts": now.isoformat(),
|
||||
}
|
||||
record = self._load_state_record()
|
||||
record["quarantine"] = quarantine
|
||||
self._save_state_record(record)
|
||||
# Event is best-effort — a full disk should not crash the pipeline
|
||||
# mid-quarantine-write (state is already persisted).
|
||||
try:
|
||||
self._event_log.append({
|
||||
"event": "quarantine_entered",
|
||||
"reason": reason,
|
||||
"until_ts": quarantine["until_ts"],
|
||||
"ttl_hours": self._quarantine_ttl_hours,
|
||||
})
|
||||
except Exception:
|
||||
pass
|
||||
return quarantine
|
||||
|
||||
def _clear_quarantine(self, *, reason: str = "manual_reset") -> None:
|
||||
"""Wipe the quarantine sub-record + reset progress attempt counter.
|
||||
|
||||
`reason` is logged on the `quarantine_lifted` event. Defaults to
|
||||
`manual_reset` (the human-action path); auto-recovery passes
|
||||
`auto_recovery_after_ttl` from the run() entry point.
|
||||
"""
|
||||
record = self._load_state_record()
|
||||
prior_quarantine = record.get("quarantine")
|
||||
record["quarantine"] = None
|
||||
# Resetting quarantine also resets the per-step attempt counter
|
||||
# — otherwise the very next failure would re-trip 3-strike on
|
||||
# attempt=4 immediately. Progress.last_completed_step is kept
|
||||
# so resume-from-step-N still works on the next run.
|
||||
progress = record.get("sleep_cycle_progress")
|
||||
if progress is not None:
|
||||
progress["attempt"] = 0
|
||||
record["sleep_cycle_progress"] = progress
|
||||
self._save_state_record(record)
|
||||
try:
|
||||
self._event_log.append({
|
||||
"event": "quarantine_lifted",
|
||||
"reason": reason,
|
||||
"prior_until_ts": (
|
||||
prior_quarantine["until_ts"] if prior_quarantine else None
|
||||
),
|
||||
})
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def is_quarantined(self) -> bool:
|
||||
"""True iff a quarantine record exists AND `now < until_ts`.
|
||||
|
||||
A quarantine record with a past `until_ts` is automatically
|
||||
cleared by `run()` on the next invocation (auto-recovery); this
|
||||
getter does NOT mutate state — it is a pure read.
|
||||
"""
|
||||
quarantine = self._load_quarantine()
|
||||
if quarantine is None:
|
||||
return False
|
||||
try:
|
||||
until = datetime.fromisoformat(quarantine["until_ts"])
|
||||
except (TypeError, ValueError):
|
||||
# Malformed timestamp -- treat as not-quarantined so we don't
|
||||
# lock the user out forever on a corrupted entry. The next
|
||||
# successful run will overwrite this slot.
|
||||
return False
|
||||
if until.tzinfo is None:
|
||||
until = until.replace(tzinfo=timezone.utc)
|
||||
return _utc_now() < until
|
||||
|
||||
def reset_quarantine(self) -> None:
|
||||
"""Manual recovery: clear quarantine + reset attempt counter.
|
||||
|
||||
Used by `iai-mcp maintenance sleep-cycle --reset-quarantine`.
|
||||
"""
|
||||
self._clear_quarantine(reason="manual_reset")
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Progress state (lifecycle_state.json.sleep_cycle_progress)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _load_progress(self) -> SleepCycleProgress | None:
|
||||
"""Return the current sleep-cycle progress sub-record or None."""
|
||||
return self._load_state_record().get("sleep_cycle_progress")
|
||||
|
||||
def _save_progress(
|
||||
self,
|
||||
last_completed_step: int,
|
||||
attempt: int,
|
||||
last_error: str | None,
|
||||
*,
|
||||
started_at: str | None = None,
|
||||
) -> SleepCycleProgress:
|
||||
"""Persist sleep-cycle progress; preserve `started_at` across saves.
|
||||
|
||||
`started_at` defaults to: prior progress's started_at if any,
|
||||
else `now()`. This gives the operator a wall-clock view of how
|
||||
long the cycle has been running across resumes.
|
||||
"""
|
||||
record = self._load_state_record()
|
||||
prior = record.get("sleep_cycle_progress") or {}
|
||||
progress: SleepCycleProgress = {
|
||||
"last_completed_step": last_completed_step,
|
||||
"attempt": attempt,
|
||||
"last_error": last_error,
|
||||
"started_at": (
|
||||
started_at
|
||||
if started_at is not None
|
||||
else prior.get("started_at", _utc_now_iso())
|
||||
),
|
||||
}
|
||||
record["sleep_cycle_progress"] = progress
|
||||
self._save_state_record(record)
|
||||
return progress
|
||||
|
||||
def _clear_progress(self) -> None:
|
||||
"""Wipe the sleep-cycle progress sub-record after full success."""
|
||||
record = self._load_state_record()
|
||||
record["sleep_cycle_progress"] = None
|
||||
self._save_state_record(record)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Step orchestrators (Task 1.2 — call existing functions)
|
||||
# ------------------------------------------------------------------
|
||||
#
|
||||
# Each `_step_*` returns True on natural completion and False when
|
||||
# `interrupt_check` fired between chunks. On exception, the step
|
||||
# body re-raises to the caller (run()) which handles 3-strike
|
||||
# quarantine + progress save. Step bodies are deliberately small:
|
||||
# they delegate to the migration-source functions listed in
|
||||
# the migration-source functions from the respective modules.
|
||||
|
||||
def _emit_step_started(self, step: SleepStep) -> None:
|
||||
"""Best-effort `sleep_step_started` emission to the event log.
|
||||
|
||||
Failure (e.g. /home full) MUST NOT abort the step — the work
|
||||
itself is the load-bearing path; observability is secondary.
|
||||
"""
|
||||
try:
|
||||
self._event_log.append({
|
||||
"event": "sleep_step_started",
|
||||
"step": step.name,
|
||||
"step_num": step.value,
|
||||
})
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _emit_step_completed(
|
||||
self, step: SleepStep, duration_sec: float, **payload: Any,
|
||||
) -> None:
|
||||
"""Best-effort `sleep_step_completed` emission with optional payload."""
|
||||
try:
|
||||
self._event_log.append({
|
||||
"event": "sleep_step_completed",
|
||||
"step": step.name,
|
||||
"step_num": step.value,
|
||||
"duration_sec": round(duration_sec, 3),
|
||||
**payload,
|
||||
})
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _check_interrupt(
|
||||
self,
|
||||
step: SleepStep,
|
||||
chunk_idx: int,
|
||||
interrupt_check: Callable[[], bool] | None,
|
||||
) -> bool:
|
||||
"""Return True iff the caller asked us to defer.
|
||||
|
||||
Persists `sleep_cycle_progress.last_completed_step = step.value-1`
|
||||
(we have NOT completed `step` yet) and stamps `last_error` with
|
||||
a structured deferral marker so `iai-mcp lifecycle status` can
|
||||
show "deferred at step N chunk K" rather than a fake error.
|
||||
"""
|
||||
if interrupt_check is None:
|
||||
return False
|
||||
try:
|
||||
should = bool(interrupt_check())
|
||||
except Exception:
|
||||
# If the caller's predicate is broken, do NOT defer (better
|
||||
# to keep working than to hang forever waiting for a True
|
||||
# that will never come). Same fail-safe discipline as the
|
||||
# event-log emit failures above.
|
||||
should = False
|
||||
if not should:
|
||||
return False
|
||||
# Save deferral marker. last_completed_step stays at the prior
|
||||
# step (we are mid-`step`); attempt counter is unchanged because
|
||||
# this is NOT a failure — it is a cooperative yield.
|
||||
prior = self._load_progress() or {}
|
||||
last_completed = step.value - 1
|
||||
attempt = int(prior.get("attempt", 0))
|
||||
self._save_progress(
|
||||
last_completed_step=last_completed,
|
||||
attempt=attempt,
|
||||
last_error=f"deferred:step={step.name}:chunk_idx={chunk_idx}",
|
||||
)
|
||||
return True
|
||||
|
||||
def _step_schema_mine(
|
||||
self, interrupt_check: Callable[[], bool] | None,
|
||||
) -> tuple[bool, dict[str, Any]]:
|
||||
"""Step 1: schema mining via existing tier-0 induction.
|
||||
|
||||
`induce_schemas_tier0(store)` is the migration source — it does
|
||||
a single MVCC pass over `records.tags_json` and returns
|
||||
candidates without persisting (Plan 02-03 contract). For Phase
|
||||
10.3 the chunk granularity is one (the underlying call is a
|
||||
single batch read internally; we do NOT slice it). The chunk
|
||||
boundary is honoured by checking `interrupt_check` BEFORE the
|
||||
call — if the operator wants to bail, we do, otherwise we run
|
||||
to completion.
|
||||
|
||||
Returns `(completed, payload)` — completed=False signals an
|
||||
interrupt-induced early return (no payload metadata).
|
||||
"""
|
||||
from iai_mcp.schema import induce_schemas_tier0
|
||||
|
||||
# Single-chunk implementation: chunk_idx=0 is the only checkpoint.
|
||||
if self._check_interrupt(SleepStep.SCHEMA_MINE, 0, interrupt_check):
|
||||
return False, {}
|
||||
candidates = induce_schemas_tier0(self._store)
|
||||
# Best-effort metric for the completion event; tier-0 returns a
|
||||
# list of `SchemaCandidate` dataclass instances, len() works.
|
||||
try:
|
||||
count = len(candidates) if candidates is not None else 0
|
||||
except Exception:
|
||||
count = 0
|
||||
return True, {"schemas_induced": count}
|
||||
|
||||
def _step_knob_tune(
|
||||
self, interrupt_check: Callable[[], bool] | None,
|
||||
) -> tuple[bool, dict[str, Any]]:
|
||||
"""Step 2: per-knob procedural snapshot.
|
||||
|
||||
implements this as a per-knob iteration over the
|
||||
sealed `PROFILE_KNOBS` registry. Each knob is one chunk (so the
|
||||
interrupt cadence matches the registry size — currently 11 per
|
||||
the 2026-04-30 audit). The actual Bayesian update is event-
|
||||
driven via `core.dispatch profile_update_from_signal` and
|
||||
already runs there; what sleep needs to do is take a snapshot
|
||||
of the live state so audit trails can replay it. We call
|
||||
`profile.default_state()` once outside the loop so a future
|
||||
phase that adds real per-knob work has a place to hook in
|
||||
WITHOUT re-architecting the chunk boundary.
|
||||
"""
|
||||
from iai_mcp.profile import PROFILE_KNOBS, default_state
|
||||
|
||||
knob_names = sorted(PROFILE_KNOBS.keys())
|
||||
# Capture current state once outside the loop — calling this
|
||||
# per knob would be wasteful and would still be a single-shot
|
||||
# snapshot. The loop's purpose is the chunk boundary (interrupt
|
||||
# check), not work amplification.
|
||||
snapshot = default_state()
|
||||
for chunk_idx, name in enumerate(knob_names):
|
||||
if self._check_interrupt(
|
||||
SleepStep.KNOB_TUNE, chunk_idx, interrupt_check,
|
||||
):
|
||||
return False, {}
|
||||
# Per-knob "work" — currently observation-only. A future
|
||||
# phase plugs Bayesian recomputation here. Touching
|
||||
# `snapshot[name]` is enough to surface a missing-knob bug
|
||||
# at sleep time rather than at retrieval time.
|
||||
_ = snapshot.get(name)
|
||||
return True, {"knobs_tuned": len(knob_names)}
|
||||
|
||||
def _step_dream_decay(
|
||||
self, interrupt_check: Callable[[], bool] | None,
|
||||
) -> tuple[bool, dict[str, Any]]:
|
||||
"""Step 3: Hebbian decay + edge prune via existing `_decay_edges`.
|
||||
|
||||
`sleep._decay_edges(store)` is the migration source — Plan
|
||||
03-01 CONN-05 D-TEM-04. It walks every hebbian/hebbian_structure
|
||||
edge and either decays the weight in place or prunes when
|
||||
below epsilon. The function is monolithic; for we
|
||||
wrap it as a single chunk (chunk_idx=0) and check
|
||||
`interrupt_check` before the call.
|
||||
"""
|
||||
from iai_mcp.sleep import _decay_edges
|
||||
|
||||
if self._check_interrupt(SleepStep.DREAM_DECAY, 0, interrupt_check):
|
||||
return False, {}
|
||||
result = _decay_edges(self._store)
|
||||
# Surface decay/prune counts in the completion event for ops.
|
||||
if isinstance(result, dict):
|
||||
return True, {
|
||||
"decayed": int(result.get("decayed", 0) or 0),
|
||||
"pruned": int(result.get("pruned", 0) or 0),
|
||||
}
|
||||
return True, {}
|
||||
|
||||
def _step_optimize_lance(
|
||||
self, interrupt_check: Callable[[], bool] | None,
|
||||
) -> tuple[bool, dict[str, Any]]:
|
||||
"""Step 4: per-table Lance optimize via existing helper.
|
||||
|
||||
`optimize_lance_storage(store, retention=None)` is the
|
||||
migration source (Phase 7.3 D7.3-09). It iterates the three
|
||||
daemon-owned tables (records / edges / events) internally; we
|
||||
cannot subdivide without reimplementing. For the
|
||||
chunk boundary is one (chunk_idx=0). The retention defaults to
|
||||
the configured 1-day window (matches periodic-audit cadence).
|
||||
"""
|
||||
from iai_mcp.maintenance import optimize_lance_storage
|
||||
|
||||
if self._check_interrupt(
|
||||
SleepStep.OPTIMIZE_LANCE, 0, interrupt_check,
|
||||
):
|
||||
return False, {}
|
||||
report = optimize_lance_storage(self._store)
|
||||
# Helper never raises (D7.3-09); per-table errors live inside
|
||||
# the report dict. We surface a compact summary in the event.
|
||||
tables_with_errors = [
|
||||
t for t, r in (report or {}).items()
|
||||
if isinstance(r, dict) and "error" in r
|
||||
]
|
||||
return True, {
|
||||
"tables_optimized": list((report or {}).keys()),
|
||||
"tables_with_errors": tables_with_errors,
|
||||
}
|
||||
|
||||
def _step_compact_records(
|
||||
self, interrupt_check: Callable[[], bool] | None,
|
||||
) -> tuple[bool, dict[str, Any]]:
|
||||
"""Step 5: final records.lance compaction with retention=0d.
|
||||
|
||||
Phase 07.14-01 helper: `optimize_lance_storage(store,
|
||||
retention=timedelta(days=0))` reclaims version manifests
|
||||
accumulated since the last compaction. This is intentionally
|
||||
a separate step from
|
||||
OPTIMIZE_LANCE because the retention policy differs: step 4
|
||||
keeps a 1-day point-in-time window for time-travel reads;
|
||||
step 5 takes the more aggressive zero-retention pass after
|
||||
the day-old data is no longer needed.
|
||||
"""
|
||||
from iai_mcp.maintenance import optimize_lance_storage
|
||||
|
||||
if self._check_interrupt(
|
||||
SleepStep.COMPACT_RECORDS, 0, interrupt_check,
|
||||
):
|
||||
return False, {}
|
||||
report = optimize_lance_storage(
|
||||
self._store, retention=timedelta(days=0),
|
||||
)
|
||||
tables_with_errors = [
|
||||
t for t, r in (report or {}).items()
|
||||
if isinstance(r, dict) and "error" in r
|
||||
]
|
||||
return True, {
|
||||
"tables_compacted": list((report or {}).keys()),
|
||||
"tables_with_errors": tables_with_errors,
|
||||
"retention_days": 0,
|
||||
}
|
||||
|
||||
# Lookup table from step -> bound method, in execution order.
|
||||
# Defined AFTER the step methods so attribute resolution succeeds.
|
||||
@property
|
||||
def _step_methods(
|
||||
self,
|
||||
) -> dict[
|
||||
SleepStep,
|
||||
Callable[
|
||||
[Callable[[], bool] | None],
|
||||
"tuple[bool, dict[str, Any]]",
|
||||
],
|
||||
]:
|
||||
return {
|
||||
SleepStep.SCHEMA_MINE: self._step_schema_mine,
|
||||
SleepStep.KNOB_TUNE: self._step_knob_tune,
|
||||
SleepStep.DREAM_DECAY: self._step_dream_decay,
|
||||
SleepStep.OPTIMIZE_LANCE: self._step_optimize_lance,
|
||||
SleepStep.COMPACT_RECORDS: self._step_compact_records,
|
||||
}
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Public entry points
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
# Step ordering used by both run() and force_run(). Tuple is fixed so
|
||||
# neither path can accidentally execute steps out of order.
|
||||
_STEP_ORDER: tuple[SleepStep, ...] = (
|
||||
SleepStep.SCHEMA_MINE,
|
||||
SleepStep.KNOB_TUNE,
|
||||
SleepStep.DREAM_DECAY,
|
||||
SleepStep.OPTIMIZE_LANCE,
|
||||
SleepStep.COMPACT_RECORDS,
|
||||
)
|
||||
|
||||
# 3-strike threshold: the SAME step failing this many consecutive
|
||||
# times triggers 24h auto-quarantine. Per panel verdict R3 / proposal
|
||||
# v2 §2 L3.
|
||||
_QUARANTINE_STRIKE_THRESHOLD: int = 3
|
||||
|
||||
def run(
|
||||
self, interrupt_check: Callable[[], bool] | None = None,
|
||||
) -> SleepPipelineResult:
|
||||
"""Run the sleep pipeline (auto-quarantine respected).
|
||||
|
||||
Behaviour summary:
|
||||
|
||||
1. If `is_quarantined()`: return immediately with
|
||||
`quarantine_triggered=True` and `completed_steps=[]`. The
|
||||
caller is expected to surface this in CLI output / doctor row.
|
||||
|
||||
2. Auto-recovery: if `quarantine` exists but `until_ts` is in
|
||||
the past, clear it (logged as `quarantine_lifted`,
|
||||
reason=`auto_recovery_after_ttl`) and proceed.
|
||||
|
||||
3. Determine resume point from `_load_progress()`:
|
||||
- No progress record OR last_completed_step == 0 → start at
|
||||
SCHEMA_MINE (step 1).
|
||||
- last_completed_step == K (1 ≤ K < 5) → start at step K+1.
|
||||
- last_completed_step == 5 → fresh cycle (start at step 1);
|
||||
we treat a successful prior run that was never cleared as
|
||||
a fresh start, not a no-op.
|
||||
|
||||
4. For each step from `start` to COMPACT_RECORDS:
|
||||
- Emit `sleep_step_started`.
|
||||
- Call `_step_*(interrupt_check)`. The step body itself
|
||||
checks the interrupt between chunks and persists progress.
|
||||
- On interrupt (returned False): early-return with
|
||||
`interrupted=True`. progress is already saved by the
|
||||
step body; we do NOT touch it here.
|
||||
- On exception: save progress with attempt+1, log
|
||||
`sleep_step_completed` (with error payload), check 3-strike
|
||||
→ maybe quarantine, then return with `failed_step` set.
|
||||
- On success: emit `sleep_step_completed`, persist progress
|
||||
with last_completed_step=step.value (attempt reset to 0).
|
||||
|
||||
5. On full success: clear progress (sleep_cycle_progress=None).
|
||||
|
||||
Failure isolation: the helper functions used by step bodies
|
||||
already have their own "never-raise" disciplines where
|
||||
applicable (e.g. `optimize_lance_storage` per D7.3-09); this
|
||||
method's try/except is a defense-in-depth wrapper around the
|
||||
whole step call.
|
||||
"""
|
||||
return self._run_internal(
|
||||
interrupt_check, force=False,
|
||||
)
|
||||
|
||||
def force_run(
|
||||
self, interrupt_check: Callable[[], bool] | None = None,
|
||||
) -> SleepPipelineResult:
|
||||
"""Run even if quarantined. Used by `--force` CLI path.
|
||||
|
||||
Quarantine state is NOT cleared by force_run on its own — the
|
||||
operator-facing `--reset-quarantine` flag is what wipes the
|
||||
quarantine record. force_run merely bypasses the gate so a
|
||||
diagnostic / repair run can execute. If the run succeeds in
|
||||
full, the quarantine sub-record is left alone (operator may
|
||||
still want to investigate); subsequent natural `run()` calls
|
||||
will see `is_quarantined()` True until TTL expires or the
|
||||
operator runs `--reset-quarantine` explicitly.
|
||||
"""
|
||||
return self._run_internal(
|
||||
interrupt_check, force=True,
|
||||
)
|
||||
|
||||
def _run_internal(
|
||||
self,
|
||||
interrupt_check: Callable[[], bool] | None,
|
||||
*,
|
||||
force: bool,
|
||||
) -> SleepPipelineResult:
|
||||
"""Shared body for `run()` / `force_run()`. See `run()` docstring."""
|
||||
t0 = time.monotonic()
|
||||
completed_steps: list[SleepStep] = []
|
||||
|
||||
# Quarantine gate (skipped under force=True).
|
||||
if not force and self._check_and_maybe_auto_recover_quarantine():
|
||||
# is_quarantined returned True AND we are NOT in force mode.
|
||||
# Short-circuit: quarantined.
|
||||
return {
|
||||
"completed_steps": [],
|
||||
"failed_step": None,
|
||||
"error": None,
|
||||
"duration_sec": round(time.monotonic() - t0, 3),
|
||||
"quarantine_triggered": True,
|
||||
"interrupted": False,
|
||||
}
|
||||
|
||||
# Determine resume step from persisted progress.
|
||||
progress = self._load_progress()
|
||||
last_completed = (
|
||||
int(progress.get("last_completed_step", 0))
|
||||
if progress is not None
|
||||
else 0
|
||||
)
|
||||
# If last_completed >= 5, treat as fresh cycle (the prior cycle
|
||||
# finished but progress was never cleared — defensive). Otherwise
|
||||
# resume from last_completed + 1.
|
||||
if last_completed >= SleepStep.COMPACT_RECORDS.value:
|
||||
last_completed = 0
|
||||
resume_step_value = last_completed + 1
|
||||
|
||||
# Execute steps in order, skipping any with value < resume.
|
||||
for step in self._STEP_ORDER:
|
||||
if step.value < resume_step_value:
|
||||
continue
|
||||
|
||||
self._emit_step_started(step)
|
||||
step_t0 = time.monotonic()
|
||||
method = self._step_methods[step]
|
||||
try:
|
||||
done, payload = method(interrupt_check)
|
||||
except Exception as exc: # noqa: BLE001 -- 3-strike + quarantine flow
|
||||
err_str = str(exc)[:500]
|
||||
# Increment attempt counter for THIS step. If the prior
|
||||
# progress record's last_completed_step matches step-1,
|
||||
# we are failing the same step; attempt counter persists
|
||||
# and we add 1. If it differs (e.g. resumed from a
|
||||
# different step that just succeeded above), reset to 1.
|
||||
prior = self._load_progress() or {}
|
||||
prior_last = int(prior.get("last_completed_step", 0))
|
||||
if prior_last == step.value - 1:
|
||||
new_attempt = int(prior.get("attempt", 0)) + 1
|
||||
else:
|
||||
new_attempt = 1
|
||||
self._save_progress(
|
||||
last_completed_step=step.value - 1,
|
||||
attempt=new_attempt,
|
||||
last_error=err_str,
|
||||
)
|
||||
# Log completion event with error info for ops trail.
|
||||
self._emit_step_completed(
|
||||
step,
|
||||
duration_sec=time.monotonic() - step_t0,
|
||||
error=err_str,
|
||||
attempt=new_attempt,
|
||||
)
|
||||
quarantine_triggered = False
|
||||
if new_attempt >= self._QUARANTINE_STRIKE_THRESHOLD:
|
||||
self._set_quarantine(
|
||||
reason=(
|
||||
f"sleep step {step.value} ({step.name}) "
|
||||
f"failed {new_attempt}x"
|
||||
),
|
||||
)
|
||||
quarantine_triggered = True
|
||||
return {
|
||||
"completed_steps": completed_steps,
|
||||
"failed_step": step,
|
||||
"error": err_str,
|
||||
"duration_sec": round(time.monotonic() - t0, 3),
|
||||
"quarantine_triggered": quarantine_triggered,
|
||||
"interrupted": False,
|
||||
}
|
||||
|
||||
if not done:
|
||||
# Bounded-deferral early return. The step body already
|
||||
# persisted the deferral marker via `_check_interrupt`.
|
||||
return {
|
||||
"completed_steps": completed_steps,
|
||||
"failed_step": None,
|
||||
"error": None,
|
||||
"duration_sec": round(time.monotonic() - t0, 3),
|
||||
"quarantine_triggered": False,
|
||||
"interrupted": True,
|
||||
}
|
||||
|
||||
# Step succeeded. Persist progress with attempt=0 (clean
|
||||
# slate for the NEXT step's strike counter; if the next step
|
||||
# fails, prior_last will equal step.value, so the failure
|
||||
# branch above will correctly start its own counter at 1).
|
||||
self._save_progress(
|
||||
last_completed_step=step.value,
|
||||
attempt=0,
|
||||
last_error=None,
|
||||
)
|
||||
self._emit_step_completed(
|
||||
step,
|
||||
duration_sec=time.monotonic() - step_t0,
|
||||
**payload,
|
||||
)
|
||||
completed_steps.append(step)
|
||||
|
||||
# All steps from `resume` to COMPACT_RECORDS completed cleanly.
|
||||
# Clear progress so the next invocation starts fresh.
|
||||
self._clear_progress()
|
||||
return {
|
||||
"completed_steps": completed_steps,
|
||||
"failed_step": None,
|
||||
"error": None,
|
||||
"duration_sec": round(time.monotonic() - t0, 3),
|
||||
"quarantine_triggered": False,
|
||||
"interrupted": False,
|
||||
}
|
||||
|
||||
def _check_and_maybe_auto_recover_quarantine(self) -> bool:
|
||||
"""Return True iff the pipeline should short-circuit due to quarantine.
|
||||
|
||||
Side effect: when a quarantine record exists but `until_ts` is
|
||||
in the past, this clears the quarantine via `_clear_quarantine`
|
||||
with reason=`auto_recovery_after_ttl` and returns False
|
||||
(caller proceeds to run the cycle). Otherwise:
|
||||
- No quarantine → False.
|
||||
- Quarantine still active (`now < until_ts`) → True.
|
||||
"""
|
||||
quarantine = self._load_quarantine()
|
||||
if quarantine is None:
|
||||
return False
|
||||
try:
|
||||
until = datetime.fromisoformat(quarantine["until_ts"])
|
||||
except (TypeError, ValueError):
|
||||
# Malformed; clear and proceed (don't lock the user out).
|
||||
self._clear_quarantine(reason="auto_recovery_malformed_ts")
|
||||
return False
|
||||
if until.tzinfo is None:
|
||||
until = until.replace(tzinfo=timezone.utc)
|
||||
if _utc_now() >= until:
|
||||
self._clear_quarantine(reason="auto_recovery_after_ttl")
|
||||
return False
|
||||
return True
|
||||
389
src/iai_mcp/socket_server.py
Normal file
389
src/iai_mcp/socket_server.py
Normal file
|
|
@ -0,0 +1,389 @@
|
|||
"""Phase 7 daemon socket-server (R1, R3, R4, R6).
|
||||
|
||||
NDJSON JSON-RPC 2.0 server over ~/.iai-mcp/.daemon.sock. Reuses
|
||||
core.dispatch() with stdio (R6 -- both transports share one function per D7-08).
|
||||
|
||||
Constitutional guards:
|
||||
- C-DISPATCHER-FSM-ISOLATION (NEW per D7-16, formerly SPEC R7 'C2'): socket
|
||||
dispatcher MUST NOT transition daemon FSM directly; it calls core.dispatch
|
||||
which returns a dict. FSM transitions remain owned by daemon.py FSM tick.
|
||||
- C1 HUMAN-FIRST: in-process cooperative yield via last_activity_ts and
|
||||
active_connections probes; daemon.py REM scheduler reads these between
|
||||
cycles (D7-09 revised wording -- see RESEARCH §2).
|
||||
- C3 ZERO API COST: imports stdlib + core.dispatch only; no SDK references.
|
||||
- C5 LITERAL PRESERVATION: zero record mutation paths; transport-only adapter.
|
||||
- R5 fail-loud surface: daemon-side raises become JSON-RPC error code -32001;
|
||||
wrapper-side socket-death surfaces as -32002 (see bridge.ts in Plan 07-04).
|
||||
- R6 backward-compat: imports core.dispatch; no transport branching.
|
||||
|
||||
D7-17 single-socket dispatcher fork: each accepted NDJSON line is parsed once,
|
||||
then routed by shape:
|
||||
- jsonrpc=='2.0' -> core.dispatch (Phase 7 MCP methods)
|
||||
- 'type' in CONTROL_MSG_TYPES (Phase 4 control plane) -> forward verbatim to
|
||||
concurrency._dispatch_socket_request (lock + state must be wired by Wave 3
|
||||
via SocketServer(store, lock=..., state=...); Wave 2 standalone tests do not
|
||||
exercise this branch -- the forks are independent).
|
||||
- else -> JSON-RPC ERR_INVALID_REQUEST.
|
||||
|
||||
D7.1-02 launchd socket activation: serve() forks on LISTEN_FDS env var. When
|
||||
launchd-managed (LISTEN_FDS=1, LISTEN_PID==os.getpid()), inherit pre-bound fd 3
|
||||
via the systemd-compatible inherited-fd protocol; SKIP cleanup_stale_socket,
|
||||
mkdir, chmod, and post-serve unlink (launchd owns the socket file). Otherwise
|
||||
binds the path manually (development, tests, non-Darwin). See _inherit_launchd_socket.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import inspect
|
||||
import json
|
||||
import os
|
||||
import socket
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from iai_mcp.concurrency import SOCKET_PATH, cleanup_stale_socket
|
||||
from iai_mcp.core import UnknownMethodError
|
||||
|
||||
# JSON-RPC 2.0 server-error codes (jsonrpc.org/specification reserves
|
||||
# -32099..-32000 for "implementation-defined server-errors").
|
||||
ERR_DAEMON_INTERNAL = -32001 # internal dispatch failure
|
||||
ERR_INVALID_REQUEST = -32600 # malformed JSON-RPC envelope
|
||||
ERR_METHOD_NOT_FOUND = -32601 # core.dispatch raised UnknownMethodError
|
||||
ERR_INVALID_PARAMS = -32602 # core.dispatch raised TypeError or KeyError on params
|
||||
ERR_PARSE_ERROR = -32700 # json.loads failed
|
||||
|
||||
# Plan 10.6-01 Task 1.4: REMOVED `IDLE_CHECK_INTERVAL_SECS`
|
||||
# and the socket-side `idle_watcher` task. The lifecycle state machine
|
||||
# (heartbeat scanner + idle detector + sleep_pipeline + Hibernation
|
||||
# transition) now owns the "idle daemon -> shut down" responsibility.
|
||||
# `IDLE_SECS_DEFAULT` and `idle_secs` are kept on the SocketServer
|
||||
# constructor for backward compat with existing tests, but no
|
||||
# in-process loop consumes them anymore.
|
||||
IDLE_SECS_DEFAULT = 1800 # 30 minutes per SPEC R4 (kept for compat)
|
||||
|
||||
|
||||
def _inherit_launchd_socket() -> socket.socket | None:
|
||||
"""Return inherited unix socket from launchd, or None for manual run.
|
||||
|
||||
Implements the systemd-style inherited-fd protocol (also honored by
|
||||
macOS launchd) per D7.1-02:
|
||||
- LISTEN_FDS env var = number of inherited fds (must be >= 1).
|
||||
- LISTEN_PID env var = pid of process meant to inherit (must == os.getpid()).
|
||||
- First inherited fd is 3 (SD_LISTEN_FDS_START).
|
||||
|
||||
Returns None on ANY mismatch / parse-failure / env-absent so caller can
|
||||
fall back to the manual bind path. Defensive against:
|
||||
- env vars absent (manual `python -m iai_mcp.daemon` from terminal)
|
||||
- LISTEN_PID inherited from a parent but not meant for us
|
||||
- LISTEN_FDS=0 (launchd would never set this, but be safe)
|
||||
- non-integer values (raise-free; return None)
|
||||
"""
|
||||
listen_fds = os.environ.get("LISTEN_FDS")
|
||||
listen_pid = os.environ.get("LISTEN_PID")
|
||||
if listen_fds is None or listen_pid is None:
|
||||
return None
|
||||
try:
|
||||
if int(listen_pid) != os.getpid():
|
||||
return None
|
||||
if int(listen_fds) < 1:
|
||||
return None
|
||||
except ValueError:
|
||||
return None
|
||||
inherited_fd = 3 # SD_LISTEN_FDS_START
|
||||
sock = socket.socket(fileno=inherited_fd)
|
||||
sock.setblocking(False)
|
||||
return sock
|
||||
|
||||
|
||||
def _validate_jsonrpc_envelope(req: Any) -> tuple[bool, str | None]:
|
||||
"""D7-01 schema check: jsonrpc=='2.0', id present and non-null, method is string."""
|
||||
if not isinstance(req, dict):
|
||||
return False, "request must be a JSON object"
|
||||
if req.get("jsonrpc") != "2.0":
|
||||
return False, "jsonrpc must be '2.0'"
|
||||
if "id" not in req or req["id"] is None:
|
||||
return False, "id required and non-null"
|
||||
if not isinstance(req.get("method"), str):
|
||||
return False, "method must be a string"
|
||||
if "params" in req and not isinstance(req["params"], (dict, list)):
|
||||
return False, "params must be object or array"
|
||||
return True, None
|
||||
|
||||
|
||||
class SocketServer:
|
||||
"""Per-connection multiplexed JSON-RPC 2.0 server over unix socket.
|
||||
|
||||
D7-17 single-socket dispatcher: same accept loop handles both Phase 4
|
||||
control messages (forwarded to concurrency._dispatch_socket_request when
|
||||
lock + state are wired) and JSON-RPC MCP envelopes (routed via
|
||||
core.dispatch on a worker thread per R3).
|
||||
|
||||
Constructor args:
|
||||
store: shared MemoryStore (singleton in daemon.main(); fresh in tests).
|
||||
idle_secs: idle-shutdown threshold; falls back to env override then
|
||||
IDLE_SECS_DEFAULT when None.
|
||||
lock: ProcessLock for the control-plane fork (Wave 3 wires; Wave 2
|
||||
standalone path leaves None and the control branch returns a
|
||||
structured "control_plane_unwired" error if exercised).
|
||||
state: shared state dict for the control-plane fork (same wiring rule).
|
||||
"""
|
||||
|
||||
# control-message types (the existing 7) -- used by D7-17 dispatcher fork.
|
||||
# Source of truth: concurrency.py:_dispatch_socket_request branches.
|
||||
CONTROL_MSG_TYPES = frozenset({
|
||||
"status", "user_initiated_sleep", "force_wake", "force_rem",
|
||||
"pause", "resume", "session_open",
|
||||
})
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
store: Any,
|
||||
idle_secs: int | None = None,
|
||||
*,
|
||||
lock: Any | None = None,
|
||||
state: dict | None = None,
|
||||
) -> None:
|
||||
self.store = store
|
||||
# Plan 10.6-01 Task 1.4: env override
|
||||
# `IAI_DAEMON_IDLE_SHUTDOWN_SECS` removed; the constructor
|
||||
# default falls through to IDLE_SECS_DEFAULT (1800). The
|
||||
# attribute is kept for back-compat with telemetry / tests
|
||||
# but no in-process loop reads it anymore.
|
||||
if idle_secs is None:
|
||||
idle_secs = IDLE_SECS_DEFAULT
|
||||
self.idle_secs = idle_secs
|
||||
self.last_activity_ts: float = time.monotonic()
|
||||
self.active_connections: int = 0
|
||||
# asyncio.Event lazy-binds to the running loop on first wait/set, so it
|
||||
# is safe to construct here even before the loop starts (Python 3.10+).
|
||||
self.shutdown_event: asyncio.Event = asyncio.Event()
|
||||
# D7-17: control-plane fork wiring (Wave 3 supplies these).
|
||||
self._lock = lock
|
||||
self._state = state
|
||||
|
||||
async def handle(
|
||||
self,
|
||||
reader: asyncio.StreamReader,
|
||||
writer: asyncio.StreamWriter,
|
||||
) -> None:
|
||||
"""One coroutine per accepted connection. Reads NDJSON lines, dispatches each.
|
||||
|
||||
D7-17 fork on each line:
|
||||
- jsonrpc=='2.0' -> core.dispatch (Phase 7 MCP, R1)
|
||||
- 'type' in CONTROL_MSG_TYPES and no jsonrpc -> control plane
|
||||
- else -> JSON-RPC ERR_INVALID_REQUEST.
|
||||
"""
|
||||
self.active_connections += 1
|
||||
try:
|
||||
while not reader.at_eof():
|
||||
line = await reader.readline()
|
||||
if not line:
|
||||
break
|
||||
self.last_activity_ts = time.monotonic() # D7-05
|
||||
req_id: Any = None
|
||||
try:
|
||||
req = json.loads(line)
|
||||
except json.JSONDecodeError as e:
|
||||
resp = {
|
||||
"jsonrpc": "2.0",
|
||||
"id": None,
|
||||
"error": {"code": ERR_PARSE_ERROR, "message": str(e)},
|
||||
}
|
||||
writer.write((json.dumps(resp) + "\n").encode("utf-8"))
|
||||
await writer.drain()
|
||||
continue
|
||||
|
||||
# D7-17 fork branch 1: control message (no jsonrpc field).
|
||||
if (
|
||||
isinstance(req, dict)
|
||||
and req.get("type") in self.CONTROL_MSG_TYPES
|
||||
and "jsonrpc" not in req
|
||||
):
|
||||
if self._lock is None or self._state is None:
|
||||
# Wave 2 standalone path: control plane needs daemon
|
||||
# context (Wave 3 wires it via daemon.main()).
|
||||
result = {
|
||||
"ok": False,
|
||||
"reason": "control_plane_unwired",
|
||||
"error": (
|
||||
"SocketServer constructed without lock/state; "
|
||||
"control-plane fork unavailable in this context"
|
||||
),
|
||||
}
|
||||
else:
|
||||
try:
|
||||
# Lazy local import; signature/behavior owned by
|
||||
# (UNCHANGED): (req, store, lock, state).
|
||||
from iai_mcp.concurrency import _dispatch_socket_request
|
||||
result = await _dispatch_socket_request(
|
||||
req, self.store, self._lock, self._state,
|
||||
)
|
||||
except Exception as e: # noqa: BLE001
|
||||
# Control-plane errors must not crash the daemon.
|
||||
# Return structured error (mirrors shape).
|
||||
result = {"ok": False, "reason": "control_plane_error",
|
||||
"error": str(e)[:200]}
|
||||
if result is not None:
|
||||
writer.write((json.dumps(result) + "\n").encode("utf-8"))
|
||||
await writer.drain()
|
||||
continue
|
||||
|
||||
# D7-17 fork branch 2: JSON-RPC 2.0 envelope.
|
||||
ok, err = _validate_jsonrpc_envelope(req)
|
||||
req_id = req.get("id") if isinstance(req, dict) else None
|
||||
if not ok:
|
||||
resp = {
|
||||
"jsonrpc": "2.0",
|
||||
"id": req_id,
|
||||
"error": {"code": ERR_INVALID_REQUEST, "message": err},
|
||||
}
|
||||
writer.write((json.dumps(resp) + "\n").encode("utf-8"))
|
||||
await writer.drain()
|
||||
continue
|
||||
method = req["method"]
|
||||
params = req.get("params") or {}
|
||||
try:
|
||||
# Lazy local import keeps daemon startup snappy and dodges
|
||||
# circular-import edge cases during async test fixture setup
|
||||
# (mirrors concurrency.py:251-256 lazy-import pattern).
|
||||
from iai_mcp.core import dispatch
|
||||
# CRITICAL R3: dispatch is sync + can take 50-500 ms.
|
||||
# asyncio.to_thread prevents head-of-line blocking across
|
||||
# connections. The threading.RLock added in Plan 07-01
|
||||
# (_profile_lock in core.py) keeps profile mutations safe
|
||||
# under concurrent worker-thread access.
|
||||
result = await asyncio.to_thread(
|
||||
dispatch, self.store, method, params,
|
||||
)
|
||||
resp = {"jsonrpc": "2.0", "id": req_id, "result": result}
|
||||
except UnknownMethodError as e:
|
||||
# V3-03 fix: unknown method now raises (was: in-band {error:...} dict).
|
||||
# e.args[0] is the unknown method name (per core.UnknownMethodError contract).
|
||||
resp = {
|
||||
"jsonrpc": "2.0",
|
||||
"id": req_id,
|
||||
"error": {
|
||||
"code": ERR_METHOD_NOT_FOUND,
|
||||
"message": f"unknown method '{e.args[0]}'",
|
||||
},
|
||||
}
|
||||
except KeyError as e:
|
||||
# V3-04 fix: KeyError from missing required params (e.g. params["cue"]).
|
||||
# Was incorrectly mapped to -32601; correct code is -32602 INVALID_PARAMS.
|
||||
# e.args[0] is the missing key name.
|
||||
resp = {
|
||||
"jsonrpc": "2.0",
|
||||
"id": req_id,
|
||||
"error": {
|
||||
"code": ERR_INVALID_PARAMS,
|
||||
"message": f"missing required param: {e.args[0]!r}",
|
||||
},
|
||||
}
|
||||
except TypeError as e:
|
||||
resp = {
|
||||
"jsonrpc": "2.0",
|
||||
"id": req_id,
|
||||
"error": {"code": ERR_INVALID_PARAMS, "message": str(e)},
|
||||
}
|
||||
except Exception as e: # noqa: BLE001 -- socket must never crash daemon
|
||||
resp = {
|
||||
"jsonrpc": "2.0",
|
||||
"id": req_id,
|
||||
"error": {"code": ERR_DAEMON_INTERNAL, "message": str(e)},
|
||||
}
|
||||
writer.write((json.dumps(resp) + "\n").encode("utf-8"))
|
||||
await writer.drain()
|
||||
except (ConnectionResetError, BrokenPipeError, ConnectionAbortedError):
|
||||
# Client closed the socket mid-write (common when the MCP wrapper
|
||||
# in Claude Code exits or the host kills its pipe). Expected
|
||||
# behavior — not a daemon fault. Falls through to finally cleanup
|
||||
# without the asyncio "Unhandled exception in client_connected_cb"
|
||||
# noise that previously flooded launchd-stderr.log.
|
||||
pass
|
||||
finally:
|
||||
self.active_connections -= 1
|
||||
try:
|
||||
writer.close()
|
||||
await writer.wait_closed()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Plan 10.6-01 Task 1.4: REMOVED `idle_watcher`. The
|
||||
# lifecycle state machine + heartbeat scanner + idle detector
|
||||
# supersede this in-process timer. `last_activity_ts` /
|
||||
# `active_connections` accounting on this object is preserved (used
|
||||
# by tests + future observability) but no internal loop consumes
|
||||
# them.
|
||||
|
||||
async def serve(self, socket_path: Path | None = None) -> None:
|
||||
"""Bind socket, run server until shutdown_event set, drain in-flight, unlink socket.
|
||||
|
||||
D7.1-02 fork: when launchd has pre-bound the listener (LISTEN_FDS env set
|
||||
and LISTEN_PID==os.getpid()), inherit fd 3 and call asyncio.start_unix_server
|
||||
with sock=. SKIP cleanup_stale_socket, mkdir, chmod, post-serve unlink, and
|
||||
the cleanup_socket=True kwarg -- launchd owns the socket file's lifecycle
|
||||
(SockPathMode=384 already applied at bind time per D7.1-01). Otherwise
|
||||
(development, tests, non-Darwin) preserve the original manual-bind
|
||||
path: cleanup_stale -> mkdir -> bind -> chmod, with post-serve unlink on
|
||||
Python < 3.13.
|
||||
"""
|
||||
if socket_path is None:
|
||||
# Honor IAI_DAEMON_SOCKET_PATH env override per D7-14 test-isolation pattern.
|
||||
env_path = os.environ.get("IAI_DAEMON_SOCKET_PATH")
|
||||
socket_path = Path(env_path) if env_path else SOCKET_PATH
|
||||
|
||||
# Detect Python 3.13+ cleanup_socket kwarg (mirror the same probe used
|
||||
# in concurrency.py to keep behavior identical between the two servers).
|
||||
sig = inspect.signature(asyncio.start_unix_server)
|
||||
supports_cleanup_socket = "cleanup_socket" in sig.parameters
|
||||
|
||||
inherited = _inherit_launchd_socket()
|
||||
if inherited is not None:
|
||||
# D7.1-02 launchd socket activation. launchd owns the socket file:
|
||||
# do NOT cleanup_stale_socket (would unlink launchd's listener and
|
||||
# brick subsequent activations), do NOT mkdir (path already exists
|
||||
# since launchd bound it), do NOT chmod (SockPathMode=384 applied
|
||||
# at bind), do NOT pass cleanup_socket=True (asyncio would unlink
|
||||
# on close), do NOT post-serve unlink. launchd manages the file.
|
||||
server = await asyncio.start_unix_server(
|
||||
self.handle,
|
||||
sock=inherited,
|
||||
)
|
||||
else:
|
||||
# Manual-run fallback (development, tests, non-Darwin) -- unchanged
|
||||
# from except enclosed in the else branch.
|
||||
cleanup_stale_socket(socket_path)
|
||||
socket_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
server_kwargs: dict[str, Any] = (
|
||||
{"cleanup_socket": True} if supports_cleanup_socket else {}
|
||||
)
|
||||
server = await asyncio.start_unix_server(
|
||||
self.handle,
|
||||
path=str(socket_path),
|
||||
**server_kwargs,
|
||||
)
|
||||
# T-04-07 mitigation (Phase 4 threat model): chmod 0o600 immediately after bind.
|
||||
try:
|
||||
os.chmod(str(socket_path), 0o600)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
# Plan 10.6-01 Task 1.4: idle_task removed (was
|
||||
# `asyncio.create_task(self.idle_watcher())`). The lifecycle
|
||||
# state machine drives shutdown via Hibernation transitions.
|
||||
try:
|
||||
async with server:
|
||||
await self.shutdown_event.wait()
|
||||
# Graceful shutdown: stop accepting new connections, drain in-flight.
|
||||
server.close()
|
||||
await server.wait_closed()
|
||||
finally:
|
||||
# Manual unlink fallback ONLY for the manual-bind branch on
|
||||
# Python <3.13. Under launchd, NEVER unlink -- launchd owns the file.
|
||||
if inherited is None and not supports_cleanup_socket:
|
||||
try:
|
||||
socket_path.unlink()
|
||||
except (FileNotFoundError, OSError):
|
||||
pass
|
||||
1598
src/iai_mcp/store.py
Normal file
1598
src/iai_mcp/store.py
Normal file
File diff suppressed because it is too large
Load diff
241
src/iai_mcp/tem.py
Normal file
241
src/iai_mcp/tem.py
Normal file
|
|
@ -0,0 +1,241 @@
|
|||
"""Plan 03-01 CONN-05: TEM factorization (Whittington-Behrens 2020 Cell 183:1249-1263).
|
||||
|
||||
Tolman-Eichenbaum Machine factorization of *structure* and *content* into
|
||||
binary BSC hypervectors at D=10000 (TorchHD semantics, packed to 1250 bytes).
|
||||
Structure is bound with content via tensor product (binary XOR in BSC), and
|
||||
multiple role-filler pairs are bundled via per-bit majority vote so a single
|
||||
1250-byte hypervector carries 15-20 simultaneously-recoverable structural
|
||||
attributes per record (D-TEM-02: unbind fidelity >= 0.95 at 15 pairs).
|
||||
|
||||
Constitutional fit:
|
||||
- CONN-05 = TEM factorization. Structural queries are FIRST-CLASS peers of
|
||||
cosine queries in the retrieval pipeline. NOT a "VSA retrieval layer over
|
||||
cosine" -- structural and content signals merge in the ranker as siblings.
|
||||
- D-TEM-01: BSC binary (NOT FHRR), D=10000.
|
||||
- D-TEM-02: 15-20 role-filler pairs target; >= 95% unbind fidelity.
|
||||
- D-TEM-03: tensor-product binding (XOR self-inverse in the binary case).
|
||||
- D-TEM-04: Hebbian LTP on structure edges mirrors content-edge behavior
|
||||
(autopoiesis applied to structure).
|
||||
- bind/unbind is lossless wrt the codebook -- decode by nearest-neighbor
|
||||
Hamming-distance against known fillers.
|
||||
|
||||
Implementation note (vs TorchHD direct usage): we operate on packed `bytes`
|
||||
because (a) LanceDB's pa.binary() column type is the storage contract; (b)
|
||||
1250 bytes per record is much cheaper than torch tensor materialisation on
|
||||
every read; (c) bytewise XOR + np.unpackbits-based majority is faster than
|
||||
the torch round-trip at our N. TorchHD BSC semantics are preserved bit-for-bit.
|
||||
|
||||
Public API:
|
||||
- ROLE_VOCABULARY: 18 fixed role symbols (D-TEM Claude's Discretion).
|
||||
- role_hv(role): deterministic D=10000 binary codebook vector for a role symbol.
|
||||
- filler_hv(value): deterministic hash-to-D=10000 of a string filler.
|
||||
- bind(a, b) / unbind(bound, key): bytewise XOR (BSC binding self-inverse).
|
||||
- pack_pairs(pairs): per-bit majority bundle of bound role-filler pairs.
|
||||
- unpack_role(hv, role): unbind by role key; caller compares to filler codebook.
|
||||
- bind_structure(record): derive role-filler pairs from MemoryRecord fields,
|
||||
return packed hypervector (1250 bytes).
|
||||
- decay_structure_edge(stability, difficulty, dt_days): FSRS decay identical
|
||||
to the content-edge formula (sleep.py: weight *= 0.9 ** (days - 90)).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import numpy as np
|
||||
|
||||
from iai_mcp.types import STRUCTURE_HV_BYTES, STRUCTURE_HV_DIM
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from iai_mcp.types import MemoryRecord
|
||||
|
||||
|
||||
# D-TEM Claude's Discretion: 18 fixed role symbols. ORDER IS PART
|
||||
# OF THE CONTRACT -- changing it breaks bind_structure's deterministic codebook.
|
||||
ROLE_VOCABULARY: tuple[str, ...] = (
|
||||
"WHEN",
|
||||
"WHERE",
|
||||
"ROLE",
|
||||
"PROJECT",
|
||||
"COMMUNITY_ID",
|
||||
"TEMPORAL_POSITION",
|
||||
"ACTOR",
|
||||
"OBJECT",
|
||||
"INTENT",
|
||||
"MODALITY",
|
||||
"LANG",
|
||||
"SESSION_ID",
|
||||
"TIER",
|
||||
"VALENCE",
|
||||
"CERTAINTY",
|
||||
"SOURCE",
|
||||
"TOPIC",
|
||||
"PARENT_ID",
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- primitives
|
||||
|
||||
|
||||
def _seed_from_str(prefix: str, value: str) -> int:
|
||||
"""Stable per-string 64-bit seed (sha256 prefix). hash() is randomised
|
||||
per-process by default, so we use a deterministic digest instead."""
|
||||
digest = hashlib.sha256(f"{prefix}:{value}".encode("utf-8")).digest()
|
||||
return int.from_bytes(digest[:8], "big", signed=False)
|
||||
|
||||
|
||||
def _hv_from_seed(seed: int) -> bytes:
|
||||
"""Generate a D=10000 binary hypervector packed to STRUCTURE_HV_BYTES."""
|
||||
rng = np.random.default_rng(seed)
|
||||
bits = rng.integers(0, 2, size=STRUCTURE_HV_DIM, dtype=np.uint8)
|
||||
return np.packbits(bits).tobytes()
|
||||
|
||||
|
||||
# Precompute the 18-role codebook at import time. Same role -> same bytes
|
||||
# across processes thanks to the deterministic sha256-prefixed seed.
|
||||
_ROLE_HV_TABLE: dict[str, bytes] = {
|
||||
role: _hv_from_seed(_seed_from_str("tem-role-v1", role))
|
||||
for role in ROLE_VOCABULARY
|
||||
}
|
||||
|
||||
|
||||
def role_hv(role: str) -> bytes:
|
||||
"""Deterministic D=10000 binary codebook vector for a role symbol.
|
||||
|
||||
Uses the precomputed _ROLE_HV_TABLE for the 18 known roles; falls back
|
||||
to a fresh deterministic generation for any other role (still seeded
|
||||
on the role string, so callers can extend the vocabulary at their own
|
||||
risk -- ROLE_VOCABULARY is the canonical contract).
|
||||
"""
|
||||
cached = _ROLE_HV_TABLE.get(role)
|
||||
if cached is not None:
|
||||
return cached
|
||||
return _hv_from_seed(_seed_from_str("tem-role-v1", role))
|
||||
|
||||
|
||||
def filler_hv(value: str) -> bytes:
|
||||
"""Deterministic hash-to-D=10000 of a string filler."""
|
||||
return _hv_from_seed(_seed_from_str("tem-filler-v1", value))
|
||||
|
||||
|
||||
def bind(a: bytes, b: bytes) -> bytes:
|
||||
"""BSC tensor-product binding: bytewise XOR. Self-inverse semantics."""
|
||||
if len(a) != len(b):
|
||||
raise ValueError(
|
||||
f"bind requires equal-length hypervectors, got {len(a)} and {len(b)}"
|
||||
)
|
||||
aa = np.frombuffer(a, dtype=np.uint8)
|
||||
bb = np.frombuffer(b, dtype=np.uint8)
|
||||
return np.bitwise_xor(aa, bb).tobytes()
|
||||
|
||||
|
||||
def unbind(bound: bytes, key: bytes) -> bytes:
|
||||
"""XOR inverse of bind. Identical to bind() because XOR is self-inverse."""
|
||||
return bind(bound, key)
|
||||
|
||||
|
||||
def pack_pairs(pairs: list[tuple[str, bytes]]) -> bytes:
|
||||
"""Bundle bound role-filler pairs via per-bit majority vote.
|
||||
|
||||
Deterministic tiebreak: bit=1 on even ties (`sums * 2 >= n`). This means
|
||||
a single (role, filler) pair recovers the filler exactly under unbind.
|
||||
"""
|
||||
if not pairs:
|
||||
return bytes(STRUCTURE_HV_BYTES) # empty bundle is the zero hv
|
||||
bound = []
|
||||
for role, filler in pairs:
|
||||
bound.append(np.frombuffer(bind(role_hv(role), filler), dtype=np.uint8))
|
||||
# Stack as (N, 1250) uint8, unpack to (N, 10000) bits, vote per column.
|
||||
stacked_bytes = np.stack(bound) # shape (N, 1250)
|
||||
bits = np.unpackbits(stacked_bytes, axis=1).astype(np.int32) # (N, 10000)
|
||||
sums = bits.sum(axis=0)
|
||||
n = len(pairs)
|
||||
# majority: bit=1 when more than half of inputs are 1; ties -> 1 (`>=`).
|
||||
voted = (sums * 2 >= n).astype(np.uint8)
|
||||
return np.packbits(voted).tobytes()
|
||||
|
||||
|
||||
def unpack_role(hv: bytes, role: str) -> bytes:
|
||||
"""Unbind hv by role's hypervector. Returns a noisy filler hv; caller
|
||||
nearest-neighbour decodes against a known filler codebook."""
|
||||
return unbind(hv, role_hv(role))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- structure
|
||||
|
||||
|
||||
def _bucket_datetime(dt) -> str:
|
||||
"""Coarse temporal bucket for the WHEN role-filler -- ISO YYYY-MM-DD."""
|
||||
try:
|
||||
return dt.date().isoformat()
|
||||
except Exception:
|
||||
return "unknown"
|
||||
|
||||
|
||||
def bind_structure(record: "MemoryRecord") -> bytes:
|
||||
"""Derive 15+ role-filler pairs from a MemoryRecord and pack to bytes.
|
||||
|
||||
Deterministic per (record fields, structural identity). NOT a hash of the
|
||||
full record content -- only the structural attributes (tier, language,
|
||||
community, temporal bucket, schema_version, pinned, detail_level, leading
|
||||
tags, parent provenance). literal_surface is intentionally excluded (it
|
||||
is content, not structure -- D-TEM-03 keeps the two factorised).
|
||||
"""
|
||||
pairs: list[tuple[str, bytes]] = []
|
||||
|
||||
# Constitutional 6 (D-TEM):
|
||||
pairs.append(("WHEN", filler_hv(_bucket_datetime(record.created_at))))
|
||||
pairs.append(("WHERE", filler_hv(record.tier))) # tier doubles as locale
|
||||
pairs.append(("ROLE", filler_hv(record.tier)))
|
||||
pairs.append(("PROJECT", filler_hv("iai-mcp")))
|
||||
pairs.append(("COMMUNITY_ID", filler_hv(str(record.community_id) if record.community_id else "none")))
|
||||
pairs.append(("TEMPORAL_POSITION", filler_hv(_bucket_datetime(record.created_at))))
|
||||
|
||||
# Schema-side fillers (deterministic, queryable):
|
||||
pairs.append(("LANG", filler_hv(record.language or "en")))
|
||||
pairs.append(("TIER", filler_hv(record.tier)))
|
||||
pairs.append(("MODALITY", filler_hv("text")))
|
||||
pairs.append(("INTENT", filler_hv("episodic" if record.tier == "episodic" else "semantic")))
|
||||
pairs.append(("ACTOR", filler_hv("user")))
|
||||
pairs.append(("OBJECT", filler_hv(str(record.id))))
|
||||
pairs.append(("VALENCE", filler_hv("neutral")))
|
||||
pairs.append(("CERTAINTY", filler_hv(f"trust_{round(record.s5_trust_score, 1)}")))
|
||||
pairs.append(("SOURCE", filler_hv("pinned" if record.pinned else "drift")))
|
||||
|
||||
# Content-adjacent fillers (still structural):
|
||||
leading_tag = (record.tags[0] if record.tags else "untagged")
|
||||
pairs.append(("TOPIC", filler_hv(str(leading_tag))))
|
||||
|
||||
# Provenance hop -- session_id from latest provenance entry if any.
|
||||
sid = "no-session"
|
||||
if record.provenance:
|
||||
try:
|
||||
sid = str(record.provenance[-1].get("session_id") or "no-session")
|
||||
except Exception:
|
||||
sid = "no-session"
|
||||
pairs.append(("SESSION_ID", filler_hv(sid)))
|
||||
pairs.append(("PARENT_ID", filler_hv("root")))
|
||||
|
||||
return pack_pairs(pairs)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- decay
|
||||
|
||||
|
||||
# FSRS decay on structure edges is IDENTICAL to record-edge decay.
|
||||
# Mirror sleep.py's _decay_edges constants verbatim instead of importing them
|
||||
# (cyclic-import safe; values are part of the constitutional contract).
|
||||
_DECAY_GRACE_DAYS: int = 90
|
||||
_DECAY_BASE: float = 0.9
|
||||
|
||||
|
||||
def decay_structure_edge(stability: float, difficulty: float, dt_days: float) -> float:
|
||||
"""FSRS decay multiplier for structure edges. Identical to content-edge
|
||||
formula (sleep.py:21-26 + _decay_edges body): no decay during grace
|
||||
window, then `weight *= 0.9 ** (days - 90)`. Returns the multiplier
|
||||
(1.0 = no decay; (0..1) decayed; <eps prune at caller).
|
||||
"""
|
||||
age_days = max(0.0, float(dt_days))
|
||||
if age_days <= _DECAY_GRACE_DAYS:
|
||||
return 1.0
|
||||
return _DECAY_BASE ** (age_days - _DECAY_GRACE_DAYS)
|
||||
306
src/iai_mcp/trajectory.py
Normal file
306
src/iai_mcp/trajectory.py
Normal file
|
|
@ -0,0 +1,306 @@
|
|||
"""Trajectory metrics M1..M6 (LEARN-07, D-32) -- Task 4.
|
||||
|
||||
Every session_exit writes one `trajectory_metric` event per metric. The CLI
|
||||
aggregator reads these events via aggregate_trajectory.
|
||||
|
||||
Metrics (all computed in session-local scope):
|
||||
- M1: clarifying questions per session (decreasing over time)
|
||||
- M2: retrieval precision@5 (growing)
|
||||
- M3: tokens per session (decreasing)
|
||||
- M4: profile-vector variance (decreasing -> converged by session ~30)
|
||||
- M5: curiosity question frequency (entropy dropping)
|
||||
- M6: context-repeat rate (> 90% by session ~20)
|
||||
|
||||
Plan 02-03 scope: event emission + basic aggregation. wires the
|
||||
CLI aggregator + synthetic-corpus benchmark.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
from iai_mcp.events import query_events, write_event
|
||||
from iai_mcp.store import MemoryStore
|
||||
|
||||
|
||||
METRIC_NAMES: list[str] = ["m1", "m2", "m3", "m4", "m5", "m6"]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- emit
|
||||
|
||||
|
||||
def record_session_metrics(
|
||||
store: MemoryStore,
|
||||
session_id: str,
|
||||
metrics: dict[str, float],
|
||||
) -> None:
|
||||
"""Emit one `trajectory_metric` event per valid metric key in `metrics`.
|
||||
|
||||
Keys outside METRIC_NAMES are ignored silently -- this is a public API;
|
||||
strict validation would force every test harness to chase whitespace in
|
||||
metric names.
|
||||
"""
|
||||
for m, v in metrics.items():
|
||||
if m not in METRIC_NAMES:
|
||||
continue
|
||||
try:
|
||||
value = float(v)
|
||||
except (TypeError, ValueError):
|
||||
continue
|
||||
write_event(
|
||||
store,
|
||||
kind="trajectory_metric",
|
||||
data={"metric": m, "value": value},
|
||||
severity="info",
|
||||
session_id=session_id,
|
||||
)
|
||||
|
||||
|
||||
def aggregate_trajectory(
|
||||
store: MemoryStore,
|
||||
since: datetime | None = None,
|
||||
) -> dict[str, list[tuple[datetime, float]]]:
|
||||
"""CLI support: group all trajectory_metric events by metric.
|
||||
|
||||
Returns {"m1": [(ts, value), ...], ..., "m6": [...]}.
|
||||
"""
|
||||
events = query_events(
|
||||
store, kind="trajectory_metric", since=since, limit=10000,
|
||||
)
|
||||
out: dict[str, list[tuple[datetime, float]]] = {m: [] for m in METRIC_NAMES}
|
||||
for e in events:
|
||||
m = e["data"].get("metric")
|
||||
v = e["data"].get("value")
|
||||
if m in METRIC_NAMES and v is not None:
|
||||
try:
|
||||
out[m].append((e["ts"], float(v)))
|
||||
except (TypeError, ValueError):
|
||||
continue
|
||||
return out
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- individual signals
|
||||
|
||||
|
||||
def compute_m1_clarifying_questions_per_session(
|
||||
store: MemoryStore,
|
||||
session_id: str,
|
||||
) -> float:
|
||||
"""M1: count of curiosity_question events for a session."""
|
||||
events = query_events(store, kind="curiosity_question", limit=1000)
|
||||
count = sum(1 for e in events if e.get("session_id") == session_id)
|
||||
return float(count)
|
||||
|
||||
|
||||
def compute_m3_token_budget(
|
||||
store: MemoryStore,
|
||||
session_id: str,
|
||||
) -> float:
|
||||
"""M3: mean of session_start_tokens events for this session."""
|
||||
events = query_events(store, kind="session_start_tokens", limit=100)
|
||||
session_events = [e for e in events if e.get("session_id") == session_id]
|
||||
if not session_events:
|
||||
return 0.0
|
||||
total = 0.0
|
||||
for e in session_events:
|
||||
try:
|
||||
total += float(e["data"].get("tokens", 0))
|
||||
except (TypeError, ValueError):
|
||||
continue
|
||||
return total / len(session_events)
|
||||
|
||||
|
||||
def compute_m5_curiosity_frequency(
|
||||
store: MemoryStore,
|
||||
session_id: str,
|
||||
) -> float:
|
||||
"""M5: sum of curiosity_silent_log + curiosity_question events per session."""
|
||||
silent = query_events(store, kind="curiosity_silent_log", limit=1000)
|
||||
questions = query_events(store, kind="curiosity_question", limit=1000)
|
||||
total = 0
|
||||
for ev_list in (silent, questions):
|
||||
total += sum(1 for e in ev_list if e.get("session_id") == session_id)
|
||||
return float(total)
|
||||
|
||||
|
||||
def compute_session_metrics_snapshot(
|
||||
store: MemoryStore,
|
||||
session_id: str,
|
||||
) -> dict[str, float]:
|
||||
"""Produce a partial snapshot of M1..M6 from the current event stream.
|
||||
|
||||
scope: M1/M3/M5 are computable from the event stream.
|
||||
promotion: M2/M4/M6 are now LIVE (read retrieval_used /
|
||||
profile_updated / session_started events emitted by retrieve.py /
|
||||
profile.py / session.py respectively).
|
||||
"""
|
||||
return {
|
||||
"m1": compute_m1_clarifying_questions_per_session(store, session_id),
|
||||
"m2": m2_precision_at_5_live(store),
|
||||
"m3": compute_m3_token_budget(store, session_id),
|
||||
"m4": m4_profile_variance_live(store),
|
||||
"m5": compute_m5_curiosity_frequency(store, session_id),
|
||||
"m6": m6_context_repeat_rate_live(store),
|
||||
}
|
||||
|
||||
|
||||
# -------------------------------------------------- M2/M4/M6 LIVE
|
||||
|
||||
|
||||
# Backward-compat synthetic constants (Phase 2 baseline; bench compares
|
||||
# live vs synthetic to prove the promotion is real -- see test_trajectory_live_smoke.py).
|
||||
M2_SYNTHETIC_CONSTANT: float = 0.0
|
||||
M4_SYNTHETIC_CONSTANT: float = 0.0
|
||||
M6_SYNTHETIC_CONSTANT: float = 0.0
|
||||
|
||||
|
||||
def m2_precision_at_5_synthetic() -> float:
|
||||
"""Pre-Plan-03-02 placeholder. Kept for trajectory bench comparison."""
|
||||
return M2_SYNTHETIC_CONSTANT
|
||||
|
||||
|
||||
def m4_profile_variance_synthetic() -> float:
|
||||
"""Pre-Plan-03-02 placeholder. Kept for trajectory bench comparison."""
|
||||
return M4_SYNTHETIC_CONSTANT
|
||||
|
||||
|
||||
def m6_context_repeat_rate_synthetic() -> float:
|
||||
"""Pre-Plan-03-02 placeholder. Kept for trajectory bench comparison."""
|
||||
return M6_SYNTHETIC_CONSTANT
|
||||
|
||||
|
||||
def m2_precision_at_5_live(
|
||||
store: MemoryStore,
|
||||
*,
|
||||
window: int = 100,
|
||||
) -> float:
|
||||
"""M2 LIVE: precision@5 over the last ``window`` retrieval_used events.
|
||||
|
||||
Each ``retrieval_used`` event carries ``hit_ids`` (list of UUID strings) and
|
||||
optionally a ``ground_truth`` list. When ground_truth is present, count
|
||||
hits in the top-5 that intersect ground_truth and divide by 5. When absent,
|
||||
fall back to the **hit-presence rate** -- (# events with at least one hit)
|
||||
/ (# events) -- which is a coarse but honest proxy and never returns the
|
||||
synthetic 0.0 when the system is actually retrieving.
|
||||
|
||||
The fallback path is what makes the live value differ from the synthetic
|
||||
constant in production -- the metric stops being a flat zero the moment
|
||||
retrieve.recall starts returning hits.
|
||||
"""
|
||||
events = query_events(store, kind="retrieval_used", limit=window)
|
||||
if not events:
|
||||
return 0.0
|
||||
|
||||
precisions: list[float] = []
|
||||
for ev in events:
|
||||
data = ev.get("data") or {}
|
||||
hits = data.get("hit_ids") or []
|
||||
ground_truth = set(data.get("ground_truth") or [])
|
||||
top5 = list(hits)[:5]
|
||||
if ground_truth:
|
||||
tp = sum(1 for h in top5 if h in ground_truth)
|
||||
precisions.append(tp / 5.0)
|
||||
else:
|
||||
# Fallback: hit-presence at top-5 (1.0 if any hit, else 0.0).
|
||||
precisions.append(1.0 if top5 else 0.0)
|
||||
if not precisions:
|
||||
return 0.0
|
||||
return sum(precisions) / len(precisions)
|
||||
|
||||
|
||||
def m4_profile_variance_live(
|
||||
store: MemoryStore,
|
||||
*,
|
||||
n_updates: int = 20,
|
||||
) -> float:
|
||||
"""M4 LIVE: variance over the last N profile_updated events per knob.
|
||||
|
||||
Aggregates the most recent ``n_updates`` ``profile_updated`` events,
|
||||
groups by knob, computes per-knob variance over the new values (only for
|
||||
numeric knobs -- bool/enum knobs are skipped), and returns the mean
|
||||
variance across knobs.
|
||||
|
||||
Returns 0.0 when no events exist (back-compat with the synthetic baseline).
|
||||
"""
|
||||
events = query_events(store, kind="profile_updated", limit=n_updates * 5)
|
||||
if not events:
|
||||
return 0.0
|
||||
|
||||
per_knob: dict[str, list[float]] = {}
|
||||
for ev in events[:n_updates]:
|
||||
data = ev.get("data") or {}
|
||||
knob = data.get("knob")
|
||||
new_val = data.get("new")
|
||||
if knob is None or new_val is None:
|
||||
continue
|
||||
# Skip bool/enum knobs explicitly: bool is a subclass of int, so
|
||||
# float(True/False) succeeds; we want only int/float values.
|
||||
if isinstance(new_val, bool) or not isinstance(new_val, (int, float)):
|
||||
continue
|
||||
per_knob.setdefault(str(knob), []).append(float(new_val))
|
||||
|
||||
if not per_knob:
|
||||
return 0.0
|
||||
|
||||
variances: list[float] = []
|
||||
for _knob, vals in per_knob.items():
|
||||
if len(vals) < 2:
|
||||
variances.append(0.0)
|
||||
continue
|
||||
mean = sum(vals) / len(vals)
|
||||
var = sum((v - mean) ** 2 for v in vals) / len(vals)
|
||||
variances.append(var)
|
||||
if not variances:
|
||||
return 0.0
|
||||
return sum(variances) / len(variances)
|
||||
|
||||
|
||||
def m6_context_repeat_rate_live(
|
||||
store: MemoryStore,
|
||||
*,
|
||||
window_days: int = 30,
|
||||
) -> float:
|
||||
"""M6 LIVE: context-repeat-rate over the last ``window_days`` of session_started.
|
||||
|
||||
Reads ``kind='session_started'`` events with ``data.session_state_hash``,
|
||||
counts unique vs total hashes, and returns the *repeat rate*:
|
||||
|
||||
repeat_rate = (total - unique) / total
|
||||
|
||||
A value near 0.0 means every session looked novel; near 1.0 means heavy
|
||||
context reuse (which is the continuity ideal at session ~20+).
|
||||
"""
|
||||
from datetime import datetime, timedelta, timezone
|
||||
since = datetime.now(timezone.utc) - timedelta(days=window_days)
|
||||
events = query_events(
|
||||
store, kind="session_started", since=since, limit=10000,
|
||||
)
|
||||
if not events:
|
||||
return 0.0
|
||||
|
||||
hashes: list[str] = []
|
||||
for ev in events:
|
||||
data = ev.get("data") or {}
|
||||
hsh = data.get("session_state_hash")
|
||||
if hsh:
|
||||
hashes.append(str(hsh))
|
||||
if not hashes:
|
||||
return 0.0
|
||||
total = len(hashes)
|
||||
unique = len(set(hashes))
|
||||
return (total - unique) / total
|
||||
|
||||
|
||||
def m2(store: MemoryStore) -> float:
|
||||
"""Public M2 entry point (always live)."""
|
||||
return m2_precision_at_5_live(store)
|
||||
|
||||
|
||||
def m4(store: MemoryStore) -> float:
|
||||
"""Public M4 entry point (always live)."""
|
||||
return m4_profile_variance_live(store)
|
||||
|
||||
|
||||
def m6(store: MemoryStore) -> float:
|
||||
"""Public M6 entry point (always live)."""
|
||||
return m6_context_repeat_rate_live(store)
|
||||
256
src/iai_mcp/types.py
Normal file
256
src/iai_mcp/types.py
Normal file
|
|
@ -0,0 +1,256 @@
|
|||
"""Core types for IAI-MCP.
|
||||
|
||||
Source-of-truth schema for MemoryRecord (canonical for IAI-MCP storage
|
||||
drawer + PROJECT.md constitutional rules).
|
||||
|
||||
Phase 1 storage was English raw verbatim. amended
|
||||
the schema to native-language storage. (2026-04-19)
|
||||
reverted the brain to **English-Only**: the surface (Claude) translates
|
||||
inbound text to English on the way in, and the records table stores the
|
||||
English form. The schema retains the `language` ISO-639-1 column as a
|
||||
historical marker on legacy rows; new records are tagged `"en"`.
|
||||
|
||||
Phase 2 schema additions (backward-compatible for migration):
|
||||
- language: str (ISO-639-1, required) -- D-08a
|
||||
- s5_trust_score: float [0,1] (default 0.5 neutral prior) -- prep
|
||||
- profile_modulation_gain: dict[str, float] (default {}) -- runtime gain
|
||||
- schema_version: int (1 legacy | 2 phase-2) -- migration marker
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
from uuid import UUID
|
||||
|
||||
|
||||
# (2026-04-20): revert the Phase-2 deviation back to
|
||||
# PROJECT.md line 125's original spec — bge-small-en-v1.5 (384d English-only).
|
||||
# User directive 2026-04-19: brain stores English, surface translation is
|
||||
# Claude's job. bge-m3 (1024d multilingual) remains selectable via the
|
||||
# IAI_MCP_EMBED_MODEL env var or Embedder(model_key="bge-m3") kwarg;
|
||||
# existing 1024d user stores stay readable via embedder_for_store(store)
|
||||
# (Plan 03-03 commit 808e877). No forced migration of existing data.
|
||||
DEFAULT_EMBED_DIM = 384 # bge-small-en-v1.5 native dimension (PROJECT.md line 125)
|
||||
EMBED_DIM = DEFAULT_EMBED_DIM # legacy alias for callers
|
||||
|
||||
# module-level constants (Plan 02-01 constitutional anchors)
|
||||
SCHEMA_VERSION_LEGACY = 1 # pre-Phase-2 records before migration
|
||||
SCHEMA_VERSION_V2 = 2 # schema (language + s5_trust + profile gain)
|
||||
SCHEMA_VERSION_V3 = 3 # encryption-at-rest data upgrade
|
||||
SCHEMA_VERSION_V4 = 4 # CONN-05 TEM factorization (structure_hv: bytes)
|
||||
SCHEMA_VERSION_CURRENT = SCHEMA_VERSION_V4 # newest version: written to every new record; migration bumps older rows
|
||||
SCHEMA_VERSION_ACCEPTED = frozenset({
|
||||
SCHEMA_VERSION_LEGACY,
|
||||
SCHEMA_VERSION_V2,
|
||||
SCHEMA_VERSION_V3,
|
||||
SCHEMA_VERSION_V4,
|
||||
})
|
||||
|
||||
# CONN-05 TEM factorization (Whittington-Behrens 2020 Cell 183:1249-1263).
|
||||
# Binary BSC hypervector at D=10000 bits, packed 8 bits/byte = 1250 bytes.
|
||||
# `structure_hv` on MemoryRecord is a SEPARATE first-class field alongside `embedding`
|
||||
# (NOT a "VSA retrieval layer over cosine"). Empty bytes = pre-migration sentinel.
|
||||
STRUCTURE_HV_DIM: int = 10000
|
||||
STRUCTURE_HV_BYTES: int = STRUCTURE_HV_DIM // 8 # 1250 bytes packed
|
||||
|
||||
# exactly five tiers per PROJECT.md Memory Core.
|
||||
# adds a sixth: semantic_pruned, used by
|
||||
# cleanup_schema_duplicates as a soft-delete sentinel for duplicate
|
||||
# schema records (Beer VSM S2 anti-oscillation reversibility — pruned
|
||||
# rows stay in the store and can be lifted back to "semantic" via a
|
||||
# reverse migration; physical deletion is forbidden).
|
||||
SEMANTIC_PRUNED_TIER: str = "semantic_pruned"
|
||||
TIER_ENUM = frozenset({
|
||||
"working",
|
||||
"episodic",
|
||||
"semantic",
|
||||
"procedural",
|
||||
"parametric",
|
||||
SEMANTIC_PRUNED_TIER,
|
||||
})
|
||||
|
||||
|
||||
@dataclass
|
||||
class MemoryRecord:
|
||||
"""Canonical memory record (D-08a native-language, D-14, MEM-01..06, prep).
|
||||
|
||||
Constitutional invariants:
|
||||
- `literal_surface` is always raw verbatim. Per the canonical
|
||||
form is English (Claude translates inbound surface text); legacy v2
|
||||
records may carry a non-English `language` tag and are read as-is.
|
||||
- Records with `detail_level >= 3` never decay (MEM-06, D-07).
|
||||
- Records with `never_merge=True` are skipped by ART gate (D-14 L0 guarantee).
|
||||
- `language` is a required ISO-639-1 tag; empty string is rejected.
|
||||
- `s5_trust_score` in [0, 1] (default 0.5 neutral prior, S5 identity kernel prep).
|
||||
- `schema_version` must be 1 (legacy) | 2 | 3 (Phase 2-08 encryption) | 4 (Phase 3-01 TEM).
|
||||
- `structure_hv` (Plan 03-01 CONN-05) is empty bytes (pre-migration) OR exactly
|
||||
STRUCTURE_HV_BYTES (1250) bytes (TorchHD BSC binary at D=10000).
|
||||
"""
|
||||
|
||||
# identity
|
||||
id: UUID # stable UUID4 at creation
|
||||
tier: str # "working" | "episodic" | "semantic" | "procedural" | "parametric" | "semantic_pruned"
|
||||
|
||||
# content (constitutional: raw verbatim in the user's language, D-08a)
|
||||
literal_surface: str # raw verbatim; language tag below
|
||||
aaak_index: str # AAAK metadata line (Plan 03 populates; default "")
|
||||
|
||||
# retrieval features
|
||||
embedding: list[float] # DIM from configured embedder (D-02a registry)
|
||||
|
||||
# graph + salience
|
||||
community_id: UUID | None # assigned by Plan 02; None in Phase 1
|
||||
centrality: float # computed in Plan 02; 0.0 default
|
||||
detail_level: int # 1..5; 5 = never summarize (D-08 constitutional)
|
||||
pinned: bool # user-pinned records (includes L0 identity)
|
||||
|
||||
# FSRS schema fields (MEM-06 fields only; decay scheduler is Phase 2)
|
||||
stability: float # default 0.0
|
||||
difficulty: float # default 0.0
|
||||
last_reviewed: datetime | None # default None
|
||||
never_decay: bool # auto-True when detail_level >= 3 (D-07, MEM-06)
|
||||
never_merge: bool # True for pinned L0
|
||||
|
||||
# provenance (MEM-05 edge-based reconsolidation in Phase 1)
|
||||
provenance: list[dict[str, Any]] # each entry: {"ts", "cue", "session_id"}
|
||||
|
||||
# bookkeeping
|
||||
created_at: datetime
|
||||
updated_at: datetime
|
||||
|
||||
# REQUIRED language field (keyword-only, no default) -- constitutional.
|
||||
# Placed here (before default-valued fields) so dataclass init enforces it
|
||||
# as a required kwarg for every caller.
|
||||
language: str # ISO-639-1 tag (e.g. "en", "ru", "ja", "ar")
|
||||
|
||||
# fields with defaults -- order must stay after required fields
|
||||
tags: list[str] = field(default_factory=list)
|
||||
s5_trust_score: float = 0.5 # prep; neutral prior
|
||||
profile_modulation_gain: dict[str, float] = field(default_factory=dict) # D-11
|
||||
schema_version: int = SCHEMA_VERSION_CURRENT
|
||||
# CONN-05 TEM factorization (Whittington-Behrens 2020 Cell 183:1249-1263).
|
||||
# Binary BSC hypervector at D=10000 bits, packed to STRUCTURE_HV_BYTES (1250 bytes).
|
||||
# Empty bytes default = pre-migration / lazy-bind sentinel; tem.bind_structure
|
||||
# is called at insert time to fill it. SEPARATE first-class field alongside
|
||||
# `embedding` -- structural queries are peers of cosine, not a rerank layer.
|
||||
structure_hv: bytes = field(default=b"")
|
||||
|
||||
def __post_init__(self) -> None:
|
||||
# rule from + PROJECT.md ("OFF for detail_level >= 3"):
|
||||
# high-detail records never decay, regardless of what caller passed.
|
||||
if self.detail_level >= 3:
|
||||
self.never_decay = True
|
||||
# Tier validation -- fail fast on garbage input
|
||||
if self.tier not in TIER_ENUM:
|
||||
raise ValueError(
|
||||
f"invalid tier {self.tier!r}; must be one of {sorted(TIER_ENUM)}"
|
||||
)
|
||||
# language required non-empty ISO-639-1 tag.
|
||||
if not self.language or not isinstance(self.language, str):
|
||||
raise ValueError(
|
||||
"language is a required non-empty ISO-639-1 string field "
|
||||
"(constitutional violation: D-08a)"
|
||||
)
|
||||
# prep: s5_trust_score in [0, 1].
|
||||
if not (0.0 <= self.s5_trust_score <= 1.0):
|
||||
raise ValueError(
|
||||
f"s5_trust_score must be in [0, 1], got {self.s5_trust_score}"
|
||||
)
|
||||
# Migration marker: v1 (legacy) | v2 | v3 (Plan 02-08 encryption) | v4 (Plan 03-01 TEM).
|
||||
if self.schema_version not in SCHEMA_VERSION_ACCEPTED:
|
||||
raise ValueError(
|
||||
f"schema_version must be one of {sorted(SCHEMA_VERSION_ACCEPTED)}, "
|
||||
f"got {self.schema_version}"
|
||||
)
|
||||
# CONN-05: structure_hv must be empty (pre-migration sentinel)
|
||||
# OR exactly STRUCTURE_HV_BYTES (1250) bytes for D=10000 BSC packed bits.
|
||||
if not isinstance(self.structure_hv, (bytes, bytearray)):
|
||||
raise ValueError(
|
||||
f"structure_hv must be bytes, got {type(self.structure_hv).__name__}"
|
||||
)
|
||||
if self.structure_hv and len(self.structure_hv) != STRUCTURE_HV_BYTES:
|
||||
raise ValueError(
|
||||
f"structure_hv must be empty (pre-migration) or exactly "
|
||||
f"{STRUCTURE_HV_BYTES} bytes (D={STRUCTURE_HV_DIM} BSC packed), "
|
||||
f"got {len(self.structure_hv)} bytes"
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class MemoryHit:
|
||||
"""Single retrieval result (MCP-01 shape, + D-13)."""
|
||||
|
||||
record_id: UUID
|
||||
score: float # cosine + weighted bonuses (Plan 02 fills full formula)
|
||||
reason: str # human-readable "cosine 0.87 + rich-club 0.05"
|
||||
literal_surface: str # verbatim content (MCP-01 returns content, not only id)
|
||||
adjacent_suggestions: list[UUID] # cued-recognition (Plan 03 populates)
|
||||
|
||||
|
||||
@dataclass
|
||||
class RecallResponse:
|
||||
"""Full response from memory_recall (D-12, D-13).
|
||||
|
||||
`hints` carries per-recall S4 contradiction notices +
|
||||
S5 cooldown + provisional schema candidates. Each hint dict shape:
|
||||
{"kind": "s4_contradiction" | "s5_cooldown" | "provisional_schema",
|
||||
"severity": "info" | "warning",
|
||||
"source_ids": [str(UUID), ...],
|
||||
"text": str,
|
||||
...optional kind-specific fields}
|
||||
|
||||
adds two new fields with backward-compatible defaults:
|
||||
cue_mode: str
|
||||
"verbatim" or "concept" — set by core.dispatch from the cue-router
|
||||
classifier (cue_router._classify_cue). Default "concept" preserves
|
||||
today's behaviour for callers constructing RecallResponse directly
|
||||
without a classified mode (existing 1100+ tests stay green).
|
||||
patterns_observed: list[dict]
|
||||
In concept mode, schema records (tier=semantic AND tag pattern:*)
|
||||
that would have ranked in top-K are surfaced here instead of in
|
||||
hits[]. Each entry: {"pattern": str, "evidence_count": int,
|
||||
"schema_id": str(UUID)}. Max 3 entries. Empty in verbatim mode
|
||||
(schema records excluded from candidate set entirely) and when
|
||||
no schemas were displaced. Default [] is back-compat.
|
||||
|
||||
Constitutional framing for the new fields:
|
||||
- McClelland CLS: episodic and semantic stores are distinguishable;
|
||||
their retrieval surfaces should be too — patterns_observed[] gives
|
||||
the schema layer its own surface instead of mixing it into hits[].
|
||||
- Beer VSM S1 vs S4: operations (verbatim) live at S1; intelligence
|
||||
(schema) at S4. patterns_observed[] makes S4 visible WITHOUT
|
||||
collapsing it into S1.
|
||||
"""
|
||||
|
||||
hits: list[MemoryHit] # excitatory
|
||||
anti_hits: list[MemoryHit] # inhibitory -- cosine match with opposing AAAK or contradicts edge
|
||||
activation_trace: list[UUID] # node ids touched by 2-hop spread (Plan 02 fills)
|
||||
budget_used: int # tokens used by this response
|
||||
hints: list[dict] = field(default_factory=list) # S4/S5/schema hints
|
||||
# cue-router output + concept-mode schema-split surface.
|
||||
# Defaults preserve back-compat: callers that don't classify their cue
|
||||
# see cue_mode='concept' (matches today's mode-less behaviour) and
|
||||
# patterns_observed=[] (no displaced schemas).
|
||||
cue_mode: str = "concept"
|
||||
patterns_observed: list[dict] = field(default_factory=list)
|
||||
|
||||
|
||||
@dataclass
|
||||
class EdgeUpdate:
|
||||
"""Result of memory_reinforce (MCP-02, MEM-04)."""
|
||||
|
||||
edges_boosted: int
|
||||
pairs: list[tuple[UUID, UUID]]
|
||||
# string keys for JSON serialisation ("uuid_a|uuid_b" -> weight)
|
||||
new_weights: dict[str, float]
|
||||
|
||||
|
||||
@dataclass
|
||||
class ReconsolidationReceipt:
|
||||
"""Result of memory_contradict (MCP-03, edge-based in Phase 1)."""
|
||||
|
||||
original_id: UUID
|
||||
new_record_id: UUID
|
||||
edge_type: str # "contradicts"
|
||||
ts: datetime
|
||||
135
src/iai_mcp/tz.py
Normal file
135
src/iai_mcp/tz.py
Normal file
|
|
@ -0,0 +1,135 @@
|
|||
"""D-34 IANA timezone handling (Plan 02-01, global-product mandate).
|
||||
|
||||
Every global-ready product must respect user timezone. We store all runtime
|
||||
timestamps (events table, BudgetLedger, record created_at, etc.) in UTC and
|
||||
render CLI output in the user's LOCAL timezone.
|
||||
|
||||
The user's timezone lives in ~/.iai-mcp/config.json under `user.timezone`
|
||||
as an IANA string (e.g. "America/Los_Angeles", "Europe/Moscow", "Asia/Tokyo",
|
||||
"UTC"). On first run we auto-detect from the system and seed the config file;
|
||||
thereafter the user can edit config.json to override.
|
||||
|
||||
The sleep-cycle scheduler interprets `quiet_window` (22:00-06:00) in the
|
||||
user's LOCAL time, not UTC. Multi-tenant architecture-ready: Phase 3+ deployments
|
||||
can carry per-user_id tz maps.
|
||||
|
||||
Public surface:
|
||||
- detect_tz() -> str -- best-effort IANA key from system
|
||||
- load_user_tz() -> ZoneInfo -- read config.json + auto-seed
|
||||
- to_local(dt, tz=None) -- convert UTC (or naive) to local TZ
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from zoneinfo import ZoneInfo
|
||||
|
||||
CONFIG_FILENAME = "config.json"
|
||||
|
||||
|
||||
def _config_path() -> Path:
|
||||
"""Return the path to the user's config.json.
|
||||
|
||||
Honours IAI_MCP_STORE env var so test isolation + multi-tenant layouts
|
||||
can redirect away from ~/.iai-mcp/.
|
||||
"""
|
||||
env = os.environ.get("IAI_MCP_STORE")
|
||||
root = Path(env) if env else Path.home() / ".iai-mcp"
|
||||
return root / CONFIG_FILENAME
|
||||
|
||||
|
||||
def detect_tz() -> str:
|
||||
"""Auto-detect IANA timezone from the system. Falls back to "UTC"."""
|
||||
try:
|
||||
tz = datetime.now().astimezone().tzinfo
|
||||
if tz is None:
|
||||
return "UTC"
|
||||
# ZoneInfo has .key; plain datetime.timezone does not.
|
||||
key = getattr(tz, "key", None)
|
||||
if key:
|
||||
return str(key)
|
||||
return "UTC"
|
||||
except Exception:
|
||||
return "UTC"
|
||||
|
||||
|
||||
def _seed_config(cfg_path: Path, tz_key: str) -> None:
|
||||
"""Atomically write user.timezone into config.json.
|
||||
|
||||
Preserves any existing keys in the file; only mutates user.timezone.
|
||||
Writes to a .tmp file first and os.replace()s over the target so a
|
||||
crashed process can never leave a half-written config.
|
||||
"""
|
||||
cfg_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
existing: dict = {}
|
||||
if cfg_path.exists():
|
||||
try:
|
||||
with open(cfg_path) as f:
|
||||
existing = json.load(f)
|
||||
if not isinstance(existing, dict):
|
||||
existing = {}
|
||||
except (json.JSONDecodeError, OSError):
|
||||
existing = {}
|
||||
existing.setdefault("user", {})
|
||||
if not isinstance(existing["user"], dict):
|
||||
existing["user"] = {}
|
||||
existing["user"]["timezone"] = tz_key
|
||||
tmp = cfg_path.with_suffix(".tmp")
|
||||
with open(tmp, "w") as f:
|
||||
json.dump(existing, f, indent=2)
|
||||
os.replace(tmp, cfg_path)
|
||||
|
||||
|
||||
def load_user_tz() -> ZoneInfo:
|
||||
"""Read user.timezone from config.json, auto-seed on first run.
|
||||
|
||||
Behaviour:
|
||||
- config.json missing or malformed -> detect_tz() + write seed; return ZoneInfo.
|
||||
- config.json present + user.timezone is a valid IANA string -> return ZoneInfo.
|
||||
- config.json present + user.timezone is an INVALID IANA string -> raise
|
||||
zoneinfo.ZoneInfoNotFoundError. We refuse to silently override the user's
|
||||
edit; a hard error surfaces the typo.
|
||||
"""
|
||||
cfg_path = _config_path()
|
||||
if cfg_path.exists():
|
||||
try:
|
||||
with open(cfg_path) as f:
|
||||
cfg = json.load(f)
|
||||
except (json.JSONDecodeError, OSError):
|
||||
cfg = None
|
||||
if cfg is not None and isinstance(cfg, dict):
|
||||
user = cfg.get("user")
|
||||
if isinstance(user, dict):
|
||||
tz_key = user.get("timezone")
|
||||
if isinstance(tz_key, str) and tz_key.strip():
|
||||
# Raises ZoneInfoNotFoundError on invalid IANA -- by design.
|
||||
return ZoneInfo(tz_key)
|
||||
|
||||
# No config (or config present but no user.timezone) -> detect + seed.
|
||||
detected = detect_tz()
|
||||
try:
|
||||
zi = ZoneInfo(detected)
|
||||
except Exception:
|
||||
detected = "UTC"
|
||||
zi = ZoneInfo("UTC")
|
||||
_seed_config(cfg_path, detected)
|
||||
return zi
|
||||
|
||||
|
||||
def to_local(
|
||||
utc_dt: datetime,
|
||||
tz: ZoneInfo | None = None,
|
||||
) -> datetime:
|
||||
"""Convert a UTC (or naive-UTC-assumed) datetime into the target ZoneInfo.
|
||||
|
||||
When tz is None, falls through to load_user_tz() -- but callers in hot paths
|
||||
should cache the ZoneInfo instance and pass it explicitly to avoid the
|
||||
per-call config.json read.
|
||||
"""
|
||||
if tz is None:
|
||||
tz = load_user_tz()
|
||||
if utc_dt.tzinfo is None:
|
||||
utc_dt = utc_dt.replace(tzinfo=timezone.utc)
|
||||
return utc_dt.astimezone(tz)
|
||||
104
src/iai_mcp/wake_handler.py
Normal file
104
src/iai_mcp/wake_handler.py
Normal file
|
|
@ -0,0 +1,104 @@
|
|||
"""Phase 10.5 L5 — daemon-side ``wake.signal`` consumer.
|
||||
|
||||
The TypeScript MCP wrapper (``mcp-wrapper/src/lifecycle.ts``) writes a
|
||||
small marker file at ``~/.iai-mcp/wake.signal`` when:
|
||||
|
||||
* the wrapper boots and the daemon socket is unreachable, AND
|
||||
* the platform is NOT macOS (so the wrapper cannot ``launchctl kickstart``
|
||||
the daemon directly), OR
|
||||
* a kickstart attempt failed and the wrapper has fallen back to the
|
||||
cross-platform signal file path.
|
||||
|
||||
This module owns the daemon-side consume side of that signal. It is
|
||||
**deliberately tiny**: read-and-delete on cold start, idempotent,
|
||||
race-safe with a wrapper that may be writing a fresh signal mid-consume.
|
||||
The wrapper's atomic-rename write semantics guarantee that ``read_text``
|
||||
either sees the file fully or not at all; we never have to defend
|
||||
against a torn read of the signal payload itself.
|
||||
|
||||
The placeholder integration in :func:`iai_mcp.daemon.main` calls
|
||||
:meth:`WakeHandler.consume_wake_signal` once during startup. Phase 10.6
|
||||
will dispatch the result into the lifecycle state machine's
|
||||
``WAKE_SIGNAL`` event channel — until then this module is a write-once
|
||||
hook so the wrapper's L5 path has somewhere to write to.
|
||||
|
||||
Constraints (carried from / 10.5 hard-rules):
|
||||
|
||||
- stdlib only — no third-party imports.
|
||||
- macOS-first; non-macOS callers use this same path.
|
||||
- Idempotent: a second ``consume_wake_signal()`` call returns ``False``
|
||||
cleanly without raising.
|
||||
- Race-safe: a ``FileNotFoundError`` between the existence check and the
|
||||
unlink (concurrent wrapper writes a fresh signal that gets consumed
|
||||
before we re-stat) is swallowed and reported as "no pending wake".
|
||||
|
||||
Validates: WAKE-03, (Python-side consume half).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
__all__ = ["WakeHandler"]
|
||||
|
||||
|
||||
class WakeHandler:
|
||||
"""Consume ``wake.signal`` markers written by the MCP wrapper.
|
||||
|
||||
The handler holds the absolute path to the signal file. It does NOT
|
||||
create the directory; the wrapper is responsible for ensuring
|
||||
``~/.iai-mcp/`` exists when it writes the signal. The daemon already
|
||||
creates this directory at boot via ``ProcessLock`` / ``MemoryStore``
|
||||
so by the time this handler is consulted the parent dir is present.
|
||||
"""
|
||||
|
||||
def __init__(self, wake_signal_path: Path) -> None:
|
||||
"""Store the absolute path to the signal file.
|
||||
|
||||
Args:
|
||||
wake_signal_path: Absolute path to ``wake.signal``. Caller is
|
||||
responsible for ``Path.expanduser()`` if a ``~`` was
|
||||
present in the input — production callers pass an
|
||||
already-expanded path.
|
||||
"""
|
||||
self._wake_signal_path = wake_signal_path
|
||||
|
||||
def consume_wake_signal(self) -> bool:
|
||||
"""Atomically delete the signal file if present and return whether one existed.
|
||||
|
||||
Returns:
|
||||
``True`` if a signal was present and has been consumed, else
|
||||
``False``. Idempotent — a second call after the first
|
||||
``True`` returns ``False`` (file already gone).
|
||||
|
||||
Race semantics:
|
||||
``Path.unlink(missing_ok=False)`` is the atomic delete. If
|
||||
two consumers race (this should not happen in practice; the
|
||||
daemon is a singleton via ``ProcessLock``) the loser sees
|
||||
``FileNotFoundError`` which we swallow and report as
|
||||
"no pending wake".
|
||||
"""
|
||||
try:
|
||||
self._wake_signal_path.unlink()
|
||||
except FileNotFoundError:
|
||||
return False
|
||||
except OSError:
|
||||
# Permission / FS error — surface as "no pending wake" rather
|
||||
# than raising, since the wake path must NEVER block daemon
|
||||
# boot. The wrapper will retry on its next boot if it still
|
||||
# cares.
|
||||
return False
|
||||
return True
|
||||
|
||||
def has_pending_wake(self) -> bool:
|
||||
"""Read-only check: does a wake signal currently exist?
|
||||
|
||||
Used by the doctor row to surface pending-wake state without
|
||||
consuming it. Calling ``consume_wake_signal()`` after this method
|
||||
will return ``True`` iff this method returned ``True`` and no
|
||||
other consumer raced in between.
|
||||
"""
|
||||
try:
|
||||
return self._wake_signal_path.is_file()
|
||||
except OSError:
|
||||
return False
|
||||
141
src/iai_mcp/write.py
Normal file
141
src/iai_mcp/write.py
Normal file
|
|
@ -0,0 +1,141 @@
|
|||
"""ART vigilance write gate (MEM-03, D-07) + S5 identity guard (MEM-09, D-22)
|
||||
+ prompt-injection shield (OPS-07, D-30, D-31).
|
||||
|
||||
Grossberg-style Adaptive Resonance Theory vigilance: on write, compare the new
|
||||
record against existing records by cosine similarity. If the best match exceeds
|
||||
vigilance ρ, merge; else create a new distinct record.
|
||||
|
||||
ρ is fixed at 0.95 for per (matches autistic-kernel literal_preservation=strong).
|
||||
High ρ = prefer distinct record over merge = preserves fine detail.
|
||||
|
||||
Plan 02-02 adds `guarded_insert` which layers the S5 identity gate on top of
|
||||
the ART decision. Identity-tier records (s5_trust_score >= 0.9) must carry
|
||||
the `s5_consensus` tag -- direct writes are rejected to prevent prompt-
|
||||
injection poisoning.
|
||||
|
||||
Plan 02-05 extends `guarded_insert` with a shield pre-check (OPS-07 / D-31):
|
||||
the tier is determined from record properties, and the shield is consulted
|
||||
BEFORE the S5 gate. HARD_BLOCK rejects propagate as (False, "shield: ...");
|
||||
FLAG and LOG tiers emit events but allow the write to proceed.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from uuid import UUID
|
||||
|
||||
import numpy as np
|
||||
|
||||
from iai_mcp.types import MemoryRecord
|
||||
|
||||
# fixed ρ for (matches literal_preservation=strong in autistic kernel).
|
||||
# DO NOT CHANGE without updating tests.
|
||||
VIGILANCE_RHO = 0.95 # float constant -- plan acceptance criterion greps for exact literal
|
||||
|
||||
|
||||
def cosine(a: list[float], b: list[float]) -> float:
|
||||
"""Cosine similarity in [-1, 1]. Returns 0.0 if either vector is zero-norm."""
|
||||
av = np.asarray(a, dtype=np.float64)
|
||||
bv = np.asarray(b, dtype=np.float64)
|
||||
na = float(np.linalg.norm(av))
|
||||
nb = float(np.linalg.norm(bv))
|
||||
if na == 0.0 or nb == 0.0:
|
||||
return 0.0
|
||||
return float(np.dot(av, bv) / (na * nb))
|
||||
|
||||
|
||||
def apply_art_gate(
|
||||
existing_records: list[MemoryRecord],
|
||||
new_record: MemoryRecord,
|
||||
rho: float = VIGILANCE_RHO,
|
||||
) -> tuple[str, UUID]:
|
||||
"""Return ('create', new_record.id) or ('merge', target_record_id).
|
||||
|
||||
Skips any existing record with `never_merge=True` (D-14 pinned-L0 guarantee):
|
||||
even if the input matches L0 perfectly, L0 is never overwritten.
|
||||
|
||||
Args:
|
||||
existing_records: candidates to compare against.
|
||||
new_record: the write-candidate to admit.
|
||||
rho: vigilance threshold. Defaults to VIGILANCE_RHO (0.95).
|
||||
|
||||
Returns:
|
||||
("create", new_record.id) if novelty > (1 - rho), else ("merge", target_id).
|
||||
"""
|
||||
best_sim: float = -1.0
|
||||
best_id: UUID | None = None
|
||||
for rec in existing_records:
|
||||
if rec.never_merge:
|
||||
continue # L0 and other pinned-immutable records are skipped
|
||||
sim = cosine(new_record.embedding, rec.embedding)
|
||||
if sim > best_sim:
|
||||
best_sim = sim
|
||||
best_id = rec.id
|
||||
if best_id is not None and best_sim >= rho:
|
||||
return ("merge", best_id)
|
||||
return ("create", new_record.id)
|
||||
|
||||
|
||||
def _shield_tier_for_record(record: MemoryRecord):
|
||||
"""Plan 02-05 tier determination.
|
||||
|
||||
HARD_BLOCK: pinned records OR s5_trust_score >= 0.9 (identity-tier)
|
||||
FLAG_FOR_REVIEW: records tagged "profile" (profile-knob updates)
|
||||
LOG_ONLY: everything else (content records)
|
||||
"""
|
||||
from iai_mcp.shield import ShieldTier
|
||||
|
||||
if record.pinned or record.s5_trust_score >= 0.9:
|
||||
return ShieldTier.HARD_BLOCK
|
||||
if "profile" in (record.tags or []):
|
||||
return ShieldTier.FLAG_FOR_REVIEW
|
||||
return ShieldTier.LOG_ONLY
|
||||
|
||||
|
||||
def guarded_insert(
|
||||
store,
|
||||
record: MemoryRecord,
|
||||
profile_state: dict,
|
||||
session_id: str = "-",
|
||||
) -> tuple[bool, str]:
|
||||
"""Central write gate combining shield pre-check + S5 identity check + ART gate.
|
||||
|
||||
(D-30, D-31): determine the shield tier from the record
|
||||
(HARD_BLOCK for pinned/identity-tier, FLAG for profile, LOG for content),
|
||||
evaluate the shield, then:
|
||||
- HARD_BLOCK + detection -> reject (shield_rejection event already logged)
|
||||
- FLAG + detection -> proceed (shield_flag event already logged)
|
||||
- LOG + detection -> proceed (shield_log event already logged)
|
||||
|
||||
identity-tier records (s5_trust_score >= 0.9)
|
||||
must pass through propose_invariant_update. Direct writes -- via this
|
||||
function, the MCP surface, or any other write path -- are rejected unless
|
||||
they carry the `s5_consensus` marker tag.
|
||||
|
||||
Below-identity writes (s5_trust_score < 0.9) fall through the ART gate.
|
||||
Currently we use the existing Phase-1 behaviour (create-or-merge) and
|
||||
report the outcome via the return tuple. Callers receive:
|
||||
(True, "created") -- store.insert succeeded, distinct record
|
||||
(True, "merged_into:<id>") -- ART gate merged into an existing record
|
||||
(True, "flagged") -- shield FLAG tier matched; write still proceeded
|
||||
(False, reason) -- shield OR S5 blocked the write
|
||||
"""
|
||||
# Lazy imports so write.py doesn't pull events/numpy into every read path.
|
||||
from iai_mcp.s5 import check_identity_anchor_on_write
|
||||
from iai_mcp.shield import ShieldTier, apply_shield
|
||||
|
||||
# shield pre-check.
|
||||
tier = _shield_tier_for_record(record)
|
||||
verdict = apply_shield(store, record, tier, session_id=session_id)
|
||||
if verdict.action == "reject":
|
||||
return False, f"shield: {verdict.reason}"
|
||||
flagged = verdict.action == "flag" and verdict.detected
|
||||
|
||||
ok, reason = check_identity_anchor_on_write(store, record, profile_state)
|
||||
if not ok:
|
||||
return False, reason
|
||||
|
||||
existing = store.all_records()
|
||||
gate_verdict, target = apply_art_gate(existing, record)
|
||||
if gate_verdict == "create":
|
||||
store.insert(record)
|
||||
return True, ("flagged" if flagged else "created")
|
||||
return True, f"merged_into:{target}"
|
||||
270
src/iai_mcp/write_queue.py
Normal file
270
src/iai_mcp/write_queue.py
Normal file
|
|
@ -0,0 +1,270 @@
|
|||
"""Plan 05-10 — asyncio-backed coalescing write queue for LanceDB.
|
||||
|
||||
Motivation (from 05-08 diagnosis + 05-10 plan): each synchronous
|
||||
``tbl.add([row])`` call against a LanceDB table allocates roughly
|
||||
~0.3 MB of pyarrow working-set overhead that is sub-linear per call
|
||||
but linear in call count. Seeding the store record-by-record (one
|
||||
call per record) drives peak RSS to ~1.3 GB at N=5k. This module
|
||||
coalesces inserts inside a 100 ms window (or ``max_batch`` records,
|
||||
whichever fires first) and forwards them as a single ``await
|
||||
tbl.add(batch)`` call. At N=10k with max_batch=128 the buffer
|
||||
overhead drops from ~3 GB (10000 * 0.3 MB) to ~24 MB (79 * 0.3 MB).
|
||||
|
||||
Contract (see ``tests/test_write_queue.py`` for the machine-checked
|
||||
version):
|
||||
|
||||
- ``enqueue(record)`` returns an ``asyncio.Future`` that resolves
|
||||
only after the record's batch has landed on disk. Callers that
|
||||
want sync-equivalent durability **must** await the future.
|
||||
- A single ``tbl.add(batch)`` call carries all records coalesced
|
||||
inside one window, up to ``max_batch``.
|
||||
- ``stop()`` drains pending records and flushes them synchronously
|
||||
before returning. Enqueues after ``stop()`` raise ``RuntimeError``.
|
||||
- Back-pressure: when the buffer is already at ``max_queue_size``
|
||||
the next ``enqueue()`` awaits the next flush before accepting —
|
||||
never unbounded memory growth.
|
||||
- Flush failures propagate: if ``tbl.add(batch)`` raises, every
|
||||
pending Future in that batch resolves with that exception. The
|
||||
queue itself stays running so subsequent enqueues still work.
|
||||
- ``on_flushed(batch)`` (optional) fires once per successful flush,
|
||||
synchronously inside the loop, **before** futures are resolved.
|
||||
The callback receives the exact list of records in the order
|
||||
they were flushed — use this to mirror writes to a secondary
|
||||
index (Plan 05-12 runtime-graph hook).
|
||||
|
||||
Constitutional invariants:
|
||||
- C3 (no paid-API): pure stdlib + a LanceDB async table handle.
|
||||
- C6 (LanceDB authoritative): nothing in this module short-circuits
|
||||
the write; ``tbl.add(batch)`` is the only persistence path.
|
||||
- (no drift): a resolved Future means the batch reached
|
||||
disk. An exception means no Future in that batch reached disk;
|
||||
the caller is expected to retry or surface the error.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from typing import Any, Callable, Optional
|
||||
|
||||
__all__ = ["AsyncWriteQueue"]
|
||||
|
||||
|
||||
class AsyncWriteQueue:
|
||||
"""Coalescing write queue on top of a LanceDB AsyncTable.
|
||||
|
||||
The table object only needs to expose ``await add(batch)`` — the
|
||||
tests ship a minimal ``MockAsyncTable`` that satisfies this shape.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
table
|
||||
LanceDB ``AsyncTable`` (or any object with ``async def
|
||||
add(self, batch: list[dict]) -> None``).
|
||||
coalesce_ms
|
||||
Flush window in milliseconds. On every iteration of the
|
||||
coalesce loop we wait at most this long for the next record
|
||||
before flushing whatever we have.
|
||||
max_batch
|
||||
Hard cap on records per ``tbl.add`` call. Reached before the
|
||||
timeout, triggers an immediate flush.
|
||||
max_queue_size
|
||||
Hard cap on buffered (queued + pending) records. The
|
||||
``enqueue()`` call awaits the next flush once the cap is hit.
|
||||
on_flushed
|
||||
Optional callback ``callable(batch: list) -> None`` fired
|
||||
after each successful flush, inside the queue's event loop,
|
||||
before pending futures are resolved. Exceptions raised by the
|
||||
callback are swallowed (logged as a no-op) so a bad hook can
|
||||
never break the write path.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
table: Any,
|
||||
*,
|
||||
coalesce_ms: int = 100,
|
||||
max_batch: int = 128,
|
||||
max_queue_size: int = 4096,
|
||||
on_flushed: Optional[Callable[[list], None]] = None,
|
||||
) -> None:
|
||||
self._table = table
|
||||
self._coalesce_s: float = max(coalesce_ms, 1) / 1000.0
|
||||
self._max_batch: int = max(max_batch, 1)
|
||||
self._max_queue_size: int = max(max_queue_size, 1)
|
||||
self._on_flushed = on_flushed
|
||||
|
||||
# Runtime state (set in start()).
|
||||
self._loop: Optional[asyncio.AbstractEventLoop] = None
|
||||
self._queue: Optional[asyncio.Queue] = None
|
||||
# Event set after every flush so back-pressured enqueues can wake.
|
||||
self._flush_event: Optional[asyncio.Event] = None
|
||||
self._coalesce_task: Optional[asyncio.Task] = None
|
||||
self._stopping: bool = False
|
||||
self._stopped: bool = False
|
||||
|
||||
# ------------------------------------------------------------------ lifecycle
|
||||
|
||||
async def start(self) -> None:
|
||||
"""Attach to the current loop and spin up the coalesce task."""
|
||||
if self._coalesce_task is not None:
|
||||
return
|
||||
self._loop = asyncio.get_running_loop()
|
||||
self._queue = asyncio.Queue()
|
||||
self._flush_event = asyncio.Event()
|
||||
self._stopping = False
|
||||
self._stopped = False
|
||||
self._coalesce_task = asyncio.create_task(
|
||||
self._coalesce_loop(), name="iai-mcp-write-coalesce"
|
||||
)
|
||||
|
||||
async def stop(self) -> None:
|
||||
"""Drain pending records, flush them, then shut the loop down.
|
||||
|
||||
Idempotent: calling stop() on an already-stopped queue is a
|
||||
no-op.
|
||||
"""
|
||||
if self._stopped:
|
||||
return
|
||||
self._stopping = True
|
||||
assert self._queue is not None
|
||||
# Sentinel wakes the coalesce loop out of its wait_for on an
|
||||
# otherwise-empty queue.
|
||||
await self._queue.put(_SENTINEL)
|
||||
if self._coalesce_task is not None:
|
||||
await self._coalesce_task
|
||||
self._coalesce_task = None
|
||||
self._stopped = True
|
||||
|
||||
# ------------------------------------------------------------------ enqueue
|
||||
|
||||
async def enqueue(self, record: Any) -> asyncio.Future:
|
||||
"""Append ``record`` to the coalesce buffer.
|
||||
|
||||
Returns a Future that resolves to ``None`` after the record's
|
||||
batch has been flushed (``tbl.add`` returned), or resolves
|
||||
with the exception raised by ``tbl.add`` for that batch.
|
||||
|
||||
Blocks (awaits) when the queue is already at ``max_queue_size``
|
||||
until a flush frees a slot.
|
||||
"""
|
||||
if self._stopped or self._stopping:
|
||||
raise RuntimeError("AsyncWriteQueue is stopped; cannot enqueue")
|
||||
assert self._queue is not None and self._flush_event is not None
|
||||
|
||||
# Back-pressure: wait for a flush if we're already at the cap.
|
||||
# Use a loop because multiple concurrent enqueues may race on
|
||||
# the same wake-up.
|
||||
while self._queue.qsize() >= self._max_queue_size:
|
||||
self._flush_event.clear()
|
||||
await self._flush_event.wait()
|
||||
|
||||
fut: asyncio.Future = self._loop.create_future() # type: ignore[union-attr]
|
||||
await self._queue.put(_Pending(record=record, future=fut))
|
||||
return fut
|
||||
|
||||
# ------------------------------------------------------------------ internals
|
||||
|
||||
async def _coalesce_loop(self) -> None:
|
||||
"""Main loop: collect up to ``max_batch`` records per window,
|
||||
then flush. Exits after the sentinel drain when ``stop()``
|
||||
is called.
|
||||
"""
|
||||
assert self._queue is not None and self._flush_event is not None
|
||||
while True:
|
||||
batch: list[_Pending] = []
|
||||
# First item: block indefinitely until we get something or
|
||||
# the sentinel arrives.
|
||||
first = await self._queue.get()
|
||||
if first is _SENTINEL:
|
||||
# Drain any stragglers that snuck in before the sentinel.
|
||||
while not self._queue.empty():
|
||||
item = self._queue.get_nowait()
|
||||
if item is _SENTINEL:
|
||||
continue
|
||||
batch.append(item)
|
||||
if batch:
|
||||
await self._flush(batch)
|
||||
return
|
||||
batch.append(first)
|
||||
|
||||
# Fill the batch within the coalesce window.
|
||||
deadline = self._loop.time() + self._coalesce_s # type: ignore[union-attr]
|
||||
while len(batch) < self._max_batch:
|
||||
remaining = deadline - self._loop.time() # type: ignore[union-attr]
|
||||
if remaining <= 0:
|
||||
break
|
||||
try:
|
||||
item = await asyncio.wait_for(
|
||||
self._queue.get(), timeout=remaining
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
break
|
||||
if item is _SENTINEL:
|
||||
# Flush what we have, then re-enter the outer loop
|
||||
# to let the sentinel branch above handle shutdown.
|
||||
await self._flush(batch)
|
||||
# Re-queue the sentinel so the outer loop sees it.
|
||||
await self._queue.put(_SENTINEL)
|
||||
batch = []
|
||||
break
|
||||
batch.append(item)
|
||||
|
||||
if batch:
|
||||
await self._flush(batch)
|
||||
|
||||
async def _flush(self, batch: list[_Pending]) -> None:
|
||||
"""Push a batch through ``tbl.add`` and resolve each Future."""
|
||||
records = [p.record for p in batch]
|
||||
try:
|
||||
await self._table.add(records)
|
||||
except BaseException as exc: # noqa: BLE001
|
||||
for p in batch:
|
||||
if not p.future.done():
|
||||
p.future.set_exception(exc)
|
||||
self._notify_flushed()
|
||||
return
|
||||
|
||||
# Hook first (synchronous, in-loop) — so graph-sync observes
|
||||
# the write before any caller that awaits the future can race
|
||||
# against the in-RAM graph.
|
||||
if self._on_flushed is not None:
|
||||
try:
|
||||
self._on_flushed(records)
|
||||
except Exception:
|
||||
# Invariant: a bad hook can never break the write
|
||||
# path. Swallow; structured logging lives in the
|
||||
# hook owner (store._fire_graph_sync_hook already
|
||||
# handles this for the graph-sync case).
|
||||
pass
|
||||
for p in batch:
|
||||
if not p.future.done():
|
||||
p.future.set_result(None)
|
||||
self._notify_flushed()
|
||||
|
||||
def _notify_flushed(self) -> None:
|
||||
"""Wake any enqueue() calls that are back-pressured."""
|
||||
if self._flush_event is not None and not self._flush_event.is_set():
|
||||
self._flush_event.set()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------- internals
|
||||
|
||||
|
||||
class _Pending:
|
||||
"""Record + the Future its caller is awaiting. Tiny wrapper so we
|
||||
can drop it onto asyncio.Queue without worrying about dataclass
|
||||
equality semantics (Futures don't hash)."""
|
||||
|
||||
__slots__ = ("record", "future")
|
||||
|
||||
def __init__(self, record: Any, future: asyncio.Future) -> None:
|
||||
self.record = record
|
||||
self.future = future
|
||||
|
||||
|
||||
class _Sentinel:
|
||||
"""Marker object for graceful shutdown."""
|
||||
|
||||
__repr__ = lambda self: "<AsyncWriteQueue.sentinel>" # noqa: E731
|
||||
|
||||
|
||||
_SENTINEL = _Sentinel()
|
||||
Loading…
Add table
Add a link
Reference in a new issue