/**
 * iai-mcp memory-injection plugin for OpenCode (approach A).
 *
 * Pulls session-start memory from the iai-mcp daemon's localhost HTTP adapter
 * and injects it directly into the model's system prompt via the
 * `experimental.chat.system.transform` hook. The memory CONTENT is placed in
 * context — no tool call, no injected "INIT" user turn, so the session title
 * is generated from the user's real first message (clean).
 *
 * REPLACES iai-mcp-session-init.js — do NOT run both. session-init forces a
 * tool call via a phantom turn (hijacks the title); this plugin needs neither.
 *
 * Requires the daemon's HTTP listener:
 *   - set IAI_DAEMON_HTTP_PORT in the daemon's systemd unit (e.g. "0" for an
 *     OS-assigned port) and restart it. The daemon writes the live port to
 *     ~/.iai-mcp/.http.port, which this plugin reads.
 *
 * wake_depth=standard is requested so l0/l1/l2/rich_club carry real content
 * (minimal mode returns only opaque handles — nothing worth injecting).
 * Override via IAI_MCP_WAKE_DEPTH (minimal|standard|deep).
 *
 * Fail-safe: any error is swallowed; system-prompt assembly must never break.
 */

const HOME = process.env.HOME || process.cwd();
const PORT_FILE = `${HOME}/.iai-mcp/.http.port`;
const WAKE_DEPTH = process.env.IAI_MCP_WAKE_DEPTH || "standard";
const FETCH_TIMEOUT_MS = 10000; // a cold runtime-graph build can exceed 5s
const MAX_ATTEMPTS = 3; // give up after this many consecutive failed fetches per session
const INJECT_MAX_TURNS = 3; // stop injecting after N turns (token-cost control)
const MAX_TCP_PORT = 65535;

const memo = new Map(); // sessionID -> injected text | null (null = permanent failure)
const inflight = new Map(); // sessionID -> Promise (await for concurrent callers)
const attempts = new Map(); // sessionID -> consecutive-failed-fetch count
const turns = new Map(); // sessionID -> number of times memory was injected

/**
 * Reads the daemon's live HTTP port from PORT_FILE.
 *
 * @returns {Promise<number|null>} A port in 1..65535, or null when the file is
 *   missing, unreadable, or does not start with a usable integer.
 */
async function readPort() {
  try {
    // Imported lazily and inside the try so even a failing import degrades to
    // "no port" instead of rejecting.
    const { readFile } = await import("node:fs/promises");
    const raw = await readFile(PORT_FILE, "utf8");
    const port = Number.parseInt(raw.trim(), 10);
    return Number.isInteger(port) && port > 0 && port <= MAX_TCP_PORT ? port : null;
  } catch {
    return null; // daemon HTTP not enabled / not up yet
  }
}

/**
 * Fetches the session-context text for one session from the local daemon.
 *
 * @param {string} sessionId - Session identifier, sent as `session_id`.
 * @returns {Promise<{ok: boolean, text: string}>} `ok` is true only for a 2xx
 *   response; `text` is the trimmed body on success and always "" on failure,
 *   so an HTTP error page can never be mistaken for memory content.
 *   Never rejects.
 */
async function fetchMemory(sessionId) {
  const port = await readPort();
  if (!port) return { ok: false, text: "" };

  const query =
    `session_id=${encodeURIComponent(sessionId)}` +
    `&wake_depth=${encodeURIComponent(WAKE_DEPTH)}&format=text`;
  const url = `http://127.0.0.1:${port}/memory/session-context?${query}`;

  try {
    const res = await fetch(url, { signal: AbortSignal.timeout(FETCH_TIMEOUT_MS) });
    // Always read the body, even on failure, so it is not left unconsumed.
    const body = (await res.text()).trim();
    // An empty body on a 2xx response is a valid result (new user, minimal
    // mode). A non-2xx body is an error message, not memory — discard it.
    return res.ok ? { ok: true, text: body } : { ok: false, text: "" };
  } catch {
    return { ok: false, text: "" }; // network failure / timeout
  }
}

/**
 * Centralised fetch+cache so the warm-on-create event and the per-turn
 * transform hook share ONE in-flight fetch, one cache, and one attempt budget.
 * This dedupes concurrent calls so a session never triggers duplicate requests
 * to the daemon.
 *
 * - `inflight` stores the Promise so concurrent callers await the same fetch.
 * - `memo` stores null once MAX_ATTEMPTS consecutive fetches have failed, so
 *   we do not retry forever.
 * - A successful-but-empty response is memoized as ""; a failed fetch is not
 *   memoized and counts against the retry budget.
 *
 * @param {string} sessionId - Session identifier used as the cache key.
 * @returns {Promise<string|null>} The memory text ("" when empty or when this
 *   attempt failed), or null once the session has permanently failed.
 */
async function ensureMemory(sessionId) {
  // Already resolved — return cached value (may be "" or null).
  const cached = memo.get(sessionId);
  if (cached !== undefined) return cached;

  // A fetch is already running — share it.
  const pending = inflight.get(sessionId);
  if (pending) return pending;

  // Retry budget exhausted — record the permanent failure and stop.
  if ((attempts.get(sessionId) ?? 0) >= MAX_ATTEMPTS) {
    memo.set(sessionId, null);
    return null;
  }

  const promise = (async () => {
    const { ok, text } = await fetchMemory(sessionId);
    if (!ok) {
      attempts.set(sessionId, (attempts.get(sessionId) ?? 0) + 1);
      return ""; // never surface anything from a failed fetch
    }
    memo.set(sessionId, text); // memoize regardless of content (empty is valid)
    attempts.delete(sessionId); // reset failure counter on success
    return text;
  })();

  inflight.set(sessionId, promise);
  try {
    return await promise;
  } finally {
    inflight.delete(sessionId);
  }
}

/**
 * Drops every piece of per-session state held by this module.
 *
 * @param {string} sessionId - Session whose cached state should be released.
 */
function forgetSession(sessionId) {
  memo.delete(sessionId);
  inflight.delete(sessionId);
  attempts.delete(sessionId);
  turns.delete(sessionId);
}

/**
 * OpenCode plugin factory.
 *
 * @returns {Promise<object>} Hook table with an `event` handler and the
 *   `experimental.chat.system.transform` hook. Neither hook ever throws.
 */
export const IaiMcpMemoryInject = async () => {
  return {
    // Warm-on-create: prime the daemon graph cache (and our memo) as soon as a
    // session appears — before the user's first turn — so the first transform
    // injects immediately instead of paying the cold-build latency.
    event: async ({ event }) => {
      try {
        const sid = event?.properties?.info?.id;
        if (!sid) return;

        if (event.type === "session.updated") {
          await ensureMemory(sid);
        } else if (event.type === "session.deleted") {
          // Release cached text/counters so the maps do not grow without bound
          // in a long-lived process.
          // NOTE(review): assumes the host emits `session.deleted` with the
          // same `properties.info.id` shape as `session.updated` — confirm. If
          // it does not, this branch is simply never taken.
          forgetSession(sid);
        }
      } catch {
        // never throw from an event handler
      }
    },

    "experimental.chat.system.transform": async (input, output) => {
      try {
        const sid = input?.sessionID;
        if (!sid || !output || !Array.isArray(output.system)) return;

        // Turn-limited injection: stop after INJECT_MAX_TURNS to control token
        // cost. Memory was already in the system prompt for the first N turns.
        if ((turns.get(sid) ?? 0) >= INJECT_MAX_TURNS) return;

        const text = await ensureMemory(sid);
        if (!text) return; // "" (empty / failed attempt) or null (gave up)

        // Re-read the counter after the await so overlapping transforms for
        // the same session each count as one injection.
        turns.set(sid, (turns.get(sid) ?? 0) + 1);
        output.system.push(`# iai-mcp memory (session start)\n${text}`);
      } catch (err) {
        // NEVER throw — a plugin error must not break system-prompt assembly.
        // `err` may be a non-Error value (even null), so read it defensively.
        console.error(`[iai-mcp] memory inject failed: ${err?.message ?? err}`);
      }
    },
  };
};