/**
 * iai-mcp memory-injection plugin for OpenCode (approach A).
 *
 * Pulls session-start memory from the iai-mcp daemon's localhost HTTP adapter
 * and injects it directly into the model's system prompt via the
 * `experimental.chat.system.transform` hook. The memory CONTENT is placed in
 * context — no tool call, no injected "INIT" user turn, so the session title
 * is generated from the user's real first message (clean).
 *
 * REPLACES iai-mcp-session-init.js — do NOT run both. session-init forces a
 * tool call via a phantom turn (hijacks the title); this plugin needs neither.
 *
 * Requires the daemon's HTTP listener:
 *   - set IAI_DAEMON_HTTP_PORT in the daemon's systemd unit (e.g. "0" for an
 *     OS-assigned port) and restart it. The daemon writes the live port to
 *     ~/.iai-mcp/.http.port, which this plugin reads.
 *
 * wake_depth=standard is requested so l0/l1/l2/rich_club carry real content
 * (minimal mode returns only opaque handles — nothing worth injecting).
 * Override via IAI_MCP_WAKE_DEPTH (minimal|standard|deep).
 *
 * Fail-safe: any error is swallowed; system-prompt assembly must never break.
 */

const HOME = process.env.HOME || process.cwd();
const PORT_FILE = `${HOME}/.iai-mcp/.http.port`;
const WAKE_DEPTH = process.env.IAI_MCP_WAKE_DEPTH || "standard";
const FETCH_TIMEOUT_MS = 10000; // a cold runtime-graph build can exceed 5s
const MAX_ATTEMPTS = 3; // give up after this many failed fetches per session

// NOTE(review): nothing in this module evicts entries from `memo` or
// `attempts`; they live for the lifetime of the process.
const memo = new Map(); // sessionID -> injected text (cached on success)
const attempts = new Map(); // sessionID -> failed-fetch count
const inflight = new Map(); // sessionID -> Promise<string> of the fetch in flight

/**
 * Read the daemon's live HTTP port from PORT_FILE.
 *
 * @returns {Promise<number|null>} A positive integer port, or null when the
 *   file is missing, unreadable, or does not start with a positive integer.
 */
async function readPort() {
  const fs = await import("node:fs");
  try {
    const raw = fs.readFileSync(PORT_FILE, "utf8").trim();
    const port = Number.parseInt(raw, 10);
    return Number.isInteger(port) && port > 0 ? port : null;
  } catch {
    return null; // daemon HTTP not enabled / not up yet
  }
}

/**
 * Fetch the session-context text for one session from the daemon.
 *
 * @param {string} sessionId - Session identifier sent as `session_id`.
 * @returns {Promise<string>} The trimmed response body, or "" when no port is
 *   available, the response is not ok, or the request fails / times out.
 */
async function fetchMemory(sessionId) {
  const port = await readPort();
  if (!port) return "";

  const url =
    `http://127.0.0.1:${port}/memory/session-context` +
    `?session_id=${encodeURIComponent(sessionId)}` +
    `&wake_depth=${encodeURIComponent(WAKE_DEPTH)}&format=text`;

  try {
    const res = await fetch(url, {
      signal: AbortSignal.timeout(FETCH_TIMEOUT_MS),
    });
    if (!res.ok) return "";
    return (await res.text()).trim();
  } catch {
    return ""; // fail-safe: network errors and timeouts yield "no memory"
  }
}

/**
 * Run one fetch for a session and record its outcome: cache the text in
 * `memo` on success, otherwise bump the session's failed-attempt counter.
 *
 * @param {string} sessionId
 * @returns {Promise<string>} The fetched text ("" on failure).
 */
async function loadMemory(sessionId) {
  const text = await fetchMemory(sessionId);
  if (text) {
    memo.set(sessionId, text);
  } else {
    attempts.set(sessionId, (attempts.get(sessionId) ?? 0) + 1);
  }
  return text;
}

/**
 * Centralised fetch+cache so the warm-on-create event and the per-turn
 * transform hook share ONE in-flight fetch, one cache, and one attempt budget.
 *
 * A caller that arrives while a fetch for the same session is still running
 * awaits that same promise instead of starting a second request, so a session
 * never emits duplicate daemon-side session_started events and the concurrent
 * caller still receives the text once it lands.
 *
 * @param {string} sessionId
 * @returns {Promise<string>} Cached or freshly fetched memory text; "" when
 *   the fetch failed or the session has used up MAX_ATTEMPTS failed fetches.
 */
async function ensureMemory(sessionId) {
  const cached = memo.get(sessionId);
  if (cached !== undefined) return cached;

  const pending = inflight.get(sessionId);
  if (pending) return pending; // join the fetch that is already running

  if ((attempts.get(sessionId) ?? 0) >= MAX_ATTEMPTS) return "";

  // Registered synchronously (before the first await) so that a concurrent
  // caller is guaranteed to see the pending promise.
  const request = loadMemory(sessionId);
  inflight.set(sessionId, request);
  try {
    return await request;
  } finally {
    inflight.delete(sessionId);
  }
}

/**
 * OpenCode plugin factory.
 *
 * @returns {Promise<object>} Hooks object with an `event` handler and an
 *   `experimental.chat.system.transform` handler. Neither handler throws.
 */
export const IaiMcpMemoryInject = async () => {
  return {
    // Warm-on-create: prime the daemon graph cache (and our memo) as soon as a
    // session appears — before the user's first turn — so the first transform
    // injects immediately instead of paying the cold-build latency.
    event: async ({ event }) => {
      if (event.type !== "session.updated") return;
      const sid = event.properties?.info?.id;
      if (!sid) return;
      try {
        await ensureMemory(sid);
      } catch {
        // never throw from an event handler
      }
    },

    "experimental.chat.system.transform": async (input, output) => {
      try {
        const sid = input?.sessionID;
        if (!sid || !output || !Array.isArray(output.system)) return;
        const text = await ensureMemory(sid);
        if (text) {
          output.system.push(`# iai-mcp memory (session start)\n${text}`);
        }
      } catch (err) {
        // NEVER throw — a plugin error must not break system-prompt assembly.
        // `err` may be a non-Error value (even null), so read it defensively.
        console.error(
          `[iai-mcp] memory inject failed: ${err?.message ?? String(err)}`,
        );
      }
    },
  };
};