From 7559bac57f8e615c66051bbe29d37f3d4f6c399d Mon Sep 17 00:00:00 2001 From: Apunkt Date: Wed, 3 Jun 2026 16:47:55 +0200 Subject: [PATCH] fix(opencode): fix memory-inject race condition, empty-memory ban, add turn limit Three critical fixes to the iai-mcp-memory-inject.js plugin: - Race condition: inflight is now a Map so concurrent callers (warm-on-create event + first transform hook) await the same fetch instead of getting empty string. First turn now gets memory. - Empty-memory ban: fetchMemory returns {ok, text} tuple. HTTP 200 with empty body is memoized as valid (new user, minimal mode). Only actual network errors count against the retry budget. - Turn-limited injection: new INJECT_MAX_TURNS (default 3) stops injecting after N turns. Memory was already in system prompt; the model has seen it. Controls per-turn token cost that was 50x the old approach. Also: README now documents memory-inject.js (not session-init), and session-init.js is marked @deprecated. --- README.md | 12 ++--- deploy/opencode/iai-mcp-memory-inject.js | 66 ++++++++++++++++++------ deploy/opencode/iai-mcp-session-init.js | 15 ++++-- 3 files changed, 69 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index 1c4454f..62fb52c 100644 --- a/README.md +++ b/README.md @@ -142,13 +142,13 @@ mkdir -p ~/.config/opencode/plugins cp deploy/opencode/iai-mcp-capture.js ~/.config/opencode/plugins/ ``` -**Session-init plugin** — automatically loads session background memory on every new session. The model calls `memory_session_context` before responding, so it has your identity, topic cluster, and rich club context from turn one. +**Memory-inject plugin** — automatically loads session background memory on every new session via the daemon's HTTP adapter. Memory is injected into the model's system prompt (clean, no phantom messages, preserves session title). 
```bash -cp deploy/opencode/iai-mcp-session-init.js ~/.config/opencode/plugins/ +cp deploy/opencode/iai-mcp-memory-inject.js ~/.config/opencode/plugins/ ``` -Note: a brief "INIT" message will appear in chat on the first turn. This is the mechanism that delivers the system instruction to the model. +This plugin requires the daemon's HTTP listener. Set `IAI_DAEMON_HTTP_PORT=0` in your daemon's systemd/launchd config and restart it. The daemon writes the live port to `~/.iai-mcp/.http.port`, which the plugin reads automatically. Make sure `@opencode-ai/plugin` is installed: @@ -163,7 +163,7 @@ Register both plugins in `~/.config/opencode/config.json`: { "plugin": [ "~/.config/opencode/plugins/iai-mcp-capture.js", - "~/.config/opencode/plugins/iai-mcp-session-init.js" + "~/.config/opencode/plugins/iai-mcp-memory-inject.js" ] } ``` @@ -176,7 +176,7 @@ Add the MCP server and plugins to `~/.config/opencode/config.json`: { "plugin": [ "~/.config/opencode/plugins/iai-mcp-capture.js", - "~/.config/opencode/plugins/iai-mcp-session-init.js" + "~/.config/opencode/plugins/iai-mcp-memory-inject.js" ], "mcp": { "iai-mcp": { @@ -385,7 +385,7 @@ Limitations worth knowing about: Claude Code is the primary host, validated in daily use. -OpenCode is supported via the `iai-mcp-capture.js` and `iai-mcp-session-init.js` plugins (see Install the OpenCode plugins above) and MCP server config in `~/.config/opencode/config.json`. +OpenCode is supported via the `iai-mcp-capture.js` and `iai-mcp-memory-inject.js` plugins (see Install the OpenCode plugins above) and MCP server config in `~/.config/opencode/config.json`. The memory-inject plugin requires the daemon's HTTP listener (`IAI_DAEMON_HTTP_PORT` env var). Claude Desktop should work (uses `claude_desktop_config.json` instead of `~/.claude.json`) but hasn't been tested end to end. 
diff --git a/deploy/opencode/iai-mcp-memory-inject.js b/deploy/opencode/iai-mcp-memory-inject.js index a1c47a3..81162e4 100644 --- a/deploy/opencode/iai-mcp-memory-inject.js +++ b/deploy/opencode/iai-mcp-memory-inject.js @@ -26,11 +26,13 @@ const HOME = process.env.HOME || process.cwd(); const PORT_FILE = `${HOME}/.iai-mcp/.http.port`; const WAKE_DEPTH = process.env.IAI_MCP_WAKE_DEPTH || "standard"; const FETCH_TIMEOUT_MS = 10000; // a cold runtime-graph build can exceed 5s -const MAX_ATTEMPTS = 3; // give up after this many failed fetches per session +const MAX_ATTEMPTS = 3; // give up after this many consecutive failed fetches per session +const INJECT_MAX_TURNS = 3; // stop injecting after N turns (token-cost control) -const memo = new Map(); // sessionID -> injected text (cached on success) -const attempts = new Map(); // sessionID -> failed-fetch count -const inflight = new Set(); // sessionIDs with a fetch in flight (dedupe warm+inject) +const memo = new Map(); // sessionID -> injected text | null (null = permanent failure) +const inflight = new Map(); // sessionID -> Promise (await for concurrent callers) +const attempts = new Map(); // sessionID -> consecutive-failed-fetch count +const turns = new Map(); // sessionID -> number of times memory was injected async function readPort() { const fs = await import("node:fs"); @@ -44,17 +46,19 @@ async function readPort() { async function fetchMemory(sessionId) { const port = await readPort(); - if (!port) return ""; + if (!port) return { ok: false, text: "" }; const url = `http://127.0.0.1:${port}/memory/session-context` + `?session_id=${encodeURIComponent(sessionId)}` + `&wake_depth=${encodeURIComponent(WAKE_DEPTH)}&format=text`; try { const res = await fetch(url, { signal: AbortSignal.timeout(FETCH_TIMEOUT_MS) }); - if (!res.ok) return ""; - return (await res.text()).trim(); + const text = (await res.text()).trim(); + // Distinguish network failure from valid-but-empty response. 
+ // An empty body on HTTP 200 is a valid result (new user, minimal mode); a non-2xx body is an error page, not memory, so it is dropped. + return { ok: res.ok, text: res.ok ? text : "" }; } catch { - return ""; + return { ok: false, text: "" }; } } @@ -62,17 +66,41 @@ async function fetchMemory(sessionId) { // transform hook share ONE in-flight fetch, one cache, and one attempt budget. // This dedupes concurrent calls so a session never emits duplicate daemon-side // session_started events. +// +// Fix: inflight stores a Promise so concurrent callers await the same fetch +// instead of getting "". Fix: memo stores null for permanent failure so we +// don't retry forever. Fix: distinguish {ok:true,text:""} (valid empty) from +// {ok:false} (network error). async function ensureMemory(sessionId) { + // Already resolved — return cached value (may be "" or null). const cached = memo.get(sessionId); if (cached !== undefined) return cached; - if (inflight.has(sessionId)) return ""; // a fetch is already running - if ((attempts.get(sessionId) || 0) >= MAX_ATTEMPTS) return ""; - inflight.add(sessionId); - try { - const text = await fetchMemory(sessionId); - if (text) memo.set(sessionId, text); - else attempts.set(sessionId, (attempts.get(sessionId) || 0) + 1); + + // A fetch is already running — await it instead of returning "". + const existing = inflight.get(sessionId); + if (existing) return existing; + + // Permanent failure — stop retrying. + if ((attempts.get(sessionId) || 0) >= MAX_ATTEMPTS) { + memo.set(sessionId, null); + return null; + } + + const promise = (async () => { + const { ok, text } = await fetchMemory(sessionId); + if (ok) { + // HTTP 200 — memoize regardless of content (empty is valid). + memo.set(sessionId, text); + attempts.delete(sessionId); // reset failure counter on success + } else { + // Network or non-2xx error — increment counter for retry budget. 
+ attempts.set(sessionId, (attempts.get(sessionId) || 0) + 1); + } return text; + })(); + inflight.set(sessionId, promise); + try { + return await promise; } finally { inflight.delete(sessionId); } @@ -98,8 +126,16 @@ export const IaiMcpMemoryInject = async () => { try { const sid = input?.sessionID; if (!sid || !output || !Array.isArray(output.system)) return; + + // Turn-limited injection: stop after INJECT_MAX_TURNS to control token cost. + // Memory was already in the system prompt for the first N turns; the model + // has seen it and doesn't need repetition every turn. + const currentTurns = turns.get(sid) || 0; + if (currentTurns >= INJECT_MAX_TURNS) return; + const text = await ensureMemory(sid); if (text) { + turns.set(sid, currentTurns + 1); output.system.push(`# iai-mcp memory (session start)\n${text}`); } } catch (err) { diff --git a/deploy/opencode/iai-mcp-session-init.js b/deploy/opencode/iai-mcp-session-init.js index 07adfbf..0949d76 100644 --- a/deploy/opencode/iai-mcp-session-init.js +++ b/deploy/opencode/iai-mcp-session-init.js @@ -1,8 +1,17 @@ /** - * iai-mcp session-init plugin for OpenCode. + * @deprecated Use `iai-mcp-memory-inject.js` instead. * - * Sends SDK prompt on session.updated to inject the system instruction. - * The model calls memory_session_context on every new session. + * This plugin forces a tool call via a phantom "INIT" user turn, which + * corrupts session title generation. The memory-inject plugin uses the + * system.transform hook to inject memory directly into the system prompt + * — no phantom turn, clean titles, memory in the system prompt from the first turn. + * + * Retained temporarily for backward compatibility. Will be removed in a + * future release. Migrate to iai-mcp-memory-inject.js: + * + * cp deploy/opencode/iai-mcp-memory-inject.js ~/.config/opencode/plugins/ + * + * Then replace this plugin reference in ~/.config/opencode/config.json. */ export const IaiMcpSessionInit = async ({