Three critical fixes to the iai-mcp-memory-inject.js plugin:
- Race condition: inflight is now a Map<sessionID, Promise> so concurrent
callers (warm-on-create event + first transform hook) await the same
fetch instead of getting empty string. First turn now gets memory.
- Empty-memory handling: fetchMemory returns an {ok, text} result object.
An HTTP 200 with an empty body is memoized as a valid result (new user,
minimal mode). Only failed fetches (network errors or non-2xx responses)
count against the retry budget.
- Turn-limited injection: new INJECT_MAX_TURNS (default 3) stops injecting
after N turns. Memory was already in system prompt; the model has seen
it. Controls per-turn token cost that was 50x the old approach.
Also: README now documents memory-inject.js (not session-init), and
session-init.js is marked @deprecated.
147 lines
5.8 KiB
JavaScript
147 lines
5.8 KiB
JavaScript
/**
 * iai-mcp memory-injection plugin for OpenCode (approach A).
 *
 * Pulls session-start memory from the iai-mcp daemon's localhost HTTP adapter
 * and injects it directly into the model's system prompt via the
 * `experimental.chat.system.transform` hook. The memory CONTENT is placed in
 * context — no tool call, no injected "INIT" user turn, so the session title
 * is generated from the user's real first message (clean).
 *
 * REPLACES iai-mcp-session-init.js — do NOT run both. session-init forces a
 * tool call via a phantom turn (hijacks the title); this plugin needs neither.
 *
 * Requires the daemon's HTTP listener:
 *   - set IAI_DAEMON_HTTP_PORT in the daemon's systemd unit (e.g. "0" for an
 *     OS-assigned port) and restart it. The daemon writes the live port to
 *     ~/.iai-mcp/.http.port, which this plugin reads.
 *
 * wake_depth=standard is requested so l0/l1/l2/rich_club carry real content
 * (minimal mode returns only opaque handles — nothing worth injecting).
 * Override via IAI_MCP_WAKE_DEPTH (minimal|standard|deep).
 *
 * Fail-safe: any error is swallowed; system-prompt assembly must never break.
 */

// Base directory for the daemon's state files; falls back to the current
// working directory when HOME is not set.
const HOME = process.env.HOME || process.cwd();
// File this plugin reads to discover the daemon's live HTTP port.
const PORT_FILE = `${HOME}/.iai-mcp/.http.port`;
// wake_depth query value sent to the daemon; override with IAI_MCP_WAKE_DEPTH.
const WAKE_DEPTH = process.env.IAI_MCP_WAKE_DEPTH || "standard";
const FETCH_TIMEOUT_MS = 10000; // a cold runtime-graph build can exceed 5s
const MAX_ATTEMPTS = 3; // give up after this many consecutive failed fetches per session
const INJECT_MAX_TURNS = 3; // stop injecting after N turns (token-cost control)

// Per-session state, all keyed by sessionID.
const memo = new Map(); // sessionID -> injected text | null (null = permanent failure)
const inflight = new Map(); // sessionID -> Promise (await for concurrent callers)
const attempts = new Map(); // sessionID -> consecutive-failed-fetch count
const turns = new Map(); // sessionID -> number of times memory was injected

/**
 * Reads the daemon's HTTP port from PORT_FILE.
 *
 * The module import, the file read and the parse all sit inside one `try`, so
 * every failure collapses to `null` and nothing propagates to the caller. The
 * read is asynchronous so it does not block the event loop.
 *
 * @returns {Promise<number|null>} The port (1-65535), or `null` when the file
 *   is missing or unreadable, or its content does not begin with a valid port.
 */
async function readPort() {
  try {
    const { readFile } = await import("node:fs/promises");
    const raw = await readFile(PORT_FILE, "utf8");
    const port = Number.parseInt(raw.trim(), 10);
    // Values outside the TCP port range could never yield a usable URL.
    const isValidPort = Number.isInteger(port) && port > 0 && port <= 65535;
    return isValidPort ? port : null;
  } catch {
    return null; // daemon HTTP not enabled / not up yet
  }
}

/**
 * Fetches the session-start memory text from the daemon's HTTP adapter.
 *
 * @param {string} sessionId - Session identifier, sent as `session_id`.
 * @returns {Promise<{ok: boolean, text: string}>} `ok` is true only for a
 *   successful (2xx) response; `text` is then the trimmed body, which may be
 *   empty. On any failure (no port, network error, timeout, non-2xx status)
 *   the result is `{ ok: false, text: "" }`.
 */
async function fetchMemory(sessionId) {
  const port = await readPort();
  if (!port) return { ok: false, text: "" };

  const url =
    `http://127.0.0.1:${port}/memory/session-context` +
    `?session_id=${encodeURIComponent(sessionId)}` +
    `&wake_depth=${encodeURIComponent(WAKE_DEPTH)}&format=text`;

  try {
    const res = await fetch(url, { signal: AbortSignal.timeout(FETCH_TIMEOUT_MS) });
    // Always read the body so the response is fully consumed.
    const body = (await res.text()).trim();
    // An empty body on a successful response is a valid result (new user,
    // minimal mode). The body of a failed response is an error payload, not
    // memory, so it is never handed back as injectable text.
    return res.ok ? { ok: true, text: body } : { ok: false, text: "" };
  } catch {
    return { ok: false, text: "" };
  }
}

/**
 * Centralised fetch+cache so the warm-on-create event and the per-turn
 * transform hook share ONE in-flight fetch, one cache, and one attempt budget.
 * Concurrent calls for the same session are deduplicated, so a session never
 * emits duplicate daemon-side session_started events.
 *
 * - `inflight` stores the pending Promise, so concurrent callers await the
 *   same fetch instead of getting "".
 * - `memo` stores `null` once the retry budget is exhausted, so we do not
 *   retry forever.
 * - A successful-but-empty result (`{ok: true, text: ""}`) is memoized as
 *   valid; a failed fetch (`{ok: false}`) only consumes one attempt.
 *
 * @param {string} sessionId - Session whose memory text is wanted.
 * @returns {Promise<string|null>} The memory text (possibly ""), "" after a
 *   failed attempt that may still be retried, or `null` once MAX_ATTEMPTS
 *   consecutive fetches have failed.
 */
async function ensureMemory(sessionId) {
  // Already resolved — return the cached value (may be "" or null).
  const cached = memo.get(sessionId);
  if (cached !== undefined) return cached;

  // A fetch is already running — await it instead of returning "".
  const existing = inflight.get(sessionId);
  if (existing) return existing;

  // Retry budget exhausted — record the permanent failure and stop retrying.
  if ((attempts.get(sessionId) || 0) >= MAX_ATTEMPTS) {
    memo.set(sessionId, null);
    return null;
  }

  const promise = (async () => {
    let result;
    try {
      result = await fetchMemory(sessionId);
    } catch {
      // An unexpected throw is a failed attempt like any other; otherwise it
      // would bypass the counter and allow unbounded retries.
      result = { ok: false, text: "" };
    }

    if (result.ok) {
      // Success — memoize regardless of content (empty is valid).
      memo.set(sessionId, result.text);
      attempts.delete(sessionId); // reset failure counter on success
      return result.text;
    }

    // Failure — spend one attempt. Never hand back whatever text came with a
    // failed result: callers inject any non-empty return into the prompt.
    attempts.set(sessionId, (attempts.get(sessionId) || 0) + 1);
    return "";
  })();

  inflight.set(sessionId, promise);
  try {
    return await promise;
  } finally {
    inflight.delete(sessionId);
  }
}

/**
 * OpenCode plugin factory: returns the hooks that warm and inject iai-mcp
 * session memory.
 *
 * @returns {Promise<object>} An object with two hooks:
 *   - `event`: on `session.updated`, pre-fetches memory for that session.
 *   - `experimental.chat.system.transform`: appends the memory text to
 *     `output.system`, at most INJECT_MAX_TURNS times per session.
 *   Neither hook throws.
 */
export const IaiMcpMemoryInject = async () => {
  return {
    // Warm-on-create: prime the daemon graph cache (and our memo) as soon as a
    // session appears — before the user's first turn — so the first transform
    // injects immediately instead of paying the cold-build latency.
    event: async ({ event } = {}) => {
      if (event?.type !== "session.updated") return;
      const sid = event.properties?.info?.id;
      if (!sid) return;
      try {
        await ensureMemory(sid);
      } catch {
        // never throw from an event handler
      }
    },

    "experimental.chat.system.transform": async (input, output) => {
      try {
        const sid = input?.sessionID;
        if (!sid || !output || !Array.isArray(output.system)) return;

        // Turn-limited injection: stop after INJECT_MAX_TURNS to control token
        // cost. Only turns that actually injected text count toward the limit.
        // NOTE(review): this relies on the model not needing the memory block
        // repeated once it has appeared in the first N system prompts — confirm
        // against how the host assembles the prompt on later turns.
        const currentTurns = turns.get(sid) || 0;
        if (currentTurns >= INJECT_MAX_TURNS) return;

        const text = await ensureMemory(sid);
        if (text) {
          turns.set(sid, currentTurns + 1);
          output.system.push(`# iai-mcp memory (session start)\n${text}`);
        }
      } catch (err) {
        // NEVER throw — a plugin error must not break system-prompt assembly.
        // `err` may be any thrown value (even null/undefined), so it is read
        // defensively; dereferencing it blindly would throw from this handler.
        console.error(`[iai-mcp] memory inject failed: ${err?.message ?? err}`);
      }
    },
  };
};
