// iai-mcp-opencode/deploy/opencode/iai-mcp-memory-inject.js

/**
 * iai-mcp memory-injection plugin for OpenCode (approach A).
 *
 * Pulls session-start memory from the iai-mcp daemon's localhost HTTP adapter
 * and injects it directly into the model's system prompt via the
 * `experimental.chat.system.transform` hook. The memory CONTENT is placed in
 * context — no tool call, no injected "INIT" user turn, so the session title
 * is generated from the user's real first message (clean).
 *
 * REPLACES iai-mcp-session-init.js — do NOT run both. session-init forces a
 * tool call via a phantom turn (hijacks the title); this plugin needs neither.
 *
 * Requires the daemon's HTTP listener:
 *   - set IAI_DAEMON_HTTP_PORT in the daemon's systemd unit (e.g. "0" for an
 *     OS-assigned port) and restart it. The daemon writes the live port to
 *     ~/.iai-mcp/.http.port, which this plugin reads.
 *
 * wake_depth=standard is requested so l0/l1/l2/rich_club carry real content
 * (minimal mode returns only opaque handles — nothing worth injecting).
 * Override via IAI_MCP_WAKE_DEPTH (minimal|standard|deep).
 *
 * Fail-safe: any error is swallowed; system-prompt assembly must never break.
 */

// ---- Paths ------------------------------------------------------------------

// Base directory for the daemon's state files. Falls back to the current
// working directory when HOME is unset or empty.
const HOME = process.env.HOME || process.cwd();

// File from which the daemon's live HTTP port is read.
const PORT_FILE = [HOME, ".iai-mcp", ".http.port"].join("/");

// ---- Tunables ---------------------------------------------------------------

// Value sent as the `wake_depth` query parameter; IAI_MCP_WAKE_DEPTH overrides
// the "standard" default.
const WAKE_DEPTH = process.env.IAI_MCP_WAKE_DEPTH || "standard";

// Per-request timeout in milliseconds. Kept generous because a cold
// runtime-graph build can exceed 5s.
const FETCH_TIMEOUT_MS = 10000;

// Number of consecutive failed fetches tolerated per session before giving up.
const MAX_ATTEMPTS = 3;

// Number of injections allowed per session (token-cost control).
const INJECT_MAX_TURNS = 3;

// ---- Per-session state (all keyed by sessionID) -----------------------------

// Resolved result: the text to inject, or null once the session has been
// marked as a permanent failure.
const memo = new Map();

// Promise of the fetch currently in progress, shared by concurrent callers.
const inflight = new Map();

// Count of consecutive failed fetches.
const attempts = new Map();

// Count of times memory has been injected.
const turns = new Map();

/**
 * Reads the daemon's live HTTP port from the port file.
 *
 * @param {string} [portFile=PORT_FILE] - Path of the file holding the port as
 *   decimal text (surrounding whitespace is ignored).
 * @returns {Promise<number|null>} The port (1-65535), or null when the file is
 *   missing, unreadable, or does not contain exactly one valid port number.
 */
async function readPort(portFile = PORT_FILE) {
  try {
    // Loaded lazily and inside the try so an import failure is treated like
    // any other "port unavailable" condition instead of rejecting.
    const { readFile } = await import("node:fs/promises");
    const raw = (await readFile(portFile, "utf8")).trim();
    // Digits only: parseInt alone would accept inputs such as "8080abc".
    if (!/^\d+$/.test(raw)) return null;
    const port = Number.parseInt(raw, 10);
    // 0 and anything above 65535 are not usable TCP ports.
    return port >= 1 && port <= 65535 ? port : null;
  } catch {
    return null; // daemon HTTP not enabled / not up yet
  }
}

/**
 * Requests the session-start memory text from the daemon's localhost HTTP
 * adapter (`/memory/session-context`, `format=text`).
 *
 * Never rejects: every failure is reported through the returned object.
 *
 * @param {string} sessionId - Sent as the `session_id` query parameter.
 * @returns {Promise<{ok: boolean, text: string}>} `ok` is true only for a
 *   successful (2xx) response, in which case `text` is the trimmed body and
 *   may legitimately be empty. For every failure (no port, network error,
 *   timeout, non-2xx status) the result is `{ ok: false, text: "" }`.
 */
async function fetchMemory(sessionId) {
  try {
    // Inside the try so a rejecting port lookup is reported as a failure.
    const port = await readPort();
    if (!port) return { ok: false, text: "" };

    const query =
      `session_id=${encodeURIComponent(sessionId)}` +
      `&wake_depth=${encodeURIComponent(WAKE_DEPTH)}&format=text`;
    const url = `http://127.0.0.1:${port}/memory/session-context?${query}`;

    const res = await fetch(url, { signal: AbortSignal.timeout(FETCH_TIMEOUT_MS) });
    // Always read the body so the response is fully consumed.
    const body = (await res.text()).trim();
    // The body of an error response is an error message, not memory; it must
    // never be handed back as text that could end up in the prompt.
    return res.ok ? { ok: true, text: body } : { ok: false, text: "" };
  } catch {
    return { ok: false, text: "" };
  }
}

/**
 * Centralised fetch + cache, so that the warm-on-create event and the per-turn
 * transform hook share ONE in-flight fetch, one cache, and one attempt budget.
 * Sharing the in-flight promise means concurrent callers for the same session
 * trigger a single request to the daemon.
 *
 * @param {string} sessionId - Session whose memory text is wanted.
 * @returns {Promise<string|null>}
 *   - the memory text once a fetch has succeeded (cached; may be "" because an
 *     empty successful response is a valid result),
 *   - "" when this attempt failed but the retry budget is not yet exhausted,
 *   - null once MAX_ATTEMPTS consecutive fetches have failed (cached, so no
 *     further requests are made for this session).
 */
async function ensureMemory(sessionId) {
  // Already resolved: return the cached value (may be "" or null).
  const cached = memo.get(sessionId);
  if (cached !== undefined) return cached;

  // A fetch is already running: share its promise instead of starting another.
  const pending = inflight.get(sessionId);
  if (pending) return pending;

  // Retry budget exhausted: record the permanent failure and stop retrying.
  if ((attempts.get(sessionId) || 0) >= MAX_ATTEMPTS) {
    memo.set(sessionId, null);
    return null;
  }

  const promise = (async () => {
    let result;
    try {
      result = await fetchMemory(sessionId);
    } catch {
      // A rejecting fetch must still consume retry budget.
      result = { ok: false, text: "" };
    }

    if (result.ok) {
      // Success: memoize regardless of content (empty is valid).
      memo.set(sessionId, result.text);
      attempts.delete(sessionId); // reset failure counter on success
      return result.text;
    }

    attempts.set(sessionId, (attempts.get(sessionId) || 0) + 1);
    // Never surface whatever text accompanied a failed fetch (for example an
    // HTTP error body): callers inject any non-empty return value verbatim.
    return "";
  })();

  inflight.set(sessionId, promise);
  try {
    return await promise;
  } finally {
    inflight.delete(sessionId);
  }
}

/**
 * Plugin factory. Returns two hooks:
 *   - `event`: warms the memory cache when a `session.updated` event arrives.
 *   - `experimental.chat.system.transform`: appends the memory text to
 *     `output.system`, at most INJECT_MAX_TURNS times per session.
 *
 * Neither hook ever throws or rejects.
 *
 * @returns {Promise<object>} The hook table described above.
 */
export const IaiMcpMemoryInject = async () => {
  return {
    // Warm-on-create: prime the daemon graph cache (and our memo) as soon as a
    // session appears, before the user's first turn, so the first transform
    // injects immediately instead of paying the cold-build latency.
    event: async (payload) => {
      try {
        const event = payload?.event;
        if (event?.type !== "session.updated") return;
        const sid = event.properties?.info?.id;
        if (!sid) return;
        await ensureMemory(sid);
      } catch {
        // never throw from an event handler
      }
    },

    "experimental.chat.system.transform": async (input, output) => {
      try {
        const sid = input?.sessionID;
        if (!sid || !output || !Array.isArray(output.system)) return;

        // Injection limit (token-cost control): skip the fetch entirely once
        // this session has already received INJECT_MAX_TURNS injections.
        // NOTE(review): after the limit the memory text is no longer appended
        // here; confirm the host keeps earlier content in context if the model
        // is still expected to see it.
        if ((turns.get(sid) || 0) >= INJECT_MAX_TURNS) return;

        const text = await ensureMemory(sid);
        if (!text) return; // "" (nothing to inject) or null (gave up)

        // Re-read the counter after the await: other transform calls for this
        // session may have injected while this one was waiting, and using the
        // pre-await value would lose their increments and overshoot the limit.
        const used = turns.get(sid) || 0;
        if (used >= INJECT_MAX_TURNS) return;

        turns.set(sid, used + 1);
        output.system.push(`# iai-mcp memory (session start)\n${text}`);
      } catch (err) {
        // NEVER throw: a plugin error must not break system-prompt assembly.
        // `err` may be any thrown value (even undefined), so read it defensively.
        console.error(`[iai-mcp] memory inject failed: ${err?.message ?? err}`);
      }
    },
  };
};