"""Plan 07.1-04 R2/A6 acceptance — bridge.ts is a pure connector (no spawn). # History This file was renamed-in-place from the pre-Phase-7.1 test of the same name. The pre-Phase-7.1 file asserted spawn-fallback behavior: - test_cold_start_spawns_daemon_under_5s — asserted that the wrapper SPAWNS `python -m iai_mcp.daemon` when the socket is missing (`daemon_delta >= 1`). - test_warm_start_reuses_daemon_under_250ms — relied on wrapper #1 to bootstrap the daemon via spawn so wrapper #2 could attach. Phase 7.1 (this plan, 07.1-04) DELETES bridge.ts's spawn capability: the wrapper now ONLY connects to ~/.iai-mcp/.daemon.sock with a 5s timeout; on miss it throws `DaemonUnreachableError` (code -32002) and the wrapper process exits non-zero. Daemon spawning is now launchd's job (Wave 1 plist + Wave 2 install.sh + Wave 2 LISTEN_FDS branch). Both pre-7.1 tests therefore had to be restructured: - Old `test_cold_start_spawns_daemon_under_5s` is REPLACED by `test_start_throws_DaemonUnreachableError_when_socket_missing` which asserts the inverse: NO daemon spawned, wrapper exits non-zero with the new error in stderr. - Old `test_warm_start_reuses_daemon_under_250ms` is REPLACED by `test_start_succeeds_with_warm_daemon_no_extra_spawn` which pre-starts a daemon manually (subprocess.Popen of `python -m iai_mcp.daemon`), waits for socket bind, then spawns the wrapper and asserts initialize handshake succeeds AND daemon process count delta == 0 (the wrapper did NOT spawn a second daemon). # Test isolation strategy Both tests use IAI_DAEMON_SOCKET_PATH env override (HIGH-4 lock at bridge.ts module top — verified preserved through Plan 07.1-04 Task 1 edit) so they target a tmp socket and never touch the user's real ~/.iai-mcp/.daemon.sock — the production daemon (if any) is not disturbed. Delta-snapshot psutil pattern (lesson from / 07-04 SUMMARYs): we count `iai_mcp.daemon` processes BEFORE and AFTER the wrapper boot and assert the DELTA, not the absolute. On a developer machine with a live production daemon, `before["daemon"] >= 1`; an absolute `assert after["daemon"] == 1` would falsely fail. # Pattern reuse Helpers (`_count_iai_mcp_processes`, `_kill_test_daemons`, `_spawn_wrapper`, `_initialize`, `_call_memory_recall`, `_wait_for_daemon_socket`) and the `built_wrapper` fixture are kept verbatim from the pre-7.1 file — they remain valid scaffolding. The `_count_iai_mcp_processes` shape mirrors `tests/test_socket_subagent_reuse.py` and `tests/test_socket_fail_loud.py`. """ from __future__ import annotations import json import os import signal import subprocess import sys import time from pathlib import Path import psutil import pytest REPO = Path(__file__).resolve().parent.parent WRAPPER = REPO / "mcp-wrapper" # --------------------------------------------------------------------------- # Fixture: built wrapper (npm install + npm run build once per module). # --------------------------------------------------------------------------- @pytest.fixture(scope="module") def built_wrapper() -> Path: """Build the TS wrapper once per test module; reuse across tests.""" if not (WRAPPER / "node_modules").exists(): subprocess.run(["npm", "install"], cwd=WRAPPER, check=True) subprocess.run(["npm", "run", "build"], cwd=WRAPPER, check=True) dist = WRAPPER / "dist" / "index.js" assert dist.exists(), "npm run build should have produced dist/index.js" return dist # --------------------------------------------------------------------------- # Helpers: psutil snapshot, wrapper spawn, MCP handshake + recall round-trip. # --------------------------------------------------------------------------- def _count_iai_mcp_processes() -> dict[str, int]: """Snapshot iai_mcp.core / iai_mcp.daemon process counts. Mirrors `tests/test_socket_fail_loud.py:_count_iai_mcp_processes` — same shape, same delta-snapshot assertion strategy. """ counts = {"core": 0, "daemon": 0} for p in psutil.process_iter(["cmdline"]): try: cl = p.info.get("cmdline") or [] if not cl: continue joined = " ".join(c or "" for c in cl) if "iai_mcp.core" in joined: counts["core"] += 1 if "iai_mcp.daemon" in joined: counts["daemon"] += 1 except (psutil.NoSuchProcess, psutil.AccessDenied): continue return counts def _kill_test_daemons(sock_path: Path) -> None: """Cleanup helper — kill any iai_mcp.daemon processes whose env references the test sock_path. Avoids touching the user's real daemon if one is running.""" sock_str = str(sock_path) for p in psutil.process_iter(["cmdline", "environ"]): try: cl = " ".join(p.info.get("cmdline") or []) if "iai_mcp.daemon" not in cl: continue env = p.info.get("environ") or {} if env.get("IAI_DAEMON_SOCKET_PATH") == sock_str: p.send_signal(signal.SIGTERM) except (psutil.NoSuchProcess, psutil.AccessDenied): continue def _spawn_wrapper( built_wrapper: Path, env_overrides: dict[str, str] | None = None, ) -> subprocess.Popen: """Spawn the built TS wrapper with stdin/stdout pipes for JSON-RPC.""" env = os.environ.copy() env["IAI_MCP_PYTHON"] = sys.executable env["PYTHONPATH"] = str(REPO / "src") + os.pathsep + env.get("PYTHONPATH", "") if env_overrides: env.update(env_overrides) return subprocess.Popen( ["node", str(built_wrapper)], cwd=str(REPO), env=env, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, ) def _spawn_daemon_in_background( sock_path: Path, store_dir: Path ) -> subprocess.Popen: """Pre-start a daemon manually via `python -m iai_mcp.daemon`. wrappers no longer spawn the daemon themselves — that's launchd's job in production and the test's job here. We use the manual-run code path (no LISTEN_FDS env set), which the daemon supports unchanged per D7.1-09 (backward compat). """ env = os.environ.copy() env["IAI_DAEMON_SOCKET_PATH"] = str(sock_path) env["IAI_MCP_STORE"] = str(store_dir) env["IAI_DAEMON_IDLE_SHUTDOWN_SECS"] = "120" env["PYTHONPATH"] = str(REPO / "src") + os.pathsep + env.get("PYTHONPATH", "") return subprocess.Popen( [sys.executable, "-m", "iai_mcp.daemon"], cwd=str(REPO), env=env, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, ) def _initialize(proc: subprocess.Popen, rpc_id: int = 1) -> dict: """MCP initialize handshake — required before tools/call works.""" assert proc.stdin is not None and proc.stdout is not None init = { "jsonrpc": "2.0", "id": rpc_id, "method": "initialize", "params": { "protocolVersion": "2025-03-26", "capabilities": {}, "clientInfo": {"name": "iai-mcp-bridge-no-spawn-test", "version": "0.1.0"}, }, } proc.stdin.write((json.dumps(init) + "\n").encode("utf-8")) proc.stdin.flush() line = proc.stdout.readline() if not line: raise RuntimeError("wrapper closed stdout before initialize reply") resp = json.loads(line.decode("utf-8")) note = {"jsonrpc": "2.0", "method": "notifications/initialized"} proc.stdin.write((json.dumps(note) + "\n").encode("utf-8")) proc.stdin.flush() return resp def _call_memory_recall( proc: subprocess.Popen, cue: str, rpc_id: int = 2, *, timeout_sec: float = 10.0, ) -> tuple[float, dict]: """Send tools/call memory_recall + return (wall-clock-elapsed, response).""" assert proc.stdin is not None and proc.stdout is not None req = { "jsonrpc": "2.0", "id": rpc_id, "method": "tools/call", "params": { "name": "memory_recall", "arguments": {"cue": cue, "budget_tokens": 100}, }, } t0 = time.monotonic() proc.stdin.write((json.dumps(req) + "\n").encode("utf-8")) proc.stdin.flush() import select deadline = time.monotonic() + timeout_sec line = b"" while time.monotonic() < deadline: readable, _, _ = select.select([proc.stdout], [], [], 0.5) if readable: line = proc.stdout.readline() break elapsed = time.monotonic() - t0 if not line: raise RuntimeError( f"no response within {timeout_sec}s " f"(stderr: {proc.stderr.read1(2000) if proc.stderr else b'?'!r})" ) return elapsed, json.loads(line.decode("utf-8")) def _wait_for_daemon_socket(sock_path: Path, timeout_sec: float = 30.0) -> bool: """Poll for sock_path existence at 0.1s cadence; True on bind.""" deadline = time.monotonic() + timeout_sec while time.monotonic() < deadline: if sock_path.exists(): return True time.sleep(0.1) return False # --------------------------------------------------------------------------- # Tests — contract: wrappers are pure connectors, no spawn. # --------------------------------------------------------------------------- def test_start_throws_DaemonUnreachableError_when_socket_missing( built_wrapper, tmp_path ): """Phase 7.1 + mcp-tools-list-empty-cache (2026-05-02): with no daemon on the test socket, the wrapper MUST stay alive and MUST serve tools/list from the static registry within an MCP-client-friendly timeout. tools/call MUST surface daemon_unreachable as an isError response (fail-loud at the right layer). History (this is the same test slot — replaces the pre-2026-05-02 contract that asserted "wrapper exits non-zero on daemon miss"): - Pre-fix the wrapper had a top-level `await bridge.start()` BEFORE `server.connect(transport)`. On a missing/slow daemon socket the Node process either exited non-zero (after 5s timeout) OR — the bug being fixed — replied to MCP `initialize` after a long delay with no tools/list ever cached, making `mcp__iai-mcp__*` invisible for the entire client session. Old assertion 1 (non-zero exit) and assertion 2 (DaemonUnreachableError on stderr) encoded the consequence of that ordering, not the architectural contract. - Post-fix `server.connect(transport)` runs FIRST; bridge.start() is fire-and-forget; tools/list is independent of daemon state; tools/call lazy-awaits bridge readiness and surfaces daemon_unreachable as a structured tool-result error. This is strictly better — Claude Code's "Connected" status now matches reality (transport IS connected), and daemon-down failures are actionable per-call instead of opaque registry-empty. The load-bearing invariant — `daemon_delta == 0` — is UNCHANGED and asserted here exactly as before. The wrapper still must NOT spawn the daemon under any condition. """ sock_dir = Path(f"/tmp/iai-7.1-noconn-{os.getpid()}-{id(tmp_path)}") sock_dir.mkdir(parents=True, exist_ok=True) sock_path = sock_dir / "d.sock" store_dir = sock_dir / "store" store_dir.mkdir(parents=True, exist_ok=True) # Verify clean state — no socket file at our tmp path. assert not sock_path.exists(), f"tmp socket pre-exists: {sock_path}" # Baseline snapshot. The user's production daemon may exist on the # host (different socket path); we count globally and assert delta. baseline = _count_iai_mcp_processes() daemon_baseline = baseline["daemon"] core_baseline = baseline["core"] env_overrides = { "IAI_DAEMON_SOCKET_PATH": str(sock_path), "IAI_MCP_STORE": str(store_dir), } wrapper_proc = _spawn_wrapper(built_wrapper, env_overrides) try: # ---- Assertion 1 (NEW contract): wrapper survives daemon miss ---- # Wait past the bridge's 5s connectWithTimeout window (and a # generous slack for the fire-and-forget rejection to land in # the .catch handler). Wrapper MUST still be alive — its job # is to serve tools/list to MCP clients regardless of daemon # state. init_resp = _initialize(wrapper_proc, rpc_id=1) assert "result" in init_resp, f"initialize failed: {init_resp}" # tools/list — must respond from static registry within the # MCP-client tools/list timeout window (~3s observed; we allow # 4s for CI overhead). list_req = { "jsonrpc": "2.0", "id": 2, "method": "tools/list", "params": {}, } wrapper_proc.stdin.write((json.dumps(list_req) + "\n").encode("utf-8")) wrapper_proc.stdin.flush() list_t0 = time.monotonic() line = wrapper_proc.stdout.readline() list_elapsed = time.monotonic() - list_t0 assert line, "wrapper closed stdout before tools/list reply" list_resp = json.loads(line.decode("utf-8")) assert "result" in list_resp, f"tools/list error: {list_resp}" tools = list_resp["result"]["tools"] names = {t["name"] for t in tools} assert len(names) == 12, ( f"tools/list returned {len(names)} tools, expected 12. " f"names={sorted(names)}" ) assert list_elapsed < 4.0, ( f"tools/list took {list_elapsed:.2f}s with no daemon — " f"regression: wrapper is blocking server.connect on " f"bridge.start (the mcp-tools-list-empty-cache bug)." ) # ---- Assertion 2 (NEW contract): wait past bridge timeout ---- # 5s SOCKET_CONNECT_TIMEOUT_MS in bridge.ts means the in-flight # bridge.start() promise rejects ~5s after wrapper boot. The # `.catch(() => {})` on the fire-and-forget chain in index.ts # MUST swallow this rejection — wrapper must remain alive. # 7s budget = 5s timeout + 2s slack for slow Node startup. time.sleep(7.0) assert wrapper_proc.poll() is None, ( f"wrapper exited (rc={wrapper_proc.returncode}) past the " f"5s bridge connect timeout — fire-and-forget bridge.start " f"chain is leaking the rejection. The .catch(() => {{}}) on " f"the top-level chain in index.ts must absorb " f"DaemonUnreachableError." ) # ---- Assertion 3 (fail-loud at right layer): tools/call surfaces error ---- # Daemon-down failures must NOT be silent. Pre-fix the symptom # was an empty tools list (silent). Post-fix the wrapper serves # tools/list, but tools/call MUST return an error envelope so # the user sees what happened. call_req = { "jsonrpc": "2.0", "id": 3, "method": "tools/call", "params": { "name": "memory_recall", "arguments": {"cue": "no-daemon test"}, }, } wrapper_proc.stdin.write((json.dumps(call_req) + "\n").encode("utf-8")) wrapper_proc.stdin.flush() # bridge.start() lazy-await inside the call handler will hit # the 5s connect timeout again. Allow 7s. import select as _select deadline = time.monotonic() + 12.0 call_line = b"" while time.monotonic() < deadline: readable, _, _ = _select.select([wrapper_proc.stdout], [], [], 0.5) if readable: call_line = wrapper_proc.stdout.readline() break assert call_line, "wrapper did not respond to tools/call within 12s" call_resp = json.loads(call_line.decode("utf-8")) assert "result" in call_resp, f"tools/call missing result: {call_resp}" result = call_resp["result"] # The wrapper renders bridge errors as content with isError=True # (see CallToolRequestSchema handler in index.ts); some legacy # paths use the JSON-RPC `error` envelope. Either is acceptable # — what's NOT acceptable is silent success. is_error = result.get("isError") is True content_text = "" if isinstance(result.get("content"), list) and result["content"]: content_text = result["content"][0].get("text", "") or "" assert is_error or "daemon_unreachable" in content_text.lower() \ or "daemonunreachable" in content_text.lower(), ( f"tools/call did NOT surface daemon_unreachable when daemon " f"is missing — fail-loud invariant violated. result={result}" ) # ---- Assertion 4 (UNCHANGED invariant): no spawn ---- # Allow ≤1.5s for any (hypothetically) spawned-but-detached # daemon to surface in psutil. time.sleep(1.0) after = _count_iai_mcp_processes() daemon_delta = after["daemon"] - daemon_baseline assert daemon_delta == 0, ( f"REGRESSION: wrapper spawned {daemon_delta} new iai_mcp.daemon " f"process(es) (baseline={daemon_baseline}, after={after['daemon']}). " f"Phase 7.1 wrappers MUST NOT spawn the daemon — the spawn-fallback " f"chain in bridge.ts has been re-introduced." ) core_delta = after["core"] - core_baseline assert core_delta == 0, ( f"wrapper spawned {core_delta} iai_mcp.core process(es) " f"(baseline={core_baseline}, after={after['core']})" ) finally: if wrapper_proc.poll() is None: try: wrapper_proc.terminate() wrapper_proc.wait(timeout=5) except subprocess.TimeoutExpired: wrapper_proc.kill() _kill_test_daemons(sock_path) time.sleep(0.3) try: sock_path.unlink() except OSError: pass def test_start_succeeds_with_warm_daemon_no_extra_spawn(built_wrapper, tmp_path): """R2 happy path: with a daemon ALREADY running on the test socket (started manually by the test, mimicking what launchd does in production), the wrapper must connect successfully, complete the MCP initialize handshake, run a memory_recall round-trip, AND NOT spawn a second daemon. This proves: (a) bridge.ts:start() still works against a warm socket (no regression in the connect path). (b) The wrapper does NOT spawn a second daemon when one already exists (the singleton property — though in 7.1 this is trivially true because the spawn code is GONE). """ sock_dir = Path(f"/tmp/iai-7.1-warm-{os.getpid()}-{id(tmp_path)}") sock_dir.mkdir(parents=True, exist_ok=True) sock_path = sock_dir / "d.sock" store_dir = sock_dir / "store" store_dir.mkdir(parents=True, exist_ok=True) assert not sock_path.exists() # Pre-start a daemon manually (mimics launchd socket-activated spawn # in production; in tests we use the manual-run code path per # D7.1-09 backward compat). daemon_proc = _spawn_daemon_in_background(sock_path, store_dir) try: # Wait for the daemon to bind. Cold-start (bge-small load + # LanceDB open + asyncio.start_unix_server) is empirically # 3-10s on macOS. assert _wait_for_daemon_socket(sock_path, timeout_sec=30.0), ( f"daemon did not bind socket {sock_path} within 30s" ) # Snapshot AFTER daemon is up but BEFORE wrapper spawns. Any # new daemon during wrapper boot = singleton-violation regression. baseline = _count_iai_mcp_processes() daemon_baseline = baseline["daemon"] core_baseline = baseline["core"] env_overrides = { "IAI_DAEMON_SOCKET_PATH": str(sock_path), "IAI_MCP_STORE": str(store_dir), } wrapper_proc = _spawn_wrapper(built_wrapper, env_overrides) try: # MCP initialize handshake — wrapper must connect to the # warm daemon and reply. init_resp = _initialize(wrapper_proc, rpc_id=1) assert "result" in init_resp, f"initialize failed: {init_resp}" # memory_recall round-trip — proves the JSON-RPC wire path # over the socket works end-to-end. elapsed, recall_resp = _call_memory_recall( wrapper_proc, cue="phase 7.1 warm-daemon test", rpc_id=2, timeout_sec=10.0, ) # Either a result (recall hit/miss) or an error envelope is # acceptable — what we care about is that JSON-RPC came back. assert "result" in recall_resp or "error" in recall_resp, recall_resp # Round-trip should be sub-second on a warm daemon. Generous # 2s budget against test-harness overhead (subprocess startup, # MCP handshake jitter); the SPEC A6 250ms budget is verified # in Wave 6 acceptance against the production daemon. assert elapsed < 2.0, ( f"warm-daemon memory_recall took {elapsed:.2f}s, exceeds " f"2.0s safety budget" ) # Allow ≤1s for any (hypothetically) spawned daemon to surface. time.sleep(0.5) after = _count_iai_mcp_processes() # No new daemon — singleton property holds (trivially in 7.1 # because the spawn code is gone). daemon_delta = after["daemon"] - daemon_baseline assert daemon_delta == 0, ( f"REGRESSION: wrapper spawned a second daemon during boot " f"(baseline={daemon_baseline}, after={after['daemon']}, " f"delta={daemon_delta}). wrappers MUST be pure " f"connectors." ) core_delta = after["core"] - core_baseline assert core_delta == 0, ( f"wrapper spawned iai_mcp.core (delta={core_delta})" ) finally: try: wrapper_proc.terminate() wrapper_proc.wait(timeout=5) except subprocess.TimeoutExpired: wrapper_proc.kill() finally: # Stop the test daemon (we started it; we stop it). try: daemon_proc.terminate() daemon_proc.wait(timeout=10) except subprocess.TimeoutExpired: daemon_proc.kill() _kill_test_daemons(sock_path) time.sleep(0.3) try: sock_path.unlink() except OSError: pass