Initial release: iai-mcp v0.1.0
Co-Authored-By: Claude <noreply@anthropic.com> Co-Authored-By: XNLLLLH <XNLLLLH@users.noreply.github.com>
This commit is contained in:
commit
f6b876fbe7
332 changed files with 97258 additions and 0 deletions
336
tests/test_socket_fail_loud.py
Normal file
336
tests/test_socket_fail_loud.py
Normal file
|
|
@ -0,0 +1,336 @@
|
|||
"""Plan 07-03 Wave 3 R5 daemon-side fail-loud + HIGH-3 yield acceptance tests.
|
||||
|
||||
R5 daemon-side semantics
|
||||
------------------------
|
||||
|
||||
Killing the live daemon (`kill -9` or `kill -TERM`) mid-call MUST leave NO
|
||||
orphan `iai_mcp.core` processes anywhere on the system (post-Phase-7 there
|
||||
should be ZERO `iai_mcp.core` processes under any circumstance — the
|
||||
singleton invariant), AND the next connect attempt to the socket MUST
|
||||
surface as ECONNREFUSED or ENOENT (which Wave 4's `bridge.ts` will
|
||||
translate to the wrapper-side `daemon_unreachable` rejection).
|
||||
|
||||
HIGH-3 yield acceptance (D7-09 LOCKED)
|
||||
--------------------------------------
|
||||
|
||||
The in-process C1 HUMAN-FIRST yield helper `_should_yield_to_mcp` defers
|
||||
REM cycles when EITHER `mcp_socket.active_connections > 0` OR
|
||||
`(time.monotonic() - mcp_socket.last_activity_ts) < 30`. This file exercises
|
||||
the helper directly with mocked `time.monotonic` so we never wait 35
|
||||
seconds wall-clock — keeps the suite brisk.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
import signal
|
||||
import socket as sk
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import psutil
|
||||
import pytest
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fixture: tmp socket path (mirrors test_socket_server_dispatch.py:short_socket_paths
|
||||
# but does NOT redirect concurrency.SOCKET_PATH because the daemon subprocess
|
||||
# reads IAI_DAEMON_SOCKET_PATH directly via SocketServer.serve()).
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def short_socket_paths(tmp_path):
|
||||
"""Yield (lock_path, sock_path, state_path) under a tmp /tmp/iai-fl-... dir.
|
||||
|
||||
AF_UNIX on macOS caps socket paths at ~104 bytes; pytest's tmp_path can
|
||||
be too long under xdist. Use a short /tmp/iai-fl-<pid>-<n>/ fallback.
|
||||
"""
|
||||
lock_path = tmp_path / ".lock"
|
||||
sock_dir = Path(f"/tmp/iai-fl-{os.getpid()}-{id(tmp_path)}")
|
||||
sock_dir.mkdir(parents=True, exist_ok=True)
|
||||
sock_path = sock_dir / "d.sock"
|
||||
state_path = tmp_path / ".daemon-state.json"
|
||||
|
||||
try:
|
||||
yield lock_path, sock_path, state_path
|
||||
finally:
|
||||
try:
|
||||
if sock_path.exists():
|
||||
sock_path.unlink()
|
||||
except OSError:
|
||||
pass
|
||||
try:
|
||||
sock_dir.rmdir()
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
|
||||
def _count_iai_mcp_processes() -> dict[str, int]:
|
||||
"""Snapshot iai_mcp.core / iai_mcp.daemon process counts for fail-loud assertions.
|
||||
|
||||
invariant: `iai_mcp.core` count must be 0 under all
|
||||
circumstances. The daemon is the singleton; wrappers no longer spawn
|
||||
their own Python core processes (Wave 4 bridge.ts refactor).
|
||||
"""
|
||||
counts = {"core": 0, "daemon": 0}
|
||||
for p in psutil.process_iter(["cmdline"]):
|
||||
try:
|
||||
cl = p.info.get("cmdline") or []
|
||||
if not cl:
|
||||
continue
|
||||
joined = " ".join(c or "" for c in cl)
|
||||
if "iai_mcp.core" in joined:
|
||||
counts["core"] += 1
|
||||
if "iai_mcp.daemon" in joined:
|
||||
counts["daemon"] += 1
|
||||
except (psutil.NoSuchProcess, psutil.AccessDenied):
|
||||
continue
|
||||
return counts
|
||||
|
||||
|
||||
def _spawn_daemon_for_test(sock_path: Path, store_root: Path) -> subprocess.Popen:
|
||||
"""Spawn `python -m iai_mcp.daemon` against an isolated tmp socket+store.
|
||||
|
||||
Uses IAI_DAEMON_SOCKET_PATH + IAI_MCP_STORE env overrides so the
|
||||
subprocess never touches the user's real ~/.iai-mcp/.daemon.sock.
|
||||
|
||||
IAI_DAEMON_IDLE_SHUTDOWN_SECS=99999 disables idle shutdown so the
|
||||
daemon stays alive for the duration of the test.
|
||||
"""
|
||||
env = os.environ.copy()
|
||||
env["IAI_DAEMON_SOCKET_PATH"] = str(sock_path)
|
||||
env["IAI_MCP_STORE"] = str(store_root)
|
||||
env["IAI_DAEMON_IDLE_SHUTDOWN_SECS"] = "99999"
|
||||
return subprocess.Popen(
|
||||
[sys.executable, "-m", "iai_mcp.daemon"],
|
||||
env=env,
|
||||
stdout=subprocess.DEVNULL,
|
||||
stderr=subprocess.DEVNULL,
|
||||
)
|
||||
|
||||
|
||||
def _wait_for_socket(sock_path: Path, timeout_sec: float = 30.0) -> bool:
|
||||
"""Poll for sock_path existence at 0.1 s cadence; return True on bind."""
|
||||
deadline = time.monotonic() + timeout_sec
|
||||
while time.monotonic() < deadline:
|
||||
if sock_path.exists():
|
||||
return True
|
||||
time.sleep(0.1)
|
||||
return False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 1: kill -9 daemon mid-call → no orphan iai_mcp.core, ECONNREFUSED on retry
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_kill_daemon_midcall_no_orphan_core_spawn(short_socket_paths, tmp_path):
|
||||
"""R5/A8 daemon-side: kill -9 daemon → daemon does NOT spawn any new iai_mcp.core.
|
||||
|
||||
The wrapper-side semantics (Promise rejection with daemon_unreachable, single
|
||||
retry) live in mcp-wrapper/src/bridge.ts and are tested in Wave 4.
|
||||
|
||||
invariant (DELTA-based): the daemon under test must NOT
|
||||
spawn any `iai_mcp.core` subprocesses, even on hard kill. Pre-existing
|
||||
`iai_mcp.core` processes from the host's other MCP wrappers (live
|
||||
Claude Code sessions, etc.) are out of scope — they belong to the
|
||||
user's running stack, not to this daemon. We measure the DELTA
|
||||
(after - before) to filter them out.
|
||||
"""
|
||||
_, sock_path, _ = short_socket_paths
|
||||
store_root = tmp_path / "store"
|
||||
store_root.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Snapshot existing iai_mcp.core processes BEFORE we spawn our daemon.
|
||||
# Anything still present after the kill that wasn't there now is OUR fault.
|
||||
baseline = _count_iai_mcp_processes()
|
||||
|
||||
proc = _spawn_daemon_for_test(sock_path, store_root)
|
||||
try:
|
||||
assert _wait_for_socket(sock_path, timeout_sec=30), (
|
||||
"daemon never bound socket within 30s"
|
||||
)
|
||||
|
||||
before = _count_iai_mcp_processes()
|
||||
assert before["daemon"] >= baseline["daemon"] + 1, (
|
||||
f"our daemon not visible in process list: baseline={baseline}, before={before}"
|
||||
)
|
||||
# The DELTA from baseline tells us if our daemon spawned any cores.
|
||||
# Any pre-existing cores (host's other MCP wrappers) stay constant.
|
||||
before_delta = before["core"] - baseline["core"]
|
||||
assert before_delta == 0, (
|
||||
f"our daemon spawned {before_delta} iai_mcp.core processes BEFORE kill "
|
||||
f"(baseline={baseline}, before={before}) — post-Phase-7 singleton invariant violated"
|
||||
)
|
||||
|
||||
# SIGKILL — simulate hard daemon death (the threat R5 defends against).
|
||||
proc.send_signal(signal.SIGKILL)
|
||||
proc.wait(timeout=5)
|
||||
|
||||
# Brief pause so psutil reflects the death in subsequent process_iter scans.
|
||||
time.sleep(0.5)
|
||||
|
||||
after = _count_iai_mcp_processes()
|
||||
# DELTA-based assertion: any iai_mcp.core present after the kill must
|
||||
# have been there in the baseline too. Our daemon must NEVER spawn
|
||||
# core processes on death.
|
||||
after_delta = after["core"] - baseline["core"]
|
||||
assert after_delta <= 0, (
|
||||
f"FAIL-LOUD VIOLATION: our daemon spawned {after_delta} new "
|
||||
f"iai_mcp.core processes after kill (baseline={baseline}, after={after}) "
|
||||
"— R5 + A8 invariant: post-Phase-7 daemon must never spawn a core."
|
||||
)
|
||||
|
||||
# Subsequent connect attempts MUST fail. Three acceptable outcomes:
|
||||
# - ConnectionRefusedError: socket file still present, no listener bound
|
||||
# - FileNotFoundError: socket file removed (cleanup_socket on Python 3.13+)
|
||||
# - OSError (generic): platform-dependent ECONNREFUSED variant
|
||||
s = sk.socket(sk.AF_UNIX, sk.SOCK_STREAM)
|
||||
s.settimeout(0.5)
|
||||
err_kind = None
|
||||
try:
|
||||
s.connect(str(sock_path))
|
||||
err_kind = "no_error" # unexpected — daemon should be gone
|
||||
except (ConnectionRefusedError, FileNotFoundError, OSError) as e:
|
||||
err_kind = type(e).__name__
|
||||
finally:
|
||||
try:
|
||||
s.close()
|
||||
except OSError:
|
||||
pass
|
||||
assert err_kind in (
|
||||
"ConnectionRefusedError", "FileNotFoundError", "OSError",
|
||||
), f"unexpected post-kill connect outcome: {err_kind}"
|
||||
finally:
|
||||
if proc.poll() is None:
|
||||
proc.send_signal(signal.SIGKILL)
|
||||
try:
|
||||
proc.wait(timeout=5)
|
||||
except subprocess.TimeoutExpired:
|
||||
pass
|
||||
try:
|
||||
if sock_path.exists():
|
||||
sock_path.unlink()
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 2: kill daemon during active connection → wrapper sees EOF on next read
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_kill_daemon_during_active_connection(short_socket_paths, tmp_path):
|
||||
"""R5: kill daemon while a wrapper holds an open socket → wrapper sees EOF / OSError.
|
||||
|
||||
The Wave 4 bridge.ts will translate that EOF into a `daemon_unreachable`
|
||||
rejection (which then triggers the single-retry per D7-04). This test
|
||||
just confirms the daemon-side surface: an open connection is broken
|
||||
cleanly when the daemon dies, no half-open zombie socket.
|
||||
"""
|
||||
_, sock_path, _ = short_socket_paths
|
||||
store_root = tmp_path / "store"
|
||||
store_root.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
proc = _spawn_daemon_for_test(sock_path, store_root)
|
||||
try:
|
||||
assert _wait_for_socket(sock_path, timeout_sec=30), (
|
||||
"daemon never bound socket within 30s"
|
||||
)
|
||||
|
||||
# Open a persistent connection. Send a short control message first
|
||||
# to confirm the connection is live BEFORE we kill the daemon.
|
||||
s = sk.socket(sk.AF_UNIX, sk.SOCK_STREAM)
|
||||
s.settimeout(15)
|
||||
s.connect(str(sock_path))
|
||||
msg = (json.dumps({"type": "status"}) + "\n").encode("utf-8")
|
||||
s.sendall(msg)
|
||||
|
||||
# Read the status response (proves the connection is live).
|
||||
first_response = b""
|
||||
while not first_response.endswith(b"\n"):
|
||||
chunk = s.recv(4096)
|
||||
if not chunk:
|
||||
break
|
||||
first_response += chunk
|
||||
assert first_response, "daemon never replied to initial status"
|
||||
decoded = json.loads(first_response.decode("utf-8"))
|
||||
assert decoded.get("ok") is True, decoded
|
||||
|
||||
# Kill the daemon HARD with the connection still open.
|
||||
proc.send_signal(signal.SIGKILL)
|
||||
proc.wait(timeout=5)
|
||||
|
||||
# The next read on the open socket must surface as EOF (b'') OR raise.
|
||||
# Either is an acceptable fail-loud signal for the wrapper-side
|
||||
# daemon_unreachable translation in Wave 4.
|
||||
s.settimeout(2.0)
|
||||
eof_or_error = False
|
||||
try:
|
||||
chunk = s.recv(4096)
|
||||
if chunk == b"":
|
||||
eof_or_error = True # clean EOF
|
||||
except (ConnectionResetError, BrokenPipeError, OSError):
|
||||
eof_or_error = True # OS surfaced the death
|
||||
finally:
|
||||
try:
|
||||
s.close()
|
||||
except OSError:
|
||||
pass
|
||||
assert eof_or_error, (
|
||||
"daemon kill did not surface as EOF / OSError on open connection — "
|
||||
"wrapper-side daemon_unreachable translation would silently hang"
|
||||
)
|
||||
|
||||
# Subsequent connect attempts also fail (same as test 1's tail check).
|
||||
s2 = sk.socket(sk.AF_UNIX, sk.SOCK_STREAM)
|
||||
s2.settimeout(0.5)
|
||||
err_kind = None
|
||||
try:
|
||||
s2.connect(str(sock_path))
|
||||
err_kind = "no_error"
|
||||
except (ConnectionRefusedError, FileNotFoundError, OSError) as e:
|
||||
err_kind = type(e).__name__
|
||||
finally:
|
||||
try:
|
||||
s2.close()
|
||||
except OSError:
|
||||
pass
|
||||
assert err_kind in (
|
||||
"ConnectionRefusedError", "FileNotFoundError", "OSError",
|
||||
), f"unexpected post-kill connect outcome: {err_kind}"
|
||||
finally:
|
||||
if proc.poll() is None:
|
||||
proc.send_signal(signal.SIGKILL)
|
||||
try:
|
||||
proc.wait(timeout=5)
|
||||
except subprocess.TimeoutExpired:
|
||||
pass
|
||||
try:
|
||||
if sock_path.exists():
|
||||
sock_path.unlink()
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Plan 10.6-01 Task 1.8: REMOVED the HIGH-3 yield-acceptance
|
||||
# tests (test_scheduler_yields_to_mcp_within_35s and
|
||||
# test_should_yield_called_in_loop_returns_true_every_5s).
|
||||
#
|
||||
# The D7-09 in-process C1 HUMAN-FIRST yield helper
|
||||
# `_should_yield_to_mcp(socket)` was removed in Task 1.4. The lifecycle
|
||||
# state machine + sleep_pipeline + heartbeat scanner supersede this
|
||||
# design: SLEEP-state coexistence with active MCP traffic is provided
|
||||
# by the bounded-deferral interrupt_check inside lifecycle_tick (each
|
||||
# sleep_pipeline chunk re-checks `mcp_socket.active_connections > 0
|
||||
# OR (now - last_activity_ts) < 30s` and defers if true).
|
||||
#
|
||||
# The kill-daemon-midcall tests above (test 1, test 2) cover the R5
|
||||
# fail-loud contract and stay green; they do not reference the
|
||||
# removed yield helper.
|
||||
# ---------------------------------------------------------------------------
|
||||
Loading…
Add table
Add a link
Reference in a new issue