Initial release: iai-mcp v0.1.0

Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: XNLLLLH <XNLLLLH@users.noreply.github.com>
This commit is contained in:
Areg Noya 2026-05-06 01:04:47 -07:00
commit f6b876fbe7
332 changed files with 97258 additions and 0 deletions

0
tests/__init__.py Normal file
View file

41
tests/conftest.py Normal file
View file

@ -0,0 +1,41 @@
"""Project-wide pytest fixtures for the IAI-MCP test suite.
Phase 07.10 (file-based crypto key migration) removed the keyring backend
from `iai_mcp.crypto.CryptoKey.get_or_create()`. Pre-existing tests that
exercised the daemon, store, events, recall, and CLI paths relied on the
keyring auto-fallback to source the encryption key in test environments.
After Phase 07.10, the runtime path is **file -> passphrase env -> error**
with no keyring fallback, so those tests now hit `CryptoKeyError` unless
either the file or the passphrase is set.
This module's autouse fixture sets `IAI_MCP_CRYPTO_PASSPHRASE` to a fixed
test passphrase for every test session, restoring the deterministic
`derive_key_from_passphrase(...)` path that the test suite expects.
Production behavior is unaffected -- the production daemon never sets
this env var and instead reads the 32-byte file at `{IAI_MCP_STORE}/.crypto.key`
written by `iai-mcp crypto migrate-to-file` or `iai-mcp crypto init`.
The dedicated file-backend tests in `tests/test_crypto_file_backend.py`
override this fixture per-test by clearing the env var or by writing an
explicit `.crypto.key` file in their `tmp_path` fixtures.
"""
from __future__ import annotations
import os
import pytest
_TEST_PASSPHRASE = "iai-mcp-test-passphrase-2026-04-30-phase-07.10"
@pytest.fixture(autouse=True)
def _crypto_passphrase_env(monkeypatch: pytest.MonkeyPatch) -> None:
    """Default IAI_MCP_CRYPTO_PASSPHRASE to the test passphrase.

    Runs automatically for every test. A value already present in the
    process environment is left untouched. A test that needs the
    missing-passphrase error path can call
    `monkeypatch.delenv("IAI_MCP_CRYPTO_PASSPHRASE", raising=False)` in its
    own body to undo this default.
    """
    env_var = "IAI_MCP_CRYPTO_PASSPHRASE"
    if env_var in os.environ:
        # Respect whatever the caller exported before starting pytest.
        return
    monkeypatch.setenv(env_var, _TEST_PASSPHRASE)

10
tests/fixtures/bedtime/ar.txt vendored Normal file
View file

@ -0,0 +1,10 @@
تصبح على خير yes
ليلة سعيدة yes
أنا متعب سأنام yes
إلى الغد yes
وقت النوم yes
قال البطل وداعا ومضى no
كلمة السلام في الأدب العربي no
أكمل العمل على المشروع no
اريد قهوة الآن no
موضوع مهم للبحث no

10
tests/fixtures/bedtime/de.txt vendored Normal file
View file

@ -0,0 +1,10 @@
gute Nacht yes
ich gehe jetzt ins Bett yes
ich bin müde yes
bis morgen yes
Schlafenszeit yes
der Bösewicht sagte auf Wiedersehen und verschwand no
das Wort Kaffee kommt aus dem Arabischen no
dieser Code ist sehr kompliziert no
ein Filmtitel wäre gut no
Kinder brauchen feste Routinen no

10
tests/fixtures/bedtime/en.txt vendored Normal file
View file

@ -0,0 +1,10 @@
good night yes
I'm heading to bed yes
I'm really tired, going to sleep yes
catch you tomorrow yes
it's bedtime yes
I need to finish this code before the deadline no
this function returns a list of users no
the phrase means something specific in context no
let's review the codebase together no
stories are a genre of children's literature no

10
tests/fixtures/bedtime/es.txt vendored Normal file
View file

@ -0,0 +1,10 @@
buenas noches yes
me voy a dormir yes
estoy cansado yes
hasta mañana yes
hora de dormir yes
el villano dijo adiós y se fue no
la frase café con leche en español no
este código es muy complicado no
un título de película interesante no
los niños necesitan rutinas no

10
tests/fixtures/bedtime/fr.txt vendored Normal file
View file

@ -0,0 +1,10 @@
bonne nuit yes
je vais me coucher yes
je suis fatigué yes
à demain yes
il est l'heure de dormir yes
le héros dit au revoir et partit no
l'expression café au lait en français no
ce code est très compliqué no
un titre de film intéressant no
les enfants aiment les histoires no

10
tests/fixtures/bedtime/ja.txt vendored Normal file
View file

@ -0,0 +1,10 @@
おやすみ yes
おやすみなさい yes
寝ます yes
また明日 yes
疲れた yes
小説のキャラはさよならと言って去った no
動詞の活用を教えて no
今日はバグを直す no
映画の話をしよう no
キャラクターのアニメを見る no

10
tests/fixtures/bedtime/ru.txt vendored Normal file
View file

@ -0,0 +1,10 @@
спокойной ночи yes
пойду спать yes
я устал, ложусь yes
до завтра yes
пора ложиться yes
нужно успеть до полуночи no
код работает правильно no
эта фраза означает нечто конкретное no
слишком много багов в проекте no
утром встречаемся в офисе no

10
tests/fixtures/bedtime/zh.txt vendored Normal file
View file

@ -0,0 +1,10 @@
晚安 yes
我要睡觉 yes
累了 yes
明天见 yes
该睡觉了 yes
反派说了再见然后离开 no
这个词的起源很有趣 no
这段代码非常复杂 no
一部有意思的电影 no
孩子需要固定的日常 no

View file

@ -0,0 +1,53 @@
[
{"id": "en-01", "lang": "en", "formal": "The proposal is, therefore, accepted.", "informal": "yeah ok the proposal works"},
{"id": "en-02", "lang": "en", "formal": "I would like to inform you that the deadline has been extended; however, the scope remains unchanged.", "informal": "fyi deadline pushed but scope same"},
{"id": "en-03", "lang": "en", "formal": "Accordingly, we shall proceed with the implementation phase once the review is complete.", "informal": "cool, once review wraps we start building"},
{"id": "en-04", "lang": "en", "formal": "It appears that the hypothesis is partially supported; nonetheless, further evidence is required.", "informal": "looks kinda right but we need more data"},
{"id": "en-05", "lang": "en", "formal": "The committee has concluded its deliberations and shall publish the findings forthwith.", "informal": "board's done talking, they'll post results soon"},
{"id": "en-06", "lang": "en", "formal": "Furthermore, the aforementioned constraints must be addressed prior to deployment.", "informal": "also we gotta fix those limits before shipping"},
{"id": "en-07", "lang": "en", "formal": "The analysis demonstrates a statistically significant correlation; consequently, the null hypothesis is rejected.", "informal": "numbers line up so the original guess was wrong"},
{"id": "en-08", "lang": "en", "formal": "I regret to inform you that, accordingly, the application has not been successful on this occasion.", "informal": "sorry bud u didnt get it this time"},
{"id": "en-09", "lang": "en", "formal": "Please find attached the quarterly report; kindly review at your earliest convenience.", "informal": "attached the q-report, take a look when u can"},
{"id": "en-10", "lang": "en", "formal": "The revised protocol mandates that all submissions be validated by two independent reviewers.", "informal": "new rule: two people gotta check every submission"},
{"id": "en-11", "lang": "en", "formal": "Thus, the empirical evidence substantiates the theoretical framework proposed in the preceding section.", "informal": "so the data backs up the theory from earlier"},
{"id": "en-12", "lang": "en", "formal": "The methodology, though unconventional, yielded results that were seemingly consistent with prior studies.", "informal": "weird method but results kinda matched other studies"},
{"id": "en-13", "lang": "en", "formal": "We hereby confirm receipt of your correspondence dated the 14th instant.", "informal": "got your email from the 14th"},
{"id": "en-14", "lang": "en", "formal": "The remuneration package shall be commensurate with experience and qualifications.", "informal": "pay depends on what u bring to the table"},
{"id": "en-15", "lang": "en", "formal": "Hence, it is imperative that the stakeholders convene to resolve the outstanding issues.", "informal": "so yeah the team needs to meet and sort stuff out"},
{"id": "en-16", "lang": "en", "formal": "The preliminary results indicate that the intervention may possibly reduce latency by approximately 12%.", "informal": "early numbers say it might cut latency ~12%"},
{"id": "en-17", "lang": "en", "formal": "Consequent upon the aforesaid, we shall require an amendment to the existing agreement.", "informal": "because of all that, contract needs updating"},
{"id": "en-18", "lang": "en", "formal": "It is with profound regret that we announce the cessation of operations at the eastern facility.", "informal": "sadly we're shutting down the east site"},
{"id": "en-19", "lang": "en", "formal": "Pursuant to section 4.2, any deviations must be reported to the compliance officer.", "informal": "per 4.2 just tell the compliance person if stuff changes"},
{"id": "en-20", "lang": "en", "formal": "The assertion, while plausible, lacks the empirical rigor necessary for publication.", "informal": "sounds reasonable but not solid enough to publish"},
{"id": "en-21", "lang": "en", "formal": "We hereby authorize the disbursement of funds in accordance with the attached schedule.", "informal": "k we're sending the money per that attached plan"},
{"id": "en-22", "lang": "en", "formal": "The observed phenomena can perhaps be attributed to stochastic fluctuations in the input signal.", "informal": "prolly just noise in the input"},
{"id": "en-23", "lang": "en", "formal": "Notwithstanding the aforementioned caveats, the framework remains broadly applicable.", "informal": "despite those issues the framework still works"},
{"id": "en-24", "lang": "en", "formal": "I should be most grateful if you could furnish me with the relevant documentation by Friday.", "informal": "can u send me the docs by friday thx"},
{"id": "en-25", "lang": "en", "formal": "The present manuscript explores the ramifications of the hypothesis in greater depth.", "informal": "this paper goes deeper into what the theory means"},
{"id": "ru-01", "lang": "ru", "formal": "Следовательно, предложение принимается.", "informal": "ок, предложение норм"},
{"id": "ru-02", "lang": "ru", "formal": "Тем не менее, результаты требуют дополнительной проверки.", "informal": "короче, надо ещё проверить результаты"},
{"id": "ru-03", "lang": "ru", "formal": "Таким образом, проект переходит в завершающую стадию; однако сроки остаются без изменений.", "informal": "в общем проект на финишной, но сроки те же"},
{"id": "ru-04", "lang": "ru", "formal": "Вследствие вышеизложенного, комиссия приняла решение отложить рассмотрение вопроса.", "informal": "из-за всего этого чуваки решили отложить вопрос"},
{"id": "ru-05", "lang": "ru", "formal": "Настоящим уведомляем вас о продлении срока действия соглашения до 31 декабря.", "informal": "договор продлили до 31 декабря"},
{"id": "ru-06", "lang": "ru", "formal": "Впрочем, предварительный анализ свидетельствует о наличии статистически значимой корреляции.", "informal": "короче, по первым цифрам связь есть"},
{"id": "ru-07", "lang": "ru", "formal": "Приношу свои извинения за причинённые неудобства; будем признательны за ваше понимание.", "informal": "сори за неудобства, спс за понимание"},
{"id": "ru-08", "lang": "ru", "formal": "Однако, представленные данные не позволяют сделать однозначного вывода.", "informal": "но из этих данных не понять однозначно"},
{"id": "ru-09", "lang": "ru", "formal": "Просим Вас ознакомиться с прилагаемым документом и, при необходимости, внести поправки.", "informal": "глянь доку и поправь если что"},
{"id": "ru-10", "lang": "ru", "formal": "Возможно, указанное расхождение объясняется особенностями выборки.", "informal": "может, это из-за выборки такая разница"},
{"id": "ru-11", "lang": "ru", "formal": "В силу сложившихся обстоятельств, запланированное мероприятие переносится на неопределённый срок.", "informal": "из-за всего происходящего встречу откладываем хз на когда"},
{"id": "ru-12", "lang": "ru", "formal": "Настоящее исследование посвящено анализу долговременных последствий применённого метода.", "informal": "эта работа про то что будет в долгую от такого метода"},
{"id": "ru-13", "lang": "ru", "formal": "Согласно пункту 3.2, любые изменения должны согласовываться с руководителем проекта.", "informal": "по п.3.2 изменения сначала к руководителю"},
{"id": "ru-14", "lang": "ru", "formal": "Вероятно, наблюдаемое явление обусловлено случайными флуктуациями входного сигнала.", "informal": "похоже это просто шум на входе"},
{"id": "ru-15", "lang": "ru", "formal": "С глубоким прискорбием сообщаем о прекращении деятельности восточного филиала.", "informal": "грустно но мы закрываем восточный филиал"},
{"id": "ru-16", "lang": "ru", "formal": "По-видимому, полученные результаты согласуются с ранее опубликованными данными.", "informal": "вроде цифры совпадают с тем что публиковали"},
{"id": "ru-17", "lang": "ru", "formal": "Настоящим подтверждаем получение вашего письма от 14-го числа.", "informal": "письмо от 14-го получил"},
{"id": "ru-18", "lang": "ru", "formal": "Размер вознаграждения определяется квалификацией и опытом кандидата.", "informal": "сколько платят зависит от опыта и скилов"},
{"id": "ru-19", "lang": "ru", "formal": "Следовательно, необходимо созвать совещание для разрешения возникших разногласий.", "informal": "короче надо созвать встречу и разрулить"},
{"id": "ru-20", "lang": "ru", "formal": "Предварительные данные свидетельствуют о возможном снижении задержки примерно на 12%.", "informal": "по первым цифрам задержка может упасть где-то на 12"},
{"id": "ru-21", "lang": "ru", "formal": "В соответствии с вышеизложенным, требуется внесение изменений в действующий договор.", "informal": "из-за всего этого договор надо менять"},
{"id": "ru-22", "lang": "ru", "formal": "Представленное утверждение, хотя и правдоподобно, не обладает достаточной эмпирической строгостью.", "informal": "звучит норм но научно не тянет"},
{"id": "ru-23", "lang": "ru", "formal": "Настоящим уполномочиваем произвести выплату средств согласно приложенному графику.", "informal": "ок выплачиваем по графику"},
{"id": "ru-24", "lang": "ru", "formal": "Был бы признателен, если бы вы предоставили соответствующую документацию к пятнице.", "informal": "скинь доки до пятницы плиз"},
{"id": "ru-25", "lang": "ru", "formal": "Данная работа рассматривает последствия гипотезы в более глубоком аспекте.", "informal": "эта статья копает глубже в последствия идеи"},
{"id": "en-26", "lang": "en", "formal": "Moreover, the institution maintains that all submissions shall adhere strictly to the prescribed format.", "informal": "plus, stuff has to follow the format they gave u"}
]

View file

@ -0,0 +1,162 @@
#!/usr/bin/env bash
# macOS launchd install/uninstall idempotency.
#
# Verifies:
# - DAEMON-01: plist installed under ~/Library/LaunchAgents
# - DAEMON-10: silent install (--yes bypasses consent banner)
# - C4 invariant: uninstall removes plist + ~/.iai-mcp/.lock +
# ~/.iai-mcp/.daemon.sock + ~/.iai-mcp/.daemon-state.json
# - Idempotency: install twice / uninstall twice -> no error
#
# Skipped on non-macOS (returns 0). Linux equivalent lives in
# tests/shell/test_systemd_install.sh.
#
# This script does NOT actually invoke launchctl in CI environments where it
# would fail (GitHub Actions macos-latest runners have launchd but no UI
# session for `gui/$UID` bootstrap to succeed). The CLI itself uses
# `check=False` on launchctl so a non-zero return there does not abort the
# install -- the plist file write + state file removal still happens.
set -euo pipefail

# This test drives launchd, so it is meaningless anywhere but macOS.
if [[ "$(uname -s)" != "Darwin" ]]; then
    echo "SKIP: not macOS"
    exit 0
fi

# Resolve which Python + iai-mcp module to use. Prefer venv, else system.
ROOT="$(cd "$(dirname "$0")/../.." && pwd)"
if [[ -x "$ROOT/.venv/bin/python" ]]; then
    PY="$ROOT/.venv/bin/python"
else
    PY="${PYTHON:-python3}"
fi
# Normalise PY to the interpreter's own sys.executable. Without this the
# fallback leaves PY as the bare word `python3`, and the later "plist contains
# absolute sys.executable" grep would happily match /usr/local/bin/python3 --
# the very thing that check exists to reject.
PY="$("$PY" -c 'import sys; print(sys.executable)')"
if [[ "$PY" != /* ]]; then
    echo "FAIL: could not resolve an absolute interpreter path (got '$PY')"
    exit 1
fi

CLI=( "$PY" -m iai_mcp.cli )

# Files the install/uninstall commands are expected to create or remove.
PLIST="$HOME/Library/LaunchAgents/com.iai-mcp.daemon.plist"
STATE_DIR="$HOME/.iai-mcp"
LOCK="$STATE_DIR/.lock"
SOCK="$STATE_DIR/.daemon.sock"
STATE="$STATE_DIR/.daemon-state.json"
# If the user already has a real plist installed, refuse to run -- this
# script would clobber their service state. The check MUST come before the
# EXIT trap is armed: cleanup() runs `daemon uninstall`, so skipping after the
# trap would still tear down the user's service on the way out.
if [[ -e "$PLIST" ]]; then
    echo "SKIP: existing plist at $PLIST -- not clobbering user data"
    exit 0
fi

# Snapshot pre-existing state files so cleanup restores real user data.
# Backup directory in /tmp scoped to this run.
BACKUP_DIR="$(mktemp -d -t iai-mcp-shtest-XXXXXX)"
PRE_EXISTING_LOCK=0
PRE_EXISTING_SOCK=0
PRE_EXISTING_STATE=0

if [[ -f "$LOCK" ]]; then
    PRE_EXISTING_LOCK=1
    cp "$LOCK" "$BACKUP_DIR/lock.bak"
fi
if [[ -f "$SOCK" ]]; then
    PRE_EXISTING_SOCK=1
    # Best effort: the copy is allowed to fail without aborting the test.
    cp "$SOCK" "$BACKUP_DIR/sock.bak" 2>/dev/null || true
fi
if [[ -f "$STATE" ]]; then
    PRE_EXISTING_STATE=1
    cp "$STATE" "$BACKUP_DIR/state.bak"
fi

cleanup() {
    # Always restore the user's pre-existing state, even if the test failed.
    # No plist restore is needed: the script exits above when one exists.
    "${CLI[@]}" daemon uninstall --yes >/dev/null 2>&1 || true
    mkdir -p "$STATE_DIR"
    if [[ "$PRE_EXISTING_LOCK" == "1" ]]; then
        cp "$BACKUP_DIR/lock.bak" "$LOCK"
    fi
    if [[ "$PRE_EXISTING_SOCK" == "1" && -f "$BACKUP_DIR/sock.bak" ]]; then
        cp "$BACKUP_DIR/sock.bak" "$SOCK" 2>/dev/null || true
    fi
    if [[ "$PRE_EXISTING_STATE" == "1" ]]; then
        cp "$BACKUP_DIR/state.bak" "$STATE"
    fi
    rm -rf "$BACKUP_DIR"
}
trap cleanup EXIT
echo "[1/6] First install (--yes bypasses consent banner)..."
"${CLI[@]}" daemon install --yes
if [[ ! -f "$PLIST" ]]; then
    echo "FAIL: plist not created at $PLIST"
    exit 1
fi

# Pitfall 5 sanity: rendered plist has absolute python path, not /usr/local/bin/python3
# -F: the interpreter path is a literal string, not a regex (it contains dots).
if ! grep -qF -- "$PY" "$PLIST"; then
    echo "FAIL: plist does not contain absolute sys.executable ($PY)"
    cat "$PLIST"
    exit 1
fi

echo "[2/6] Second install -- must be idempotent..."
if ! "${CLI[@]}" daemon install --yes; then
    echo "FAIL: install #2 returned non-zero"
    exit 1
fi
if [[ ! -f "$PLIST" ]]; then
    echo "FAIL: plist missing after install #2"
    exit 1
fi

# Seed state files so we can verify C4 cleanup actually removes them.
mkdir -p "$STATE_DIR"
touch "$LOCK" "$SOCK"
echo "{}" > "$STATE"

echo "[3/6] First uninstall (C4: remove plist + 3 state files)..."
"${CLI[@]}" daemon uninstall --yes
if [[ -e "$PLIST" ]]; then
    echo "FAIL: plist not removed"
    exit 1
fi

# C4 invariant: lock + sock + state file all gone.
# -e rather than -f: a leftover real socket is not a regular file, so -f
# would report it as "gone" and hide a C4 violation.
if [[ -e "$LOCK" ]]; then
    echo "FAIL: lock file not removed (C4 violation)"
    exit 1
fi
if [[ -e "$SOCK" ]]; then
    echo "FAIL: socket file not removed (C4 violation)"
    exit 1
fi
if [[ -e "$STATE" ]]; then
    echo "FAIL: state file not removed (C4 violation)"
    exit 1
fi

echo "[4/6] Second uninstall -- must be idempotent (no error on missing files)..."
if ! "${CLI[@]}" daemon uninstall --yes; then
    echo "FAIL: uninstall #2 returned non-zero"
    exit 1
fi
echo "[5/6] Cross-platform: dry-run install on macOS prints plist..."
# Capture the output first instead of piping straight into `grep -q`.
# grep -q exits on the first match; under `set -o pipefail` a CLI that is
# still writing at that moment dies with SIGPIPE and fails the whole pipeline.
if ! DRY_RUN_OUTPUT="$("${CLI[@]}" daemon install --dry-run --yes)"; then
    echo "FAIL: dry-run install returned non-zero"
    exit 1
fi
if ! grep -q "com.iai-mcp.daemon" <<<"$DRY_RUN_OUTPUT"; then
    echo "FAIL: dry-run did not print plist content"
    exit 1
fi

echo "[6/6] Cross-platform: dry-run does NOT write plist..."
"${CLI[@]}" daemon install --dry-run --yes >/dev/null
if [[ -e "$PLIST" ]]; then
    echo "FAIL: dry-run wrote $PLIST -- it must be a no-write preview"
    exit 1
fi

echo "PASS: launchd install/uninstall idempotency + C4 + Pitfall 5"
exit 0

View file

@ -0,0 +1,163 @@
#!/usr/bin/env bash
# Linux systemd install/uninstall idempotency.
#
# Verifies:
# - DAEMON-01: unit installed under ~/.config/systemd/user
# - DAEMON-10: silent install (--yes bypasses consent banner)
# - C4 invariant: uninstall removes unit + ~/.iai-mcp/.lock +
# ~/.iai-mcp/.daemon.sock + ~/.iai-mcp/.daemon-state.json
# - Idempotency: install twice / uninstall twice -> no error
#
# Skipped on non-Linux (returns 0). macOS equivalent lives in
# tests/shell/test_launchd_install.sh.
#
# Skipped if systemctl --user is not usable (headless CI without an active
# user-systemd session, e.g. GitHub Actions ubuntu-latest by default).
# DAEMON-12 cross-platform parity is enforced by CI matrix; this script is
# a smoke test that runs FULL flow when a user session exists.
set -euo pipefail

# This test drives systemd, so it is meaningless anywhere but Linux.
if [[ "$(uname -s)" != "Linux" ]]; then
    echo "SKIP: not Linux"
    exit 0
fi

# Skip on CI without user systemd session.
if ! systemctl --user status >/dev/null 2>&1; then
    echo "SKIP: no user systemd session available (expected on headless CI without loginctl enable-linger)"
    exit 0
fi

# Resolve which Python + iai-mcp module to use. Prefer venv, else system.
ROOT="$(cd "$(dirname "$0")/../.." && pwd)"
if [[ -x "$ROOT/.venv/bin/python" ]]; then
    PY="$ROOT/.venv/bin/python"
else
    PY="${PYTHON:-python3}"
fi
# Normalise PY to the interpreter's own sys.executable. Without this the
# fallback leaves PY as the bare word `python3`, and the later "unit contains
# absolute sys.executable" grep would match any path ending in python3.
PY="$("$PY" -c 'import sys; print(sys.executable)')"
if [[ "$PY" != /* ]]; then
    echo "FAIL: could not resolve an absolute interpreter path (got '$PY')"
    exit 1
fi

CLI=( "$PY" -m iai_mcp.cli )

# Files the install/uninstall commands are expected to create or remove.
UNIT="$HOME/.config/systemd/user/iai-mcp-daemon.service"
STATE_DIR="$HOME/.iai-mcp"
LOCK="$STATE_DIR/.lock"
SOCK="$STATE_DIR/.daemon.sock"
STATE="$STATE_DIR/.daemon-state.json"
# If the user already has a real unit installed, refuse to run. The check
# MUST come before the EXIT trap is armed: cleanup() runs `daemon uninstall`,
# so skipping after the trap would still tear down the user's service on the
# way out.
if [[ -e "$UNIT" ]]; then
    echo "SKIP: existing unit at $UNIT -- not clobbering user data"
    exit 0
fi

# Snapshot pre-existing state files so cleanup restores real user data.
BACKUP_DIR="$(mktemp -d -t iai-mcp-shtest-XXXXXX)"
PRE_EXISTING_LOCK=0
PRE_EXISTING_SOCK=0
PRE_EXISTING_STATE=0

if [[ -f "$LOCK" ]]; then
    PRE_EXISTING_LOCK=1
    cp "$LOCK" "$BACKUP_DIR/lock.bak"
fi
if [[ -f "$SOCK" ]]; then
    PRE_EXISTING_SOCK=1
    # Best effort: the copy is allowed to fail without aborting the test.
    cp "$SOCK" "$BACKUP_DIR/sock.bak" 2>/dev/null || true
fi
if [[ -f "$STATE" ]]; then
    PRE_EXISTING_STATE=1
    cp "$STATE" "$BACKUP_DIR/state.bak"
fi

cleanup() {
    # Always restore the user's pre-existing state, even if the test failed.
    # No unit restore is needed: the script exits above when one exists.
    "${CLI[@]}" daemon uninstall --yes >/dev/null 2>&1 || true
    mkdir -p "$STATE_DIR"
    if [[ "$PRE_EXISTING_LOCK" == "1" ]]; then
        cp "$BACKUP_DIR/lock.bak" "$LOCK"
    fi
    if [[ "$PRE_EXISTING_SOCK" == "1" && -f "$BACKUP_DIR/sock.bak" ]]; then
        cp "$BACKUP_DIR/sock.bak" "$SOCK" 2>/dev/null || true
    fi
    if [[ "$PRE_EXISTING_STATE" == "1" ]]; then
        cp "$BACKUP_DIR/state.bak" "$STATE"
    fi
    rm -rf "$BACKUP_DIR"
    systemctl --user daemon-reload >/dev/null 2>&1 || true
}
trap cleanup EXIT
echo "[1/6] First install (--yes bypasses consent banner)..."
"${CLI[@]}" daemon install --yes
if [[ ! -f "$UNIT" ]]; then
    echo "FAIL: unit not created at $UNIT"
    exit 1
fi

# Pitfall 5 sanity: rendered unit has absolute python path
# -F: the interpreter path is a literal string, not a regex (it contains dots).
if ! grep -qF -- "$PY" "$UNIT"; then
    echo "FAIL: unit does not contain absolute sys.executable ($PY)"
    cat "$UNIT"
    exit 1
fi

echo "[2/6] Verify systemctl shows the unit as enabled..."
# Advisory only: a non-enabled unit produces a warning, not a failure.
if ! systemctl --user is-enabled iai-mcp-daemon.service 2>/dev/null | grep -q enabled; then
    echo "WARN: unit not enabled (may be expected on minimal CI sessions)"
fi

echo "[3/6] Second install -- must be idempotent..."
if ! "${CLI[@]}" daemon install --yes; then
    echo "FAIL: install #2 returned non-zero"
    exit 1
fi
if [[ ! -f "$UNIT" ]]; then
    echo "FAIL: unit missing after install #2"
    exit 1
fi

# Seed state files so we can verify C4 cleanup actually removes them.
mkdir -p "$STATE_DIR"
touch "$LOCK" "$SOCK"
echo "{}" > "$STATE"

echo "[4/6] First uninstall (C4: remove unit + 3 state files)..."
"${CLI[@]}" daemon uninstall --yes
if [[ -e "$UNIT" ]]; then
    echo "FAIL: unit not removed"
    exit 1
fi
# -e rather than -f: a leftover real socket is not a regular file, so -f
# would report it as "gone" and hide a C4 violation.
if [[ -e "$LOCK" ]]; then
    echo "FAIL: lock file not removed (C4 violation)"
    exit 1
fi
if [[ -e "$SOCK" ]]; then
    echo "FAIL: socket file not removed (C4 violation)"
    exit 1
fi
if [[ -e "$STATE" ]]; then
    echo "FAIL: state file not removed (C4 violation)"
    exit 1
fi

echo "[5/6] Second uninstall -- must be idempotent..."
if ! "${CLI[@]}" daemon uninstall --yes; then
    echo "FAIL: uninstall #2 returned non-zero"
    exit 1
fi
echo "[6/6] Dry-run on Linux prints unit content + does NOT write..."
# Capture the output first instead of piping straight into `grep -q`.
# grep -q exits on the first match; under `set -o pipefail` a CLI that is
# still writing at that moment dies with SIGPIPE and fails the whole pipeline.
if ! DRY_RUN_OUTPUT="$("${CLI[@]}" daemon install --dry-run --yes)"; then
    echo "FAIL: dry-run install returned non-zero"
    exit 1
fi
if ! grep -q "iai_mcp.daemon" <<<"$DRY_RUN_OUTPUT"; then
    echo "FAIL: dry-run did not print unit content"
    exit 1
fi
if [[ -e "$UNIT" ]]; then
    echo "FAIL: dry-run wrote $UNIT -- it must be a no-write preview"
    exit 1
fi

echo "PASS: systemd install/uninstall idempotency + C4 + Pitfall 5"
exit 0

189
tests/test_aaak.py Normal file
View file

@ -0,0 +1,189 @@
"""Tests for the AAAK index generator + English-raw enforcement (D-08, TOK-10).
D-08 constitutional rule:
- Storage is RAW VERBATIM English always.
- AAAK is a RETRIEVAL VIEW only: wing/room/entities/tags metadata string.
- The index MUST NOT contain literal_surface content.
TOK-10:
- Non-English literal_surface must be flagged with a `raw:<lang>` tag; unflagged
non-English content raises ValueError at write time via enforce_english_raw.
"""
from __future__ import annotations
from datetime import datetime, timezone
from uuid import UUID, uuid4
import pytest
from iai_mcp.aaak import (
enforce_english_raw,
generate_aaak_index,
parse_aaak_index,
)
from iai_mcp.types import EMBED_DIM, MemoryRecord
def _make(
    tier: str = "episodic",
    text: str = "hello world",
    tags: list[str] | None = None,
    community_id: UUID | None = None,
    language: str = "en",
) -> MemoryRecord:
    """Build a minimal ``MemoryRecord`` for the AAAK tests.

    Only the fields these tests vary are exposed as parameters; the rest are
    pinned to fixed neutral values. ``aaak_index`` is left empty.

    Args:
        tier: Value stored in the record's ``tier`` field.
        text: Stored verbatim as ``literal_surface``.
        tags: Tags to attach; copied, so the caller's list is never shared.
            ``None`` or an empty list yields ``[]``.
        community_id: Optional community UUID.
        language: Value stored in the record's ``language`` field.

    Returns:
        A new record with a random id and a constant embedding.
    """
    # One timestamp for both fields, so a freshly built record never reports
    # an update time later than its creation time.
    now = datetime.now(timezone.utc)
    return MemoryRecord(
        id=uuid4(),
        tier=tier,
        literal_surface=text,
        aaak_index="",
        embedding=[0.1] * EMBED_DIM,
        community_id=community_id,
        centrality=0.0,
        detail_level=2,
        pinned=False,
        stability=0.0,
        difficulty=0.0,
        last_reviewed=None,
        never_decay=False,
        never_merge=False,
        provenance=[],
        created_at=now,
        updated_at=now,
        tags=list(tags) if tags else [],
        language=language,
    )
# ------------------------------------------------ generate_aaak_index format
def test_aaak_index_has_exactly_three_slashes():
    """W:<>/R:<>/E:<>/T:<> always carries exactly three '/' separators."""
    index = generate_aaak_index(_make())
    separator_count = index.count("/")
    assert separator_count == 3
def test_aaak_index_starts_with_wing_marker():
    """A semantic-tier record renders its wing segment as ``W:S``."""
    index = generate_aaak_index(_make(tier="semantic"))
    assert index.startswith("W:S/")
def test_aaak_index_has_four_key_value_segments():
    """The index splits into W:, R:, E:, T: segments, in that order."""
    record = _make(tier="episodic", tags=["entity:Alice", "project", "raw:en"])
    segments = generate_aaak_index(record).split("/")
    assert len(segments) == 4
    for segment, prefix in zip(segments, ("W:", "R:", "E:", "T:")):
        assert segment.startswith(prefix)
def test_aaak_index_includes_entity_tag_stripped():
    """``entity:`` tags land in the E: segment with the prefix removed."""
    r = _make(tags=["entity:Alice", "entity:IAI-MCP", "project"])
    idx = generate_aaak_index(r)
    # Look at the E: segment alone. Splitting on "/E:" would also include the
    # trailing T: segment, so a name leaking into the tags could satisfy it.
    entity_segment = idx.split("/")[2]
    assert entity_segment.startswith("E:")
    assert "Alice" in entity_segment
    assert "IAI-MCP" in entity_segment
    # The test name promises stripping, so assert the prefix is really gone.
    assert "entity:" not in entity_segment
def test_aaak_index_deterministic():
    """Indexing the same record twice yields the same string."""
    record = _make(tags=["entity:X", "flag"])
    first = generate_aaak_index(record)
    second = generate_aaak_index(record)
    assert first == second
# -------------------------------------------------------------- no-leak
def test_aaak_index_does_not_contain_literal_surface():
    """Constitutional rule: the verbatim text must never leak into the index."""
    secret = "SECRET_PASSWORD_ABC_XYZ"
    verbatim = "Alice mentioned the SECRET_PASSWORD_ABC_XYZ on day 3"
    record = _make(text=verbatim, tags=["entity:Alice", "project"])
    index = generate_aaak_index(record)
    assert verbatim not in index
    assert secret not in index
def test_aaak_index_unknown_community_marker():
    """A record without a community renders its room as 'unknown'."""
    index = generate_aaak_index(_make(community_id=None))
    assert "R:unknown" in index
def test_aaak_index_dash_when_no_entities():
    """With no ``entity:`` tags the entity segment collapses to ``E:-``."""
    record = _make(tags=["project"])
    index = generate_aaak_index(record)
    assert "/E:-/" in index
# -------------------------------------------------------- parse round-trip
def test_parse_aaak_index_round_trips_entities_and_tags():
    """Parsing a generated index recovers wing, entities, and plain tags."""
    record = _make(
        tier="semantic",
        tags=["entity:Alice", "entity:IAI", "project", "urgent"],
    )
    parsed = parse_aaak_index(generate_aaak_index(record))
    assert parsed["wing"] == ["S"]
    # Entities keep their order; plain tags are compared as a set.
    assert parsed["entities"] == ["Alice", "IAI"]
    assert set(parsed["tags"]) == {"project", "urgent"}
def test_parse_aaak_dash_segments_become_empty_lists():
    """A record with no tags parses back to empty entity and tag lists."""
    parsed = parse_aaak_index(generate_aaak_index(_make(tags=[])))
    assert parsed["entities"] == []
    assert parsed["tags"] == []
# ------------------------------------------ TOK-10 English-raw enforcement
def test_enforce_english_raw_accepts_pure_english():
    """Plain English text passes the check; the call must not raise."""
    record = _make(text="Alice said the IAI-MCP project is go")
    enforce_english_raw(record)
def test_enforce_english_raw_rejects_cyrillic_without_tag():
    """Cyrillic text without a ``raw:<lang>`` tag raises a 'constitutional' ValueError."""
    record = _make(text="Alice said: пусть сохранится точно", tags=["project"])
    with pytest.raises(ValueError) as excinfo:
        enforce_english_raw(record)
    # Checked outside the `with` block so the assertion actually executes.
    message = str(excinfo.value)
    assert "constitutional" in message
def test_enforce_english_raw_accepts_cyrillic_with_raw_tag():
    """An explicit ``raw:ru`` tag makes Cyrillic text acceptable."""
    declared_tags = ["raw:ru", "project"]
    record = _make(text="Alice said: пусть сохранится точно", tags=declared_tags)
    # Must not raise.
    enforce_english_raw(record)
def test_enforce_english_raw_rejects_cjk_without_tag():
    """Untagged CJK ideographs are rejected with ValueError."""
    record = _make(text="Hello 世界 verbatim", tags=[])
    with pytest.raises(ValueError):
        enforce_english_raw(record)
def test_enforce_english_raw_rejects_hiragana_without_tag():
    """Untagged hiragana is rejected with ValueError."""
    record = _make(text="Hello こんにちは world", tags=[])
    with pytest.raises(ValueError):
        enforce_english_raw(record)
def test_enforce_english_raw_accepts_cjk_with_raw_tag():
    """A ``raw:zh`` tag makes CJK text acceptable; the call must not raise."""
    record = _make(text="Hello 世界", tags=["raw:zh"])
    enforce_english_raw(record)
def test_enforce_english_raw_empty_text_passes():
    """An empty literal surface passes the check; the call must not raise."""
    record = _make(text="")
    enforce_english_raw(record)

View file

@ -0,0 +1,128 @@
"""Tests for TOK-06 active-inference retrieval gate (Plan 02-04 Task 2, D-26).
D-26 contract: skip full pipeline_recall when expected free-energy reduction
is less than 0.2 bits. Trivial cues (greetings, "thanks", very short strings)
short-circuit to L0-only.
"""
from __future__ import annotations
from datetime import datetime, timezone
from uuid import uuid4
import pytest
from iai_mcp.store import MemoryStore
from iai_mcp.types import EMBED_DIM, MemoryRecord
def test_theta_skip_constant():
    """The skip threshold is pinned at 0.2."""
    from iai_mcp import gate

    assert gate.THETA_SKIP == 0.2
def test_efer_empty_is_zero():
    """An empty cue has an expected free-energy reduction of exactly 0.0."""
    from iai_mcp.gate import expected_free_energy_reduction

    reduction = expected_free_energy_reduction("")
    assert reduction == 0.0
def test_efer_trivial_greeting_is_below_theta():
    """Greetings and one-word acknowledgements score below the skip threshold."""
    from iai_mcp.gate import THETA_SKIP, expected_free_energy_reduction

    trivial_cues = ("hi", "hello", "thanks", "ok", "yes", "no")
    for cue in trivial_cues:
        val = expected_free_energy_reduction(cue)
        assert val < THETA_SKIP, f"cue={cue!r} val={val}"
def test_efer_rich_is_above_theta():
    """A long, content-heavy cue scores above the skip threshold."""
    from iai_mcp.gate import THETA_SKIP, expected_free_energy_reduction

    rich_cue = (
        "explain how CLS replay interacts with schema induction "
        "under monotropic attention"
    )
    assert expected_free_energy_reduction(rich_cue) > THETA_SKIP
def test_should_skip_retrieval_trivial():
    """A trivial greeting is skipped and a non-empty reason is returned."""
    from iai_mcp.gate import should_skip_retrieval

    skipped, why = should_skip_retrieval("hi")
    assert skipped is True
    assert why
def test_should_skip_retrieval_informative():
    """A real question is not skipped."""
    from iai_mcp.gate import should_skip_retrieval

    question = "What did we discuss about auth last week?"
    skipped, _why = should_skip_retrieval(question)
    assert skipped is False
def test_should_skip_very_short_cue():
    """Cues shorter than 3 chars always skip (no discriminable signal)."""
    from iai_mcp.gate import should_skip_retrieval

    # A single character first, then the empty string.
    for short_cue in ("a", ""):
        skipped, _ = should_skip_retrieval(short_cue)
        assert skipped is True
def test_pipeline_recall_skip_path_returns_minimal_response(tmp_path, monkeypatch):
    """A trivial cue through ``memory_recall`` takes the cheap skip path.

    When the gate triggers, the recall is meant to return the L0 record
    only; this is asserted through ``budget_used`` staying under 200.
    """
    from iai_mcp import embed as embed_mod
    from iai_mcp.core import _seed_l0_identity, dispatch

    def _unit_vector() -> list[float]:
        # A fresh list per call, so no two records share one embedding object.
        return [1.0] + [0.0] * (EMBED_DIM - 1)

    class _FakeEmbedder:
        """Stand-in embedder that maps every text to the same unit vector."""

        DIM = EMBED_DIM
        DEFAULT_DIM = EMBED_DIM
        DEFAULT_MODEL_KEY = "fake"

        def __init__(self, *args, **kwargs):
            self.DIM = EMBED_DIM

        def embed(self, text: str) -> list[float]:
            return _unit_vector()

        def embed_batch(self, texts):
            return [self.embed(t) for t in texts]

    monkeypatch.setattr(embed_mod, "Embedder", _FakeEmbedder)

    store = MemoryStore(path=tmp_path)
    _seed_l0_identity(store)

    # Insert extra records so the pipeline branch would normally run.
    now = datetime.now(timezone.utc)
    for i in range(3):
        store.insert(
            MemoryRecord(
                id=uuid4(),
                tier="episodic",
                literal_surface=f"extra fact {i}",
                aaak_index="",
                embedding=_unit_vector(),
                community_id=None,
                centrality=0.0,
                detail_level=2,
                pinned=False,
                stability=0.0,
                difficulty=0.0,
                last_reviewed=None,
                never_decay=False,
                never_merge=False,
                provenance=[],
                created_at=now,
                updated_at=now,
                tags=[],
                language="en",
            )
        )

    request = {"cue": "hi", "session_id": "s-trivial"}
    resp = dispatch(store, "memory_recall", request)
    assert "budget_used" in resp
    # Retrieval skip reduces budget dramatically (<50 tokens typical).
    assert resp["budget_used"] < 200

69
tests/test_art_gate.py Normal file
View file

@ -0,0 +1,69 @@
"""ART vigilance gate tests (MEM-03, D-07, D-14)."""
from __future__ import annotations
from iai_mcp.types import EMBED_DIM
from iai_mcp.write import VIGILANCE_RHO, apply_art_gate, cosine
from tests.test_store import _make
def test_vigilance_rho_is_0_95():
    """The vigilance threshold constant is pinned at 0.95."""
    expected_rho = 0.95
    assert VIGILANCE_RHO == expected_rho
def test_empty_store_creates():
    """With no existing records the gate answers "create" for the candidate itself."""
    candidate = _make()
    verdict, target_id = apply_art_gate([], candidate)
    assert verdict == "create"
    assert target_id == candidate.id
def test_high_similarity_merges():
    """Identical vectors -> the gate merges into the existing record."""
    stored = _make(vec=[1.0] + [0.0] * (EMBED_DIM - 1))
    # The incoming record carries exactly the same embedding.
    incoming = _make(vec=[1.0] + [0.0] * (EMBED_DIM - 1))
    verdict, target_id = apply_art_gate([stored], incoming)
    assert verdict == "merge"
    assert target_id == stored.id
def test_low_similarity_creates():
    """Orthogonal vectors (cosine 0, below 0.95) -> a new record is created."""
    stored = _make(vec=[1.0] + [0.0] * (EMBED_DIM - 1))
    # Only the last component is set, so the two vectors share no axis.
    incoming = _make(vec=[0.0] * (EMBED_DIM - 1) + [1.0])
    verdict, target_id = apply_art_gate([stored], incoming)
    assert verdict == "create"
    assert target_id == incoming.id
def test_moderate_similarity_below_rho_creates():
    """A candidate at cosine 0.90 (below 0.95) must be created, not merged."""
    import math

    stored = _make(vec=[1.0] + [0.0] * (EMBED_DIM - 1))

    # Unit vector [0.9, sqrt(1 - 0.81), 0, ...]: its dot product with the
    # first basis vector is 0.9, so the cosine is 0.9.
    second_component = math.sqrt(1 - 0.9 * 0.9)
    incoming = _make(vec=[0.9, second_component] + [0.0] * (EMBED_DIM - 2))

    # Sanity-check the construction before exercising the gate.
    similarity = cosine(stored.embedding, incoming.embedding)
    assert abs(similarity - 0.9) < 1e-6

    verdict, target_id = apply_art_gate([stored], incoming)
    assert verdict == "create"
    assert target_id == incoming.id
def test_never_merge_record_skipped():
    """A record flagged never_merge=True is not chosen as a merge target."""
    protected = _make(
        vec=[1.0] + [0.0] * (EMBED_DIM - 1),
        pinned=True,
        never_merge=True,
    )
    # Same vector as the protected record; the gate must still answer "create".
    incoming = _make(vec=[1.0] + [0.0] * (EMBED_DIM - 1))
    verdict, target_id = apply_art_gate([protected], incoming)
    assert verdict == "create"
    assert target_id == incoming.id
def test_cosine_zero_vector_returns_zero():
    """cosine() yields 0.0 when either argument is the zero vector."""
    zero_vector_cases = (
        ([0.0, 0.0, 0.0], [1.0, 2.0, 3.0]),  # zero vector on the left
        ([1.0, 0.0], [0.0, 0.0]),  # zero vector on the right
    )
    for left, right in zero_vector_cases:
        assert cosine(left, right) == 0.0

View file

@ -0,0 +1,215 @@
"""Tests for autistic-kernel knob registry: 10 AUTIST + 1 wake_depth = 11 sealed.
History: flipped the 9 Phase-2 deferred knobs to phase=1.
PHASE_1_LIVE became a 13-member frozenset, then 14 with flip, then 15
after wake_depth append. Plan 07.12-02 removed 4 dead KnobSpec
entries (AUTIST-02 sensory_channel_weights, event_vs_time_cue,
AUTIST-11 alexithymia_accommodation, double_empathy) final shape
is 11 sealed entries, 10 AUTIST + wake_depth.
Schema/value validation covers enum/bool/int_range/float_range and
`dict:<keytype>:<valuetype>` for monotropism_depth (recursive per-key
validation). dunn_quadrant keeps the enum shape but gains a
float_range-style HIPPEA_precision_spec that migrates cleanly.
"""
from __future__ import annotations
import pytest
from iai_mcp.profile import (
PHASE_1_LIVE,
PHASE_2_DEFERRED,
PHASE_3_DEFERRED,
PROFILE_KNOBS,
default_state,
profile_get,
profile_set,
)
# --------------------------------------------------------------- registry shape
def test_phase_1_live_has_14_knobs():
    """PHASE_1_LIVE holds 11 knobs (10 autistic-kernel + wake_depth).

    The "14" in the test name is historical and was kept for git stability;
    the asserted size is 11.
    """
    live_count = len(PHASE_1_LIVE)
    assert live_count == 11
def test_phase_3_deferred_now_empty_after_autist13_flip():
    """PHASE_3_DEFERRED is an empty frozenset (camouflaging_relaxation moved to phase=1)."""
    nothing_deferred = frozenset()
    assert PHASE_3_DEFERRED == nothing_deferred
    assert len(PHASE_3_DEFERRED) == 0
def test_phase_2_deferred_empty():
    """PHASE_2_DEFERRED is an empty frozenset (the Phase-2 knobs moved to phase=1)."""
    nothing_deferred = frozenset()
    assert PHASE_2_DEFERRED == nothing_deferred
    assert len(PHASE_2_DEFERRED) == 0
def test_all_14_requirement_ids_present():
    """The registry holds exactly 10 AUTIST-* knobs plus the MCP-12 wake_depth knob.

    The "14" in the test name is historical and was kept for git stability.
    The expected AUTIST ids exclude AUTIST-02/08/11/12.
    """
    # Collect the requirement id of every knob in the autistic-kernel slice.
    autist_ids = []
    for spec in PROFILE_KNOBS.values():
        if spec.requirement_id.startswith("AUTIST-"):
            autist_ids.append(spec.requirement_id)
    assert len(autist_ids) == 10

    expected_ids = {
        "AUTIST-01", "AUTIST-03", "AUTIST-04", "AUTIST-05",
        "AUTIST-06", "AUTIST-07", "AUTIST-09", "AUTIST-10",
        "AUTIST-13", "AUTIST-14",
    }
    assert set(autist_ids) == expected_ids

    # Registry total includes the operator-facing wake_depth knob.
    assert len(PROFILE_KNOBS) == 11
    assert "wake_depth" in PROFILE_KNOBS
    wake_depth_spec = PROFILE_KNOBS["wake_depth"]
    assert wake_depth_spec.requirement_id == "MCP-12"
# ------------------------------------------------------- dict-schema validator
def test_monotropism_depth_live_accepts_dict():
    """profile_set accepts a per-domain dict of in-range floats for monotropism_depth."""
    state = default_state()
    outcome = profile_set(
        "monotropism_depth",
        {"coding": 0.8, "gardening": 0.3},
        state,
    )
    assert outcome["status"] == "ok"
    # Compare against a separate literal rather than the object passed in.
    expected_depths = {"coding": 0.8, "gardening": 0.3}
    assert state["monotropism_depth"] == expected_depths
def test_monotropism_depth_live_rejects_out_of_range():
    """A per-domain value of 1.5 is rejected with an error status."""
    state = default_state()
    outcome = profile_set("monotropism_depth", {"x": 1.5}, state)
    assert outcome["status"] == "error"
def test_monotropism_depth_live_rejects_non_dict():
    """A bare integer (not a dict) is rejected with an error status."""
    state = default_state()
    outcome = profile_set("monotropism_depth", 3, state)
    assert outcome["status"] == "error"
# Plan 07.12-02 removed test_sensory_channel_weights_live_accepts_dict /
# test_sensory_channel_weights_live_rejects_out_of_range — AUTIST-02
# (sensory_channel_weights) was a DEAD knob (declared but never read in any
# production scoring/response code); the registry entry was removed and
# profile_set now returns the unknown-knob error.
# See tests/test_profile_no_dead_knobs.py for the post-removal contract.
# ------------------------------------------------------- enum-schema validator
def test_dunn_quadrant_live():
    """dunn_quadrant accepts the value "seeking" and stores it in the state."""
    state = default_state()
    outcome = profile_set("dunn_quadrant", "seeking", state)
    assert outcome["status"] == "ok"
    assert state["dunn_quadrant"] == "seeking"
def test_dunn_quadrant_rejects_garbage():
    """The value "garbage" is rejected for dunn_quadrant with an error status."""
    state = default_state()
    outcome = profile_set("dunn_quadrant", "garbage", state)
    assert outcome["status"] == "error"
def test_demand_avoidance_tolerance_live():
    """All three tolerance values are accepted; the last one set is what remains."""
    state = default_state()
    accepted_values = ("collaborative", "neutral", "imperative")
    for value in accepted_values:
        outcome = profile_set("demand_avoidance_tolerance", value, state)
        assert outcome["status"] == "ok", f"expected {value} accepted"
    # "imperative" was set last, so it is the value left in the state.
    assert state["demand_avoidance_tolerance"] == "imperative"
# Plan 07.12-02 removed test_event_vs_time_cue_live / test_alexithymia_accommodation_live —
# AUTIST-08 (event_vs_time_cue) and AUTIST-11 (alexithymia_accommodation) were
# DEAD knobs (no taxonomy in schema, never read in production). Removed from
# registry; profile_set now returns the unknown-knob error.
# See tests/test_profile_no_dead_knobs.py for the post-removal contract.
# ----------------------------------------------------- bool-schema validator
def test_inertia_awareness_live():
    """inertia_awareness accepts True but rejects the integer 1."""
    state = default_state()

    accepted = profile_set("inertia_awareness", True, state)
    assert accepted["status"] == "ok"

    # An int is not a bool as far as this knob is concerned.
    rejected = profile_set("inertia_awareness", 1, state)
    assert rejected["status"] == "error"
# Plan 07.12-02 removed test_double_empathy_live — AUTIST-12 (double_empathy)
# was promoted to a passive system invariant (CLAUDE.md "Architectural
# Invariants — Pinned"); the system never translates phrasing toward NT style
# on any path, so a runtime knob was redundant. Removed from registry.
# See tests/test_profile_no_dead_knobs.py for the post-removal contract.
# ----------------------------------------------------- float-schema validator
def test_interest_boost_live():
    """interest_boost accepts 0.75 and rejects 2.0."""
    state = default_state()

    accepted = profile_set("interest_boost", 0.75, state)
    assert accepted["status"] == "ok"

    rejected = profile_set("interest_boost", 2.0, state)
    assert rejected["status"] == "error"
# ----------------------------------------------------- HIPPEA_precision spec
def test_HIPPEA_precision_spec_added_wire_to_autist_03():
    """Accept either of two registry shapes for the AUTIST-03 precision knob.

    - If a dedicated ``HIPPEA_precision`` knob exists, its value schema must
      contain a ``float_range:`` specification.
    - Otherwise ``dunn_quadrant`` must still be present and keep a schema that
      starts with ``enum:``.
    """
    if "HIPPEA_precision" not in PROFILE_KNOBS:
        # No dedicated knob: dunn_quadrant must retain its enum schema.
        fallback_spec = PROFILE_KNOBS["dunn_quadrant"]
        assert fallback_spec.value_schema.startswith("enum:")
    else:
        # Dedicated knob present: it must be declared as a float range.
        precision_spec = PROFILE_KNOBS["HIPPEA_precision"]
        assert "float_range:" in precision_spec.value_schema
# ----------------------------------------------------- profile_get coverage
def test_profile_get_returns_14_live_entries():
    """profile_get(None, ...) lists 11 live knobs and no deferred ones.

    The "14" in the test name is historical and was kept for git stability.
    """
    state = default_state()
    listing = profile_get(None, state)
    live_entries = listing["live"]
    deferred_entries = listing["deferred"]
    assert len(live_entries) == 11
    assert len(deferred_entries) == 0
def test_profile_get_monotropism_depth_returns_default_dict():
    """Fetching monotropism_depth from a default state yields a dict value."""
    state = default_state()
    reply = profile_get("monotropism_depth", state)
    assert reply["knob"] == "monotropism_depth"
    assert "value" in reply
    # Default is a dict (per-domain storage)
    assert isinstance(reply["value"], dict)

120
tests/test_batch_api.py Normal file
View file

@ -0,0 +1,120 @@
"""Tests for TOK-09 Batch API consolidation (Plan 02-04 Task 3, D-29).
submit_batch_consolidation passes through D-GUARD (should_call_llm) before
any network work. On Tier 0 fallback (no llm_enabled, no api key, budget
exceeded, ratelimit cooldown) returns stub results + writes llm_health
event. scope: the gate + event side-effects are load-bearing;
the real anthropic.batches.create call is stubbed (SDK surface varies).
"""
from __future__ import annotations
import pytest
from iai_mcp.events import query_events
from iai_mcp.guard import BudgetLedger, RateLimitLedger
from iai_mcp.store import MemoryStore
def _tasks(n: int = 3) -> list[dict]:
return [
{
"task_id": f"t{i}",
"prompt": f"summarise cluster {i}",
"prompt_tok": 500,
"output_tok": 200,
}
for i in range(n)
]
def test_batch_fallback_when_llm_disabled(tmp_path):
    """With llm_enabled=False the call reports failure, a reason, and a list."""
    from iai_mcp.batch import submit_batch_consolidation

    store = MemoryStore(path=tmp_path)
    budget_ledger = BudgetLedger(store)
    rate_ledger = RateLimitLedger(store)

    ok, reason, results = submit_batch_consolidation(
        store, _tasks(), budget_ledger, rate_ledger, llm_enabled=False,
    )

    assert ok is False
    lowered_reason = reason.lower()
    assert "llm_enabled" in lowered_reason or "disabled" in lowered_reason
    # Fallback returns an empty-but-structured list so downstream consumers
    # don't crash on a None.
    assert isinstance(results, list)
def test_batch_fallback_when_no_api_key(tmp_path, monkeypatch):
    """Without ANTHROPIC_API_KEY the call fails and the reason mentions the key."""
    from iai_mcp.batch import submit_batch_consolidation

    # Make sure no key leaks in from the surrounding environment.
    monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)

    store = MemoryStore(path=tmp_path)
    budget_ledger = BudgetLedger(store)
    rate_ledger = RateLimitLedger(store)

    ok, reason, _ = submit_batch_consolidation(
        store, _tasks(), budget_ledger, rate_ledger, llm_enabled=True,
    )

    assert ok is False
    # D-GUARD step 2.
    lowered_reason = reason.lower()
    assert "api" in lowered_reason or "key" in lowered_reason
def test_batch_emits_llm_health_on_fallback(tmp_path):
    """A disabled-LLM fallback writes at least one batch_consolidation llm_health event."""
    from iai_mcp.batch import submit_batch_consolidation

    store = MemoryStore(path=tmp_path)
    budget_ledger = BudgetLedger(store)
    rate_ledger = RateLimitLedger(store)

    submit_batch_consolidation(
        store, _tasks(), budget_ledger, rate_ledger, llm_enabled=False,
    )

    # Keep only the llm_health events attributed to the batch component.
    health_events = query_events(store, kind="llm_health")
    from_batch = [
        event for event in health_events
        if event["data"].get("component") == "batch_consolidation"
    ]
    assert len(from_batch) >= 1
def test_batch_50pct_discount():
    """BATCH_DISCOUNT is 0.5, so a discounted cost is half the sync cost."""
    from iai_mcp.batch import BATCH_DISCOUNT, _sync_tier_cost

    # Only the shape matters here (positive cost), not the exact price.
    sync_cost = _sync_tier_cost(1_000_000, 1_000_000)
    assert sync_cost > 0

    discounted_cost = sync_cost * BATCH_DISCOUNT
    assert discounted_cost == sync_cost * 0.5
    assert BATCH_DISCOUNT == 0.5
def test_batch_records_spend_when_eligible(tmp_path, monkeypatch):
    """Daily spend may only move when the batch call reports success.

    With an API key set and llm_enabled=True, the daily total is read before
    and after the call. On success it must not have decreased; on failure it
    must be unchanged.
    """
    from iai_mcp.batch import submit_batch_consolidation

    monkeypatch.setenv("ANTHROPIC_API_KEY", "test-key")

    store = MemoryStore(path=tmp_path)
    budget_ledger = BudgetLedger(store)
    rate_ledger = RateLimitLedger(store)

    spend_before = budget_ledger.daily_used()
    ok, _reason, _results = submit_batch_consolidation(
        store, _tasks(5), budget_ledger, rate_ledger, llm_enabled=True,
    )
    spend_after = budget_ledger.daily_used()

    if not ok:
        # The call did not succeed, so spend should NOT increase.
        assert spend_after == spend_before
    else:
        # The call succeeded: a nominal spend may have been recorded.
        assert spend_after >= spend_before
def test_sync_tier_cost_monotonic():
    """Doubling the prompt tokens (same output tokens) raises the cost."""
    from iai_mcp.batch import _sync_tier_cost

    shorter_prompt_cost = _sync_tier_cost(1000, 500)
    longer_prompt_cost = _sync_tier_cost(2000, 500)
    assert longer_prompt_cost > shorter_prompt_cost

199
tests/test_batch_guard.py Normal file
View file

@ -0,0 +1,199 @@
"""Tests for 02-REVIEW.md H-02 (batch scaffold silently debits budget +
flips effective_tier=tier1 on a stub that produces no output).
Bug: submit_batch_consolidation called budget.record_spend BEFORE the real
SDK call and returned (True, "ok", []). run_heavy_consolidation then saw
ok_batch=True and set effective_tier="tier1", logging it in the
consolidation event. Users inspecting `iai-mcp audit` saw Tier-1 events
that were factually false.
Fix:
- Scaffold path returns (False, "stub: batch API not yet wired", []).
- NO budget.record_spend call during the stub period.
- Emit one info-severity llm_health event documenting the gap so the
audit CLI reflects honest state.
- run_heavy_consolidation sees ok_batch=False and keeps tier0; the
cls_consolidation_run event payload carries batch_submitted=False.
Constitutional contract (D-GUARD budget honesty + audit repudiability):
Budget ledger rows MUST correspond to real API spend. Tier flags in
the event log MUST correspond to real Tier-1 output. Both invariants
were silently violated by the scaffold.
"""
from __future__ import annotations
import pytest
from iai_mcp.events import query_events
from iai_mcp.guard import BudgetLedger, RateLimitLedger
from iai_mcp.store import MemoryStore
def _tasks(n: int = 1) -> list[dict]:
return [
{
"task_id": f"t{i}",
"prompt": f"summarise cluster {i}",
"prompt_tok": 500,
"output_tok": 200,
}
for i in range(n)
]
# ==================================================== H-02: batch scaffold guard
def test_batch_stub_returns_false_with_scaffold_reason(tmp_path, monkeypatch):
    """The stub reports (False, "stub: batch API not yet wired", []).

    This holds even with an API key set and llm_enabled=True, i.e. when
    nothing in the setup here disables the call.
    """
    from iai_mcp.batch import submit_batch_consolidation

    monkeypatch.setenv("ANTHROPIC_API_KEY", "fake-test-key")

    store = MemoryStore(path=tmp_path)
    budget_ledger = BudgetLedger(store)
    rate_ledger = RateLimitLedger(store)

    ok, reason, results = submit_batch_consolidation(
        store, _tasks(3), budget_ledger, rate_ledger, llm_enabled=True,
    )

    assert ok is False, "scaffold must return ok=False until real SDK wire-up lands"
    assert reason.startswith("stub:"), (
        f"reason must advertise scaffold status, got {reason!r}"
    )
    assert "batch API not yet wired" in reason
    assert results == [], "scaffold produces empty result list"
def test_batch_stub_does_not_debit_budget(tmp_path, monkeypatch):
    """Daily and monthly spend are identical before and after a stub call."""
    from iai_mcp.batch import submit_batch_consolidation

    monkeypatch.setenv("ANTHROPIC_API_KEY", "fake-test-key")

    store = MemoryStore(path=tmp_path)
    budget_ledger = BudgetLedger(store)
    rate_ledger = RateLimitLedger(store)

    # Snapshot both counters, run the stub, then snapshot again.
    before_daily = budget_ledger.daily_used()
    before_monthly = budget_ledger.monthly_used()
    submit_batch_consolidation(
        store, _tasks(5), budget_ledger, rate_ledger, llm_enabled=True,
    )
    after_daily = budget_ledger.daily_used()
    after_monthly = budget_ledger.monthly_used()

    assert after_daily == before_daily, (
        f"daily spend changed during stub: {before_daily} -> {after_daily}"
    )
    assert after_monthly == before_monthly
def test_batch_stub_emits_info_llm_health_event(tmp_path, monkeypatch):
    """The stub call leaves an info-severity llm_health event behind.

    The first batch_consolidation event must have severity "info", and its
    note must mention "scaffold" or "not yet wired" (case-insensitive).
    """
    from iai_mcp.batch import submit_batch_consolidation

    monkeypatch.setenv("ANTHROPIC_API_KEY", "fake-test-key")

    store = MemoryStore(path=tmp_path)
    budget_ledger = BudgetLedger(store)
    rate_ledger = RateLimitLedger(store)

    submit_batch_consolidation(
        store, _tasks(), budget_ledger, rate_ledger, llm_enabled=True,
    )

    health_events = query_events(store, kind="llm_health")
    from_batch = [
        event for event in health_events
        if event["data"].get("component") == "batch_consolidation"
    ]
    assert len(from_batch) >= 1, "must emit llm_health for batch stub"

    ev = from_batch[0]
    assert ev["severity"] == "info", (
        f"scaffold event must be info-severity, got {ev['severity']!r}"
    )

    # A missing or None note is treated as the empty string.
    note = ev["data"].get("note") or ""
    lowered_note = note.lower()
    assert "scaffold" in lowered_note or "not yet wired" in lowered_note, (
        f"event note must advertise scaffold/not-yet-wired status, got {note!r}"
    )
def test_run_heavy_does_not_flip_tier1_on_stub(tmp_path, monkeypatch):
    """run_heavy_consolidation stays on tier0 while the batch call is a stub.

    The run is configured with llm_enabled=True, an API key, and
    has_api_key=True. The returned tier and the heavy cls_consolidation_run
    event must both say "tier0", and the event must record
    batch_submitted=False.
    """
    from iai_mcp.guard import BudgetLedger, RateLimitLedger
    from iai_mcp.sleep import SleepConfig, run_heavy_consolidation

    monkeypatch.setenv("ANTHROPIC_API_KEY", "fake-test-key")

    store = MemoryStore(path=tmp_path)
    budget_ledger = BudgetLedger(store)
    rate_ledger = RateLimitLedger(store)
    sleep_config = SleepConfig(llm_enabled=True)

    result = run_heavy_consolidation(
        store,
        session_id="h-stub",
        config=sleep_config,
        budget=budget_ledger,
        rate=rate_ledger,
        has_api_key=True,
    )
    assert result["tier"] == "tier0", (
        f"effective_tier must stay tier0 during scaffold, got {result['tier']!r}"
    )

    # cls_consolidation_run event has batch_submitted=False
    run_events = query_events(store, kind="cls_consolidation_run")
    heavy_runs = [
        event for event in run_events if event["data"].get("mode") == "heavy"
    ]
    assert len(heavy_runs) >= 1

    first_payload = heavy_runs[0]["data"]
    assert first_payload["batch_submitted"] is False, (
        "batch_submitted flag must honestly reflect stub state"
    )
    # The tier recorded in the event payload must be tier0 as well.
    assert first_payload.get("tier") == "tier0"
def test_run_heavy_does_not_debit_budget_during_stub(tmp_path, monkeypatch):
    """A heavy run must not write any batch_consolidation row to the budget ledger.

    The ledger table is read back after the run. The check is made on rows of
    kind "batch_consolidation" specifically, not on the overall total.
    """
    from iai_mcp.sleep import SleepConfig, run_heavy_consolidation

    monkeypatch.setenv("ANTHROPIC_API_KEY", "fake-test-key")

    store = MemoryStore(path=tmp_path)
    budget_ledger = BudgetLedger(store)
    rate_ledger = RateLimitLedger(store)

    # Read but never asserted on: per the note below, other spend kinds may
    # legitimately move the daily total.
    _spend_before = budget_ledger.daily_used()

    sleep_config = SleepConfig(llm_enabled=True)
    run_heavy_consolidation(
        store,
        session_id="h-no-debit",
        config=sleep_config,
        budget=budget_ledger,
        rate=rate_ledger,
        has_api_key=True,
    )

    # Note: schema_induction_tier1 also records a small spend when eligible.
    # We assert the batch_consolidation row specifically is NOT present.
    ledger_frame = store.db.open_table("budget_ledger").to_pandas()
    if ledger_frame.empty:
        # No rows at all, so there is nothing to inspect.
        return
    batch_rows = ledger_frame[ledger_frame["kind"] == "batch_consolidation"]
    assert len(batch_rows) == 0, (
        "stub must not record a batch_consolidation spend row"
    )

348
tests/test_bedtime.py Normal file
View file

@ -0,0 +1,348 @@
"""Tests for iai_mcp.bedtime -- Task 1.
Covers 14 behaviours from the plan:
1. English positive -- "good night" / "heading to bed" / "tired"
2. English negative (phrase alone, no dual-gate)
3. Russian positive
4. Japanese positive
5. Arabic positive
6. de/fr/es/zh positive (one phrase per language at minimum)
7. Cross-lingual fallback -- EN always tried; RU NOT tried under language="en"
8. Dual-gate: phrase alone NOT enough (no quiet window -> None)
9. Dual-gate: inside quiet window -> dict
10. Dual-gate: within 30min of start -> dict
11. Dual-gate: 1h before start -> None
12. Fixture-driven corpus: 5 positive + 5 negative per language
13. False positive rate < 10% on phrase-only check across all 8 fixtures
14. ReDoS protection: 10KB input under 100ms total across all patterns
"""
from __future__ import annotations
import time
from datetime import datetime, timezone
from pathlib import Path
from zoneinfo import ZoneInfo
import pytest
from iai_mcp import bedtime
from iai_mcp.bedtime import (
WIND_DOWN_BY_LANG,
WIND_DOWN_GATE_MINUTES_BEFORE,
WIND_DOWN_LANGUAGES_SUPPORTED,
detect_wind_down,
detect_wind_down_phrase,
is_late_in_quiet_window,
)
# Shorthand for the UTC timezone; used by the `_utc` helper and passed as `tz`
# to the quiet-window functions in the tests below.
UTC = timezone.utc
# Directory of per-language fixture files, resolved relative to this test file.
FIXTURES = Path(__file__).parent / "fixtures" / "bedtime"
# ---------------------------------------------------------------- phrase gate
def test_english_positive() -> None:
    """Each English wind-down cue matches and reports a non-empty pattern."""
    english_cues = (
        "good night",
        "I'm heading to bed",
        "I'm tired, going to sleep",
        "catch you tomorrow",
        "it's bedtime",
        "Goodnight!",
    )
    for cue in english_cues:
        matched, pattern = detect_wind_down_phrase(cue, "en")
        assert matched, f"expected EN positive for {cue!r}"
        assert pattern
def test_english_phrase_matches_even_rhetorical() -> None:
    """The phrase gate alone matches "good night" even inside a narrative sentence.

    This pins the phrase gate's behaviour in isolation, so the dual-gate
    tests can show what the quiet-window filter adds on top.
    """
    narrative = "the villain said good night and laughed"
    matched, pattern = detect_wind_down_phrase(narrative, "en")
    assert matched, "phrase gate alone is intentionally permissive"
    assert "night" in pattern.lower()
def test_russian_positive() -> None:
    """Each Russian wind-down cue matches under language="ru"."""
    russian_cues = (
        "пойду спать",
        "спокойной ночи",
        "устал, иду в постель",
        "до завтра",
        "пора ложиться",
    )
    for cue in russian_cues:
        matched, _ = detect_wind_down_phrase(cue, "ru")
        assert matched, f"expected RU positive for {cue!r}"
def test_japanese_positive() -> None:
    """Each Japanese wind-down cue matches under language="ja"."""
    japanese_cues = (
        "おやすみ",
        "おやすみなさい",
        "寝ます",
        "また明日",
        "疲れた",
    )
    for cue in japanese_cues:
        matched, _ = detect_wind_down_phrase(cue, "ja")
        assert matched, f"expected JA positive for {cue!r}"
def test_arabic_positive() -> None:
    """Each Arabic wind-down cue matches under language="ar"."""
    arabic_cues = (
        "تصبح على خير",
        "ليلة سعيدة",
        "أنا متعب سأنام",
    )
    for cue in arabic_cues:
        matched, _ = detect_wind_down_phrase(cue, "ar")
        assert matched, f"expected AR positive for {cue!r}"
def test_de_fr_es_zh_positive() -> None:
    """Three cues each for German, French, Spanish and Chinese all match."""
    cues_by_language: dict[str, list[str]] = {
        "de": ["gute Nacht", "ich bin müde", "bis morgen"],
        "fr": ["bonne nuit", "je suis fatigué", "à demain"],
        "es": ["buenas noches", "estoy cansado", "hasta mañana"],
        "zh": ["晚安", "我要睡觉", "累了"],
    }
    # Flatten to (language, cue) pairs, preserving the declaration order.
    pairs = (
        (lang, cue)
        for lang, cues in cues_by_language.items()
        for cue in cues
    )
    for lang, cue in pairs:
        matched, _ = detect_wind_down_phrase(cue, lang)
        assert matched, f"expected {lang.upper()} positive for {cue!r}"
def test_cross_lingual_en_is_fallback_but_ru_is_not() -> None:
    """English phrases match under language="ru"; Russian phrases do not match under "en"."""
    # An English cue under a Russian language setting still matches.
    english_under_ru, _ = detect_wind_down_phrase("good night", "ru")
    assert english_under_ru, "EN fallback must trigger regardless of language"

    # The reverse does not hold: a purely Russian cue under "en" must NOT match.
    russian_under_en, _ = detect_wind_down_phrase("я пойду спать", "en")
    assert not russian_under_en, (
        "RU phrases must not fall back under language=en"
    )
def test_phrase_empty_cue_no_match() -> None:
    """An empty cue yields exactly (False, "") under both "en" and "ru"."""
    no_match = (False, "")
    for lang in ("en", "ru"):
        assert detect_wind_down_phrase("", lang) == no_match
def test_phrase_unknown_language_still_tries_english() -> None:
    """A language outside the supported set (here "ko") still matches English cues."""
    unsupported_language = "ko"
    matched, _ = detect_wind_down_phrase("good night", unsupported_language)
    assert matched, "EN fallback required for unsupported languages too"
# ---------------------------------------------------------------- quiet-window gate
def _utc(y: int, m: int, d: int, hh: int, mm: int = 0) -> datetime:
    """Return a timezone-aware datetime at the given date and hour:minute in UTC."""
    moment = datetime(year=y, month=m, day=d, hour=hh, minute=mm, tzinfo=UTC)
    return moment
def test_is_late_no_window() -> None:
    """With no quiet window (None) the check returns False."""
    now = _utc(2026, 4, 18, 22, 0)
    assert is_late_in_quiet_window(None, now, UTC) is False
def test_is_late_inside_window() -> None:
    """23:30 lies inside a window that starts at 22:00 and lasts 8h."""
    # window = (44, 16) means start at bucket 44 = 22:00, duration 8h.
    window = (44, 16)
    now = _utc(2026, 4, 18, 23, 30)
    assert is_late_in_quiet_window(window, now, UTC) is True
def test_is_late_within_30min_of_start() -> None:
    """21:45 is 15 minutes before the 22:00 start, so it counts as late."""
    window = (44, 16)
    now = _utc(2026, 4, 18, 21, 45)
    assert is_late_in_quiet_window(window, now, UTC) is True
def test_is_late_exactly_30min_before_start() -> None:
    """Boundary case: exactly 30 minutes before the start (21:30) still counts."""
    window = (44, 16)
    now = _utc(2026, 4, 18, 21, 30)
    assert is_late_in_quiet_window(window, now, UTC) is True
def test_is_late_one_hour_before_start() -> None:
    """21:00 is 60 minutes before the 22:00 start, so it does not count."""
    window = (44, 16)
    now = _utc(2026, 4, 18, 21, 0)
    assert is_late_in_quiet_window(window, now, UTC) is False
def test_is_late_window_wraps_midnight() -> None:
    """02:30 the next day is inside a 22:00-06:00 window that wraps past midnight."""
    # window = (44, 16): 22:00 start + 8h = 06:00 next morning.
    window = (44, 16)
    now = _utc(2026, 4, 19, 2, 30)
    assert is_late_in_quiet_window(window, now, UTC) is True
def test_is_late_outside_window_afternoon() -> None:
    """15:00 is neither inside the 22:00-06:00 window nor within 30min of its start."""
    window = (44, 16)
    now = _utc(2026, 4, 18, 15, 0)
    assert is_late_in_quiet_window(window, now, UTC) is False
# ---------------------------------------------------------------- dual-gate
def test_dual_gate_phrase_alone_not_enough() -> None:
    """A matching phrase with no quiet window in the state yields None."""
    noon = _utc(2026, 4, 18, 12, 0)
    outcome = detect_wind_down("good night", "en", state={}, now=noon, tz=UTC)
    assert outcome is None
def test_dual_gate_no_phrase_inside_window() -> None:
    """Being inside the quiet window is not enough when the phrase does not match."""
    state = {"quiet_window": (44, 16)}
    inside_window = _utc(2026, 4, 18, 23, 30)
    outcome = detect_wind_down(
        "let me check the code", "en", state=state, now=inside_window, tz=UTC,
    )
    assert outcome is None
def test_dual_gate_both_pass_inside_window() -> None:
    """Phrase match inside the window yields a populated result dict.

    The result must carry the wind-down hint, a matched pattern containing
    "night", and the window's start bucket and duration.
    """
    state = {"quiet_window": (44, 16)}
    inside_window = _utc(2026, 4, 18, 23, 30)
    outcome = detect_wind_down(
        "good night", "en", state=state, now=inside_window, tz=UTC,
    )
    assert outcome is not None
    assert outcome["message_hint"] == "user_wind_down_detected"
    assert "night" in outcome["matched_pattern"].lower()
    assert outcome["quiet_window_start_bucket"] == 44
    assert outcome["quiet_window_duration"] == 16
def test_dual_gate_both_pass_30min_before_window() -> None:
    """A phrase match at 21:45, 15 minutes before the 22:00 start, still triggers."""
    state = {"quiet_window": (44, 16)}
    shortly_before = _utc(2026, 4, 18, 21, 45)
    outcome = detect_wind_down(
        "good night", "en", state=state, now=shortly_before, tz=UTC,
    )
    assert outcome is not None
    assert outcome["quiet_window_start_bucket"] == 44
def test_dual_gate_phrase_but_too_early() -> None:
    """A phrase match at 21:00, a full hour before the 22:00 start, yields None."""
    state = {"quiet_window": (44, 16)}
    too_early = _utc(2026, 4, 18, 21, 0)
    outcome = detect_wind_down(
        "good night", "en", state=state, now=too_early, tz=UTC,
    )
    assert outcome is None
# ---------------------------------------------------------------- fixture corpus
# Language codes taken from the keys of WIND_DOWN_BY_LANG, sorted so the
# parametrized fixture tests below run in a stable order.
_LANGS = sorted(WIND_DOWN_BY_LANG.keys())
@pytest.mark.parametrize("lang", _LANGS)
def test_fixture_corpus(lang: str) -> None:
    """Check every labelled sentence in ``fixtures/bedtime/<lang>.txt``.

    Blank lines and lines starting with ``#`` are ignored. Each remaining
    line is ``<sentence>\\t<yes|no>``, and the phrase gate must match exactly
    when the label is ``yes``. At least 10 usable lines are required.
    """
    fixture_path = FIXTURES / f"{lang}.txt"
    assert fixture_path.exists(), f"fixture file missing: {fixture_path}"

    entries = []
    for raw in fixture_path.read_text(encoding="utf-8").splitlines():
        stripped = raw.strip()
        if not stripped or stripped.startswith("#"):
            continue
        entries.append(stripped)
    assert len(entries) >= 10, (
        f"{lang}: expected >=10 fixture lines, got {len(entries)}"
    )

    for entry in entries:
        assert "\t" in entry, (
            f"{lang}: fixture line missing tab separator: {entry!r}"
        )
        # Split on the LAST tab so the sentence itself may contain tabs.
        sentence, expected = entry.rsplit("\t", 1)
        matched, _ = detect_wind_down_phrase(sentence, lang)
        should_match = expected == "yes"
        assert matched == should_match, (
            f"{lang}: {sentence!r} expected {expected} got {matched}"
        )
def test_fixture_corpus_false_positive_rate_under_10_percent() -> None:
    """Phrase-only false positive rate over all negative fixtures must be < 10%.

    Every fixture line labelled ``no`` across all languages in ``_LANGS`` is
    run through the phrase gate; a match counts as a false positive. At least
    40 negative lines are required. Blank lines, ``#`` comment lines and lines
    without a tab separator are ignored, matching how ``test_fixture_corpus``
    reads the same files.
    """
    false_positives = 0
    negatives = 0
    for lang in _LANGS:
        fixture_path = FIXTURES / f"{lang}.txt"
        for raw in fixture_path.read_text(encoding="utf-8").splitlines():
            line = raw.strip()
            # Skip comments too: a commented-out "sentence<TAB>no" row must
            # not be counted as a negative sample.
            if not line or line.startswith("#") or "\t" not in line:
                continue
            sentence, expected = line.rsplit("\t", 1)
            if expected != "no":
                continue
            negatives += 1
            matched, _ = detect_wind_down_phrase(sentence, lang)
            if matched:
                false_positives += 1

    assert negatives >= 40, f"expected >=40 negative fixtures, got {negatives}"
    fpr = false_positives / negatives
    assert fpr < 0.10, (
        f"phrase-only FPR {fpr:.2%} exceeds 10% ceiling "
        f"({false_positives}/{negatives}). Tighten fixtures or patterns."
    )
# ---------------------------------------------------------------- ReDoS guard
def test_redos_protection_bounded_quantifiers_under_100ms() -> None:
    """Guard against catastrophic regex backtracking on a large input.

    Every compiled pattern in ``bedtime._COMPILED`` is searched against 10KB
    of 'a' characters. Each individual search must finish within 100ms, and
    the whole sweep across all patterns must finish within 1.0s.
    """
    big = "a" * 10240
    deadline = 0.100  # seconds, per pattern

    # perf_counter is the highest-resolution clock for timing short intervals;
    # a coarser clock could hide or distort a 100ms threshold.
    sweep_start = time.perf_counter()
    for lang, patterns in bedtime._COMPILED.items():
        for p in patterns:
            t0 = time.perf_counter()
            p.search(big)
            if time.perf_counter() - t0 > deadline:
                pytest.fail(
                    f"ReDoS suspected: {lang} pattern {p.pattern!r} took "
                    f">{deadline}s on 10KB input"
                )
    total_elapsed = time.perf_counter() - sweep_start
    assert total_elapsed < 1.0, (
        f"combined ReDoS sweep took {total_elapsed:.3f}s (budget 1.0s)"
    )
# ---------------------------------------------------------------- coverage sanity
def test_language_coverage_is_exactly_eight_d11() -> None:
    """The supported-language set is exactly these eight codes, one pattern group each."""
    expected_languages = frozenset(
        {"en", "ru", "ja", "ar", "de", "fr", "es", "zh"},
    )
    assert WIND_DOWN_LANGUAGES_SUPPORTED == expected_languages
    assert len(WIND_DOWN_BY_LANG) == 8
def test_gate_minutes_before_is_thirty_d09() -> None:
    """The lead time before quiet-window start that counts as late is 30 minutes."""
    expected_minutes = 30
    assert WIND_DOWN_GATE_MINUTES_BEFORE == expected_minutes

133
tests/test_bench.py Normal file
View file

@ -0,0 +1,133 @@
"""Tests for the Phase-1 benchmark harnesses (D-15, OPS-01/02/04).
All tests inject `count_tokens_fn` where applicable so no live Anthropic API
calls happen in CI. The actual Anthropic integration is exercised only when
`ANTHROPIC_API_KEY` is set and the CLIs are run directly by hand.
"""
from __future__ import annotations
from bench.tokens import FRESH_LIMIT, STEADY_LIMIT, run_token_bench
from bench.verbatim import ACCURACY_FLOOR, run_verbatim_bench
from iai_mcp.store import MemoryStore
# ---------------------------------------------------------- bench/tokens.py
def test_tokens_steady_pass(tmp_path):
    """A counter fixed at 2500 tokens passes both the steady and fresh checks."""
    def _fixed_2500(t):
        return 2500

    store = MemoryStore(path=tmp_path)
    report = run_token_bench(store=store, n_runs=3, count_tokens_fn=_fixed_2500)

    assert report["steady_ok"] is True
    assert report["fresh_ok"] is True
    assert all(warm_count == 2500 for warm_count in report["warm"])
    # An injected counter is reported as its own mode.
    assert report["mode"] == "injected"
    limits = report["limits"]
    assert limits["steady"] == STEADY_LIMIT
    assert limits["fresh"] == FRESH_LIMIT
def test_tokens_steady_fail(tmp_path):
    """A counter fixed at 3500 tokens makes the steady check fail."""
    def _fixed_3500(t):
        return 3500

    store = MemoryStore(path=tmp_path)
    report = run_token_bench(store=store, n_runs=3, count_tokens_fn=_fixed_3500)
    assert report["steady_ok"] is False
def test_tokens_fresh_fail(tmp_path):
    """A 9000-token first count fails the fresh check while warm counts still pass.

    The injected counter hands out values from a fixed sequence: 9000 for the
    first call, then 2500 for each of the following three.
    """
    store = MemoryStore(path=tmp_path)
    scripted_counts = iter([9000, 2500, 2500, 2500])

    def _scripted(_text: str) -> int:
        # Ignore the text; return the next scripted count.
        return next(scripted_counts)

    report = run_token_bench(store=store, n_runs=3, count_tokens_fn=_scripted)
    assert report["fresh_ok"] is False  # 9000 > 8000
    assert report["steady_ok"] is True  # warm still under 3000
def test_tokens_tiktoken_fallback_mode(tmp_path, monkeypatch):
    """No ANTHROPIC_API_KEY but tiktoken installed -> mode == tiktoken-cl100k-proxy.

    The scenario only exists when tiktoken is importable, so the test is
    skipped (instead of failing on the mode assertion) on hosts without it.
    """
    # Local import: pytest is not among the module-level imports visible for
    # this test module.
    import pytest

    pytest.importorskip("tiktoken")
    monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
    store = MemoryStore(path=tmp_path)
    res = run_token_bench(store=store, n_runs=3)
    assert res["mode"] == "tiktoken-cl100k-proxy"
    # Payload on an empty store has no L0/L1/L2/rich_club content, so the warm
    # prompt is literally ".", which tiktoken counts as a single token.
    # Fresh adds the 1k-chars-tail so remains well under FRESH_LIMIT.
    assert res["steady_ok"] is True
    assert res["fresh_ok"] is True
def test_tokens_char4_fallback_mode(tmp_path, monkeypatch):
    """No ANTHROPIC_API_KEY and no tiktoken -> mode == heuristic-char4."""
    import builtins

    monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
    original_import = builtins.__import__

    def _import_without_tiktoken(name, *args, **kwargs):
        # Everything except tiktoken is delegated to the real import hook.
        if name != "tiktoken":
            return original_import(name, *args, **kwargs)
        raise ImportError("tiktoken not available in this scenario")

    monkeypatch.setattr(builtins, "__import__", _import_without_tiktoken)
    outcome = run_token_bench(store=MemoryStore(path=tmp_path), n_runs=3)
    assert outcome["mode"] == "heuristic-char4"
    assert outcome["steady_ok"] is True
def test_tokens_fresh_prompt_is_larger_than_warm(tmp_path):
    """Sanity: the fresh prompt is longer than the warm prompt (it has the 1k tail)."""
    prompts: list[str] = []

    def _record(text: str) -> int:
        prompts.append(text)
        return 100

    run_token_bench(
        store=MemoryStore(path=tmp_path), n_runs=1, count_tokens_fn=_record
    )
    # Call order: the fresh prompt is counted first, the warm prompt second.
    assert len(prompts) == 2
    fresh_prompt, warm_prompt = prompts
    assert len(fresh_prompt) > len(warm_prompt)
# -------------------------------------------------------- bench/verbatim.py
def test_verbatim_passes_small_n(tmp_path):
    """Small-N smoke test: all 10 pinned records are recalled exactly and the run passes."""
    outcome = run_verbatim_bench(
        store=MemoryStore(path=tmp_path),
        n_records=10,
        session_gap=2,
        noise_per_session=2,
    )
    assert outcome["accuracy"] >= ACCURACY_FLOOR
    assert outcome["passed"] is True
    assert outcome["hits_exact"] == 10
def test_verbatim_returns_floor_constant(tmp_path):
    """The result dict echoes the pass/fail threshold so verifiers can assert on it."""
    outcome = run_verbatim_bench(
        store=MemoryStore(path=tmp_path),
        n_records=5,
        session_gap=1,
        noise_per_session=1,
    )
    reported_floor = outcome["floor"]
    assert reported_floor == ACCURACY_FLOOR
    assert reported_floor == 0.99
def test_verbatim_counts_exact_matches(tmp_path):
    """hits_exact never exceeds n_records, and accuracy is their ratio."""
    outcome = run_verbatim_bench(
        store=MemoryStore(path=tmp_path),
        n_records=5,
        session_gap=1,
        noise_per_session=1,
    )
    hits = outcome["hits_exact"]
    total = outcome["n_records"]
    assert hits <= total
    assert outcome["accuracy"] == hits / total

View file

@ -0,0 +1,121 @@
"""OPS-10 regression guard: small-N latency stays under D-SPEED p95 ceiling.
Plan 05-05 (D5-08): CI-runnable guard for bench/neural_map.py at the
small-N end of the matrix. The full N in {100, 1k, 5k, 10k} matrix runs
ad hoc on this dev Mac and is recorded in the published bench report; this
test exercises N=100 only so CI catches regressions in <30s.
D-SPEED contract: p95 < 100 ms at every measured N.
Adds the comparative reference flags to argparse:
--ref-mempalace-p95-ms <float>
--ref-claude-mem-p95-ms <float>
When supplied, the bench's per-N `passed` flag flips to False if IAI's p95
exceeds the reference. Tests assert these flags exist on the parser.
See:
- bench/neural_map.py the harness under guard
- tests/test_bench_neural_map.py sibling D-SPEED tests (passed=True at N=100)
- internal architecture spec
Task 2 for the behavior contract
"""
from __future__ import annotations
from pathlib import Path
import pytest
@pytest.fixture(autouse=True)
def _isolated_keyring(monkeypatch: pytest.MonkeyPatch):
    """Replace the keyring module's password functions with a dict-backed fake.

    Avoids macOS keyring prompts during the test (same pattern as
    tests/test_hippea_cascade.py and tests/test_memory_recall_structural.py).
    Yields the backing dict, keyed by ``(service, user)``.
    """
    import keyring as _keyring

    passwords: dict[tuple[str, str], str] = {}

    def _get(service, user):
        return passwords.get((service, user))

    def _set(service, user, password):
        passwords[(service, user)] = password

    def _delete(service, user):
        # Mirrors dict.pop with a default: returns the removed value or None.
        return passwords.pop((service, user), None)

    replacements = (
        ("get_password", _get),
        ("set_password", _set),
        ("delete_password", _delete),
    )
    for attr_name, fake in replacements:
        monkeypatch.setattr(_keyring, attr_name, fake)
    yield passwords
def test_neural_map_small_n_p95_under_regression_ceiling(tmp_path: Path):
    """OPS-10 regression fence at N=100.

    The strict D-SPEED p95 < 100 ms gate lives in
    tests/test_bench_neural_map.py::test_neural_map_bench_reports_passed_flag,
    which is known to trip under concurrent system load. This guard uses a
    looser 200 ms ceiling (2x D-SPEED) so that a structural regression
    (e.g. broken spread pruning pushing p95 towards 1s+) is still caught in
    CI when wall-clock noise makes the strict 100 ms test flaky. The strict
    gate, run in isolation, remains the absolute measurement.
    """
    from bench.neural_map import run_neural_map_bench

    store_dir = tmp_path / "store"
    out = run_neural_map_bench(n=100, iterations=10, store_path=store_dir)
    assert out["latency_ms_p95"] < 200.0, (
        f"OPS-10 regression: p95 {out['latency_ms_p95']:.2f}ms > 200ms at N=100 "
        f"(2x D-SPEED ceiling — likely a real regression, not concurrency noise)"
    )
    # Sanity: a real measurement is strictly positive.
    assert out["latency_ms_p95"] > 0.0
def test_neural_map_main_with_matrix_returns_int(tmp_path: Path):
    """The CLI entry point accepts an explicit ``ns`` list and returns 0 or 1."""
    from bench.neural_map import main as neural_map_main

    exit_code = neural_map_main(ns=[50], iterations=3, store_path=tmp_path)
    assert exit_code in (0, 1)
def test_neural_map_argparse_has_reference_flags():
    """OPS-10 comparative gate: the bench CLI accepts the reference-p95 flags.

    Parses a dry argument list through ``neural_map._parse_args`` and checks
    that ``--ref-mempalace-p95-ms`` and ``--ref-claude-mem-p95-ms`` land on
    the namespace under their underscore names with the supplied values.

    Grep-verifiable contract: renaming either flag elsewhere in the report
    harness has to update this test.
    """
    from bench import neural_map

    # Inspect the actual parser by parsing a dry args list.
    parsed = neural_map._parse_args([  # noqa: SLF001
        "--n", "100",
        "--ref-mempalace-p95-ms", "42.5",
        "--ref-claude-mem-p95-ms", "61.0",
    ])
    # getattr with a None default turns a missing flag into an assertion
    # failure instead of an AttributeError.
    assert getattr(parsed, "ref_mempalace_p95_ms", None) == 42.5
    assert getattr(parsed, "ref_claude_mem_p95_ms", None) == 61.0
def test_neural_map_comparative_gate_flips_passed_false_when_above_ref(tmp_path: Path):
    """If IAI p95 exceeds the mempalace reference, ``main`` exits with 1.

    The reference is set impossibly low (0.0001 ms) so any realistic bench
    run exceeds it regardless of host speed.
    """
    from bench import neural_map

    bench_kwargs = {
        "ns": [50],
        "iterations": 3,
        "store_path": tmp_path,
        "ref_mempalace_p95_ms": 0.0001,
    }
    exit_code = neural_map.main(**bench_kwargs)
    # With a 0.0001 ms reference, the bench cannot pass.
    assert exit_code == 1

View file

@ -0,0 +1,92 @@
"""Tests for bench/neural_map.py (Plan 02-04 Task 4, D-SPEED).
D-SPEED contract: pipeline_recall <100ms at 10k records. The bench harness
measures per-N latency distribution (p50, p95) and returns a structured
dict. Main returns 0 iff all Ns pass thresholds.
"""
from __future__ import annotations
import pytest
def test_neural_map_bench_runs_small_n(tmp_path):
    """A tiny run echoes N and reports float p50/p95 latencies plus a passed flag."""
    from bench.neural_map import run_neural_map_bench

    report = run_neural_map_bench(n=50, iterations=3, store_path=tmp_path)
    assert report["n"] == 50
    for required_key in ("latency_ms_p50", "latency_ms_p95", "passed"):
        assert required_key in report
    for latency_key in ("latency_ms_p50", "latency_ms_p95"):
        assert isinstance(report[latency_key], float)
def test_neural_map_bench_returns_stage_timings(tmp_path):
    """Per-stage timings are reported to aid D-SPEED triage."""
    from bench.neural_map import run_neural_map_bench

    report = run_neural_map_bench(n=50, iterations=2, store_path=tmp_path)
    assert "stage_timings_ms" in report
    # Must cover the five pipeline stages named in pipeline.py.
    stage_timings = report["stage_timings_ms"]
    pipeline_stages = ("embed", "gate", "seeds", "spread", "rank")
    for stage_name in pipeline_stages:
        assert stage_name in stage_timings
def test_neural_map_bench_reports_passed_flag(tmp_path):
    """D-SPEED gate: the bench at N=100 MUST report passed=True.

    This is the strict form of the check: it is not enough for ``passed`` to
    be a bool, the target itself has to be met. The test verifies that the
    100 ms threshold is surfaced in the result, that ``passed`` is True, and
    that the measured p95 is below 100 ms.
    """
    from bench.neural_map import run_neural_map_bench

    out = run_neural_map_bench(n=100, iterations=10, store_path=tmp_path)
    # Contract: the threshold is part of the output.
    threshold = out.get("threshold_ms")
    assert threshold == 100.0
    # D-SPEED quality gate: p95 must be UNDER 100ms at N=100.
    assert out["passed"] is True, (
        f"D-SPEED violated: p95={out['latency_ms_p95']:.2f}ms > 100ms at N=100. "
        f"Full output: {out}"
    )
    assert out["latency_ms_p95"] < 100.0
def test_neural_map_main_exits_zero_at_n100(tmp_path, capsys):
    """``main(ns=[100])`` returns 0, the all-pass exit code."""
    from bench.neural_map import main as neural_map_main

    code = neural_map_main(ns=[100], iterations=10, store_path=tmp_path)
    assert code == 0, (
        f"bench.neural_map.main(ns=[100]) should exit 0 post-02-07; got {code}"
    )
def test_neural_map_bench_main_runs_and_returns_int(tmp_path, capsys):
    """``main`` runs end-to-end and returns 0 or 1 (bench CI contract)."""
    from bench.neural_map import main as neural_map_main

    exit_code = neural_map_main(ns=[50], iterations=2, store_path=tmp_path)
    assert exit_code in (0, 1)
def test_neural_map_bench_deterministic_within_tolerance(tmp_path):
    """Two same-N, same-seed runs both keep p50 under a generous 2000 ms ceiling.

    Each run gets its own subdirectory so it starts from a fresh store.
    """
    from bench.neural_map import run_neural_map_bench

    p50_by_run = {}
    for run_name in ("a", "b"):
        report = run_neural_map_bench(
            n=50, iterations=5, store_path=tmp_path / run_name, seed=42,
        )
        p50_by_run[run_name] = report["latency_ms_p50"]

    # Latencies are wall-clock, so only a loose upper bound is asserted.
    assert p50_by_run["a"] < 2000.0
    assert p50_by_run["b"] < 2000.0

View file

@ -0,0 +1,70 @@
"""OPS-11 regression guard: small-N RAM bench stays under threshold.
Plan 05-05 (D5-08) CI-runnable guard for bench/memory_footprint.py. The
large-N target (RSS <= 300 MB at N=10k warm on 16+ GB machine) runs
ad-hoc from the published bench report; this test exercises the small-N path
(N=100-500 with a 64d embedding) so CI catches harness drift without
spinning up a 10k-record LanceDB table.
See:
- bench/memory_footprint.py the harness under guard
- internal architecture spec
Task 1 for the behavior contract
"""
from __future__ import annotations
from pathlib import Path
import pytest
def test_memory_footprint_small_n_under_threshold(tmp_path: Path):
    """Smoke: a small-N run yields a plausible rss_mb_peak and the expected schema.

    The 300 MB large-N target is deliberately NOT asserted against the
    measurement here. This guard only checks that the harness returns a
    positive reading under a generous ceiling and that the result carries
    the keys the bench report consumes.
    """
    from bench.memory_footprint import run_memory_footprint

    out = run_memory_footprint(n=100, store_path=tmp_path / "store", dim=64)

    # Shape: each expected key is present.
    for expected_key in ("n", "rss_mb_peak", "threshold_mb", "passed", "platform"):
        assert expected_key in out

    # Values: rss is a real positive reading; threshold is the design target.
    assert out["n"] == 100
    peak_mb = out["rss_mb_peak"]
    assert isinstance(peak_mb, float)
    assert peak_mb > 0.0
    assert out["threshold_mb"] == 300.0
    # Generous outer bound — catches a clearly broken reading (e.g. reporting
    # nanoseconds as MB). The tight 300 MB fence belongs to the large-N run.
    assert peak_mb < 4000.0, (
        f"small-N RSS {out['rss_mb_peak']} MB suspicious"
    )
def test_memory_footprint_main_exits_int(tmp_path: Path):
    """The CLI entry point returns 0 or 1 (bench CI contract)."""
    from bench.memory_footprint import main as footprint_main

    cli_args = ["--n", "50", "--dim", "32"]
    exit_code = footprint_main(argv=cli_args)
    assert exit_code in (0, 1)
def test_memory_footprint_platform_units_documented(tmp_path: Path):
    """The harness records which platform it measured on.

    macOS bytes vs Linux KB is a correctness trap, so the output must carry
    a platform marker that lets downstream reports reproduce the unit
    conversion.
    """
    from bench.memory_footprint import run_memory_footprint

    known_platforms = ("darwin", "linux", "win32")
    report = run_memory_footprint(n=50, store_path=tmp_path / "store2", dim=32)
    assert report["platform"] in known_platforms

View file

@ -0,0 +1,117 @@
"""OPS-12 regression guard: 3-turn sanity for total_session_cost.
Plan 05-05 (D5-08) CI-runnable guard for bench/total_session_cost.py.
The full 10-turn script runs ad-hoc on this dev Mac and populates
the published bench report rows; this test exercises the shape
contracts and the minimal-vs-standard invariant at CI speed.
Acceptance contracts:
- minimal total <= standard total (TOK-11 sanity; if not, Plan 05-03
regressed somewhere)
- per_turn list has exactly 10 entries (fixed D5-08 script)
- counter mode honest-disclosed in JSON (anthropic-count-tokens |
tiktoken-cl100k-proxy | heuristic-char4)
- reference-gate failure flips passed=False
See:
- bench/total_session_cost.py the harness under guard
- bench/tokens.py 3-tier counter fallback pattern reused here
- internal architecture spec
Task 3 for the behavior contract
"""
from __future__ import annotations
import pytest
def test_total_session_cost_reports_per_turn():
    """The fixed D5-08 script yields ten per-turn counts that sum to the total."""
    from bench.total_session_cost import run_total_session_cost

    out = run_total_session_cost(wake_depth="minimal")
    assert "per_turn" in out
    per_turn = out["per_turn"]
    assert isinstance(per_turn, list)
    assert len(per_turn) == 10, (
        f"D5-08 script has 10 turns; got {len(out['per_turn'])}"
    )
    assert out["total_tokens"] == sum(per_turn)
    assert out["adapter"] == "iai-mcp"
    assert out["wake_depth"] == "minimal"
def test_total_session_cost_minimal_le_standard():
    """TOK-11 invariant: wake_depth=minimal never costs more than standard.

    Both depths run the same script; a failure here means Plan 05-03's lazy
    session-start work has regressed.
    """
    from bench.total_session_cost import run_total_session_cost

    minimal, standard = (
        run_total_session_cost(wake_depth=depth)
        for depth in ("minimal", "standard")
    )
    assert minimal["total_tokens"] <= standard["total_tokens"], (
        f"minimal {minimal['total_tokens']} > standard {standard['total_tokens']}"
        " — TOK-11 regression"
    )
def test_total_session_cost_counter_mode_disclosed():
    """Report honesty: the output names which token-counter mode produced it,
    so downstream reports can flag non-official numbers."""
    from bench.total_session_cost import run_total_session_cost

    recognised_modes = (
        "anthropic-count-tokens",
        "tiktoken-cl100k-proxy",
        "heuristic-char4",
        "injected",
    )
    report = run_total_session_cost(wake_depth="minimal")
    assert report["mode"] in recognised_modes
def test_total_session_cost_fails_when_above_ref():
    """A reference total below IAI's flips the comparative gate to passed=False.

    The reference is set to 1 token so the outcome does not depend on the host.
    """
    from bench.total_session_cost import run_total_session_cost

    impossibly_low_ref = 1
    report = run_total_session_cost(
        wake_depth="standard", mempalace_ref=impossibly_low_ref
    )
    assert report["passed"] is False
    assert report["refs"]["mempalace"] == 1
def test_total_session_cost_passes_without_refs():
    """With no reference numbers supplied, passed=True is the degenerate answer
    and ``refs`` is empty; disclosure of the missing refs is left to the
    report prose."""
    from bench.total_session_cost import run_total_session_cost

    report = run_total_session_cost(wake_depth="minimal")
    assert report["passed"] is True
    assert report["refs"] == {}
def test_total_session_cost_main_exits_int():
    """The CLI entry point returns 0 or 1 (bench CI contract)."""
    from bench.total_session_cost import main as session_cost_main

    exit_code = session_cost_main(argv=["--wake-depth", "minimal"])
    assert exit_code in (0, 1)
def test_total_session_cost_injected_counter():
    """Counter injection: a caller-supplied deterministic token counter is
    honoured, so the test does not depend on the proxy tokeniser's drift."""
    from bench.total_session_cost import run_total_session_cost

    def _one_token_per_char(text: str) -> int:
        # One token per character, floored at 1 for empty text.
        return len(text) or 1

    report = run_total_session_cost(
        wake_depth="minimal", count_tokens_fn=_one_token_per_char,
    )
    assert report["mode"] == "injected"
    assert report["total_tokens"] >= 10  # at least 1/turn * 10 turns

View file

@ -0,0 +1,167 @@
"""Plan 05-06 Task 3 — mempalace / claude-mem subprocess adapters in
``bench/total_session_cost.py``.
These adapters let the reference column carry a live measurement
from the mempalace CLI when it is installed locally, falling back to
honest "adapter unavailable" disclosure when absent. They never block
the bench: subprocess timeouts and non-zero exits return None and emit
a ``bench_adapter_unavailable`` stderr event.
Covered contracts:
Test 1 _run_mempalace_adapter signature exists and accepts the 10-turn script
Test 2 mempalace CLI absent -> None + stderr event, no exception
Test 3 mempalace CLI present -> sums per-turn token counts via the 3-tier counter
Test 4 --measure-mempalace flag wires the live adapter into refs["mempalace_measured"]
Test 5 _run_claude_mem_adapter mirrors mempalace shape for forward compat
Test 6 manual --ref-mempalace alongside --measure-mempalace keeps both values,
but LIVE measurement is the comparator for the `passed` flag
"""
from __future__ import annotations
import json
import subprocess
from unittest import mock
import pytest
from bench.total_session_cost import (
_SCRIPT,
_run_claude_mem_adapter,
_run_mempalace_adapter,
main,
run_total_session_cost,
)
# --------------------------------------------------------------------------- helpers
def _fixed_counter(text: str) -> int:
"""Deterministic counter: 1 token per word. Keeps assertions stable
across tiktoken / anthropic / char4 drift."""
return max(1, len(text.split()))
# --------------------------------------------------------------------------- Test 1
def test_mempalace_adapter_signature():
    """The adapter accepts the canonical script plus a counter and never raises.

    On a machine where mempalace does not respond cleanly the result is
    None; otherwise it is an int. Callers depend on that contract.
    """
    outcome = _run_mempalace_adapter(_SCRIPT, _fixed_counter)
    assert outcome is None or isinstance(outcome, int)
# --------------------------------------------------------------------------- Test 2
def test_mempalace_adapter_absent_cli_returns_none(capsys):
    """With no mempalace CLI on PATH the adapter returns None and logs to stderr."""
    which_patch = mock.patch(
        "bench.total_session_cost.shutil.which", return_value=None
    )
    with which_patch:
        outcome = _run_mempalace_adapter(_SCRIPT, _fixed_counter)
    assert outcome is None
    stderr_text = capsys.readouterr().err
    assert "bench_adapter_unavailable" in stderr_text
    assert "mempalace" in stderr_text
# --------------------------------------------------------------------------- Test 3
def test_mempalace_adapter_live_run_sums_stdout_tokens():
    """With ``shutil.which`` resolving the CLI and ``subprocess.run`` returning
    fixed stdout, the adapter sums the injected counter's result over every
    turn of the script."""

    def _which_only_mempalace(name):
        if name == "mempalace":
            return "/fake/bin/mempalace"
        return None

    def _canned_run(*args, **kwargs):
        # Three words of stdout -> 3 tokens per turn under _fixed_counter.
        invoked = args[0] if args else []
        return subprocess.CompletedProcess(
            args=invoked, returncode=0, stdout="one two three", stderr="",
        )

    with mock.patch(
        "bench.total_session_cost.shutil.which", side_effect=_which_only_mempalace
    ):
        with mock.patch(
            "bench.total_session_cost.subprocess.run", side_effect=_canned_run
        ):
            total = _run_mempalace_adapter(_SCRIPT, _fixed_counter)
    assert total == 3 * len(_SCRIPT)
# --------------------------------------------------------------------------- Test 4
def test_measure_mempalace_flag_populates_refs(monkeypatch, capsys):
    """End-to-end: running `main` with --measure-mempalace populates
    refs["mempalace_measured"] when the adapter returns a number.

    ``shutil.which`` and ``subprocess.run`` are patched so the adapter sees a
    fake mempalace CLI that prints ``"hello world"``. The JSON that ``main``
    writes to stdout is parsed and inspected, and the exit code is checked
    against the 0/1 bench CI contract instead of being discarded.
    """
    def fake_which(name):
        return "/fake/bin/mempalace" if name == "mempalace" else None

    def fake_run(*args, **kwargs):
        return subprocess.CompletedProcess(
            args=args[0] if args else [],
            returncode=0,
            stdout="hello world",
            stderr="",
        )

    with mock.patch("bench.total_session_cost.shutil.which", side_effect=fake_which), \
            mock.patch("bench.total_session_cost.subprocess.run", side_effect=fake_run):
        rc = main(["--wake-depth", "minimal", "--measure-mempalace"])
    # Previously captured but never asserted.
    assert rc in (0, 1)
    captured = capsys.readouterr()
    result = json.loads(captured.out.strip())
    assert "mempalace_measured" in result["refs"]
    assert isinstance(result["refs"]["mempalace_measured"], int)
    assert result["refs"]["mempalace_measured"] > 0
# --------------------------------------------------------------------------- Test 5
def test_claude_mem_adapter_mirrors_mempalace_shape(capsys):
    """The claude-mem adapter takes the same arguments and has the same
    absent-CLI fallback as the mempalace adapter: it returns None and logs
    ``bench_adapter_unavailable`` to stderr. This keeps the forward-compat
    path exercised."""
    which_patch = mock.patch(
        "bench.total_session_cost.shutil.which", return_value=None
    )
    with which_patch:
        outcome = _run_claude_mem_adapter(_SCRIPT, _fixed_counter)
    assert outcome is None
    stderr_text = capsys.readouterr().err
    assert "bench_adapter_unavailable" in stderr_text
    assert "claude-mem" in stderr_text
# --------------------------------------------------------------------------- Test 6
def test_live_measurement_wins_over_manual_ref():
    """When both a live measurement and a manual reference are supplied, the
    live value lands in ``refs["mempalace_measured"]`` and is the comparator
    for ``passed``; the manual int is kept in ``refs["mempalace_manual"]``
    for the audit trail."""

    def _which_only_mempalace(name):
        return "/fake/bin/mempalace" if name == "mempalace" else None

    # 5000 words of stdout for every subprocess call.
    canned = subprocess.CompletedProcess(
        args=[], returncode=0, stdout="token " * 5000, stderr="",
    )
    which_patch = mock.patch(
        "bench.total_session_cost.shutil.which", side_effect=_which_only_mempalace
    )
    run_patch = mock.patch(
        "bench.total_session_cost.subprocess.run", return_value=canned
    )
    with which_patch, run_patch:
        result = run_total_session_cost(
            wake_depth="minimal",
            mempalace_ref=10,  # manual ref — deliberately tiny to force fail IF used
            measure_mempalace=True,
            count_tokens_fn=_fixed_counter,
        )

    refs = result["refs"]
    assert "mempalace_measured" in refs
    assert "mempalace_manual" in refs
    assert refs["mempalace_manual"] == 10
    # LIVE measurement is the gate; with 50000+ tokens live, IAI total
    # (<~3000) is well below, so passed is True.
    assert result["passed"] is True

View file

@ -0,0 +1,105 @@
"""Tests for bench/trajectory.py (Plan 02-04 Task 4, D-33).
D-33 (benchmark corpus): 30-session synthetic corpus (autism/NT interaction
pattern models), reproducible from seed=42. Diverse-language fixture:
corpus includes English + Russian + Japanese + Arabic + German records for
corpus-shape variance testing, NOT a multilingual product mandate. Brain
is English-only since Plan 05-08 (default bge-small-en-v1.5).
"""
from __future__ import annotations
import pytest
def test_synthetic_corpus_generates_30_sessions():
    """The generator returns 30 sessions, each carrying the four expected keys."""
    from bench.trajectory import generate_synthetic_corpus

    session_keys = ("session_id", "records", "curiosity_events", "trajectory_metrics")
    corpus = generate_synthetic_corpus(n_sessions=30, seed=42)
    assert len(corpus) == 30
    for session in corpus:
        for key in session_keys:
            assert key in session
def test_synthetic_corpus_deterministic_from_seed():
    """Two generations with the same seed yield the same session ids in order."""
    from bench.trajectory import generate_synthetic_corpus

    def _session_ids(corpus):
        return [session["session_id"] for session in corpus]

    first = generate_synthetic_corpus(n_sessions=5, seed=42)
    second = generate_synthetic_corpus(n_sessions=5, seed=42)
    assert _session_ids(first) == _session_ids(second)
def test_synthetic_corpus_multilingual():
    """Diverse-language fixture: a corpus-shape variance check.

    This is NOT a product mandate: the IAI-MCP brain is English-only since
    Plan 05-08. Non-English samples only exercise corpus-shape variance in
    trajectory aggregation. Records without a ``language`` key count as "en".
    """
    from bench.trajectory import generate_synthetic_corpus

    corpus = generate_synthetic_corpus(n_sessions=30, seed=42)
    languages: set[str] = {
        record.get("language", "en")
        for session in corpus
        for record in session["records"]
    }
    # At minimum: en + one non-English (ru/ja/ar/de) must appear.
    assert "en" in languages
    non_english = languages - {"en"}
    assert len(non_english) >= 1, (
        f"diverse-language fixture has only languages={languages}"
    )
    # Aspirational: at least 4 distinct languages over 30 sessions
    # (corpus-shape diversity, not a multilingual product claim).
    assert len(languages) >= 4
def test_synthetic_corpus_covers_six_metrics():
    """Across the corpus, trajectory data is present for all six metric slots."""
    from bench.trajectory import generate_synthetic_corpus

    corpus = generate_synthetic_corpus(n_sessions=30, seed=42)
    seen_metrics: set[str] = {
        metric
        for session in corpus
        for metric in session["trajectory_metrics"]
    }
    assert seen_metrics >= {"m1", "m2", "m3", "m4", "m5", "m6"}
def test_trajectory_bench_runs_over_corpus(tmp_path):
    """Running the bench over a small corpus reports m1..m6 trends and a passed flag."""
    from bench.trajectory import (
        generate_synthetic_corpus,
        run_trajectory_bench,
    )

    corpus = generate_synthetic_corpus(n_sessions=6, seed=42)
    report = run_trajectory_bench(corpus, store_path=tmp_path)
    for metric_index in range(1, 7):
        assert f"m{metric_index}_trend" in report
    assert "passed" in report
def test_trajectory_bench_main_runs(tmp_path, capsys):
    """``main`` runs on the default synthetic corpus and returns 0 or 1."""
    from bench.trajectory import main as trajectory_main

    # Tiny n_sessions keeps this fast in CI.
    exit_code = trajectory_main(n_sessions=5, store_path=tmp_path)
    assert exit_code in (0, 1)
def test_trajectory_bench_accepts_real_logs_flag(tmp_path):
    """``main`` accepts ``real_logs_path``; with None it falls back to synthetic."""
    from bench.trajectory import main as trajectory_main

    # No real-logs path supplied -> synthetic fallback.
    bench_kwargs = {
        "n_sessions": 3,
        "real_logs_path": None,
        "store_path": tmp_path,
    }
    exit_code = trajectory_main(**bench_kwargs)
    assert exit_code in (0, 1)

View file

@ -0,0 +1,161 @@
"""Tests for diagnostic flags on bench/verbatim.py.
Covers the 5 behaviors from the plan:
1. `python -m bench.verbatim --help` lists --skip-l0-seed, --storage-direct,
--n, --gap, --noise-per-session, --k.
2. `run_verbatim_bench(skip_l0_seed=True, ...)` does NOT seed L0 identity.
3. `run_verbatim_bench(storage_direct=True, ...)` writes zero provenance
entries on pinned records across the query loop.
4. Default invocation (no new flags set) is byte-identical to pre-plan
behavior on the public dict keys.
5. `--k` override propagates to `recall(k_hits=K)` (or `query_similar(k=K)`
in storage-direct mode).
All tests use tmp_path for hermeticity; N kept tiny for CI speed.
"""
from __future__ import annotations
import io
import json
import subprocess
import sys
from pathlib import Path
import pytest
# Two directories up from this test file; used as the working directory for
# the `python -m bench.verbatim` subprocess in the --help test.
REPO_ROOT = Path(__file__).resolve().parent.parent
def test_cli_help_lists_all_new_flags():
    """Behavior 1: ``--help`` lists all six diagnostic/config flags."""
    expected_flags = (
        "--skip-l0-seed",
        "--storage-direct",
        "--n",
        "--gap",
        "--noise-per-session",
        "--k",
    )
    help_command = [sys.executable, "-m", "bench.verbatim", "--help"]
    out = subprocess.run(
        help_command,
        capture_output=True,
        text=True,
        cwd=str(REPO_ROOT),
        timeout=30,
    )
    assert out.returncode == 0, f"--help exited {out.returncode}: {out.stderr}"
    text = out.stdout
    for flag in expected_flags:
        assert flag in text, f"--help missing flag {flag}\n\n{text}"
def test_skip_l0_seed_does_not_seed_l0(tmp_path):
    """Behavior 2: with skip_l0_seed=True the store holds no L0 record afterwards."""
    from bench.verbatim import run_verbatim_bench
    from iai_mcp.core import L0_ID
    from iai_mcp.store import MemoryStore

    bench_store = MemoryStore(path=tmp_path)
    bench_kwargs = {
        "n_records": 5,
        "session_gap": 2,
        "noise_per_session": 3,
        "skip_l0_seed": True,
    }
    report = run_verbatim_bench(store=bench_store, **bench_kwargs)
    assert "accuracy" in report
    assert report["skip_l0_seed"] is True
    l0_record = bench_store.get(L0_ID)
    assert l0_record is None, (
        "skip_l0_seed=True must not seed L0 identity record"
    )
def test_storage_direct_writes_zero_provenance_to_pinned(tmp_path):
    """Behavior 3: storage_direct leaves pinned benchmark records without provenance."""
    from bench.verbatim import run_verbatim_bench
    from iai_mcp.store import MemoryStore

    bench_store = MemoryStore(path=tmp_path)
    report = run_verbatim_bench(
        store=bench_store,
        n_records=5,
        session_gap=2,
        noise_per_session=3,
        storage_direct=True,
    )
    assert "accuracy" in report
    assert report["storage_direct"] is True
    # Collect every pinned, benchmark-tagged record whose provenance list is
    # non-empty after the run; the storage-direct path should leave none.
    pinned_offenders: list[tuple[str, int]] = [
        (rec.literal_surface[:40], len(rec.provenance or []))
        for rec in bench_store.all_records()
        if rec.pinned
        and "benchmark" in (rec.tags or [])
        and len(rec.provenance or []) != 0
    ]
    assert not pinned_offenders, (
        f"storage_direct must leave pinned provenance empty, got: {pinned_offenders}"
    )
def test_default_invocation_keys_preserved(tmp_path):
    """Behavior 4: a default invocation keeps the legacy keys and adds the
    diagnostic keys, with both diagnostic switches reported as False."""
    from bench.verbatim import run_verbatim_bench
    from iai_mcp.store import MemoryStore

    legacy_keys = (
        "accuracy",
        "n_records",
        "session_gap",
        "noise_per_session",
        "hits_exact",
        "passed",
        "floor",
        "noise_mode",
    )
    diagnostic_keys = ("skip_l0_seed", "storage_direct", "k")

    result = run_verbatim_bench(
        store=MemoryStore(path=tmp_path),
        n_records=5,
        session_gap=2,
        noise_per_session=3,
    )
    # Legacy keys (pre-Plan-05-01) still present.
    for key in legacy_keys:
        assert key in result, f"legacy key {key} missing"
    # New diagnostic traceability keys added.
    for key in diagnostic_keys:
        assert key in result, f"diagnostic key {key} missing"
    assert result["skip_l0_seed"] is False
    assert result["storage_direct"] is False
def test_k_override_propagates_in_storage_direct(tmp_path):
    """Behavior 5: a ``k`` override in storage_direct mode reaches the query.

    With n_records=5 and k=3 each query can return only 3 rows, so the
    number of pinned-text hits is bounded by k rather than by the default
    max(n_records+10, 20). A deliberately tiny k must therefore push
    accuracy strictly below 1.0, where the default k would return all
    pinned records.
    """
    from bench.verbatim import run_verbatim_bench
    from iai_mcp.store import MemoryStore

    bench_kwargs = {
        "n_records": 5,
        "session_gap": 2,
        "noise_per_session": 3,
        "storage_direct": True,
        "k": 3,
    }
    result = run_verbatim_bench(store=MemoryStore(path=tmp_path), **bench_kwargs)
    assert result["k"] == 3, f"k should be echoed back, got {result.get('k')!r}"
    # k < n_records means some pinned cues cannot find their exact literal in
    # the top-k, which would not happen with the default k (20 for n=5).
    assert result["accuracy"] < 1.0, (
        f"k=3 with n=5 must cap accuracy below 1.0, got {result['accuracy']}"
    )

View file

@ -0,0 +1,178 @@
"""Plan 07.1-04 R5 acceptance — compile-output regression trap.
This is the regression-trap that catches a future revert of Phase 7.1's
no-spawn architecture. If `child_process.spawn` reappears in
`mcp-wrapper/dist/bridge.js`, this test FAILS, alerting the developer
(or a future Claude) that someone has reintroduced the TOCTOU spawn
race that Phase 7.1 explicitly removed.
# Why a compile-output trap, not just a source-level grep?
A source-level grep would also catch the regression, but it would NOT
catch:
- A spawn call introduced via a transitive import (e.g., a helper
module that imports `node:child_process` and re-exports a spawn
wrapper).
- A spawn call introduced via dynamic `require("child_process")` at
runtime (which tsc compiles into the JS but a source grep for
`import { spawn }` would miss).
- A spawn introduced into a NEW module that bridge.ts imports.
The compiled `dist/bridge.js` is what actually ships and runs. Grepping
THAT is the load-bearing assertion.
# Reference
- Plan 07.1-04 Task 3
- 07.1-CONTEXT.md D7.1-07 (bridge.ts spawn-removal scope)
- The mirror source-level assertion lives in Task 1
(``grep -c 'child_process[.]spawn|^import.*spawn|spawnDaemon'
mcp-wrapper/src/bridge.ts`` returns 0)
"""
from __future__ import annotations
import platform
import subprocess
from pathlib import Path
import pytest
# Two directory levels above this test file.
REPO = Path(__file__).resolve().parent.parent
# Directory that the build fixture below runs npm in.
WRAPPER = REPO / "mcp-wrapper"
# Module-wide marker: every test in this file is skipped when
# platform.system() reports "Windows".
pytestmark = pytest.mark.skipif(
platform.system() == "Windows",
reason="bash + npm tooling assumed POSIX (mcp-wrapper build path)",
)
# ---------------------------------------------------------------------------
# Fixture: build the wrapper once per module so all 3 tests reuse the same
# dist/bridge.js artifact. Mirrors the pattern in
# tests/test_socket_subagent_reuse.py:built_wrapper.
# ---------------------------------------------------------------------------
@pytest.fixture(scope="module")
def built_bridge_js() -> Path:
    """Compile the TypeScript wrapper and return the path of ``dist/bridge.js``.

    Module-scoped, so the build happens once and every test in the module
    reads the same artifact. ``npm install`` only runs when
    ``node_modules`` is absent; ``npm run build`` always runs. A failing
    npm command raises ``subprocess.CalledProcessError`` (``check=True``).
    """
    build_steps = [["npm", "run", "build"]]
    if not (WRAPPER / "node_modules").exists():
        # No installed dependencies yet: install before building.
        build_steps.insert(0, ["npm", "install"])
    for argv in build_steps:
        subprocess.run(argv, cwd=WRAPPER, check=True)

    dist_dir = WRAPPER / "dist"
    bridge_js = dist_dir / "bridge.js"
    # The failure message lists whatever .js files the build did produce.
    assert bridge_js.exists(), (
        "npm run build should have produced dist/bridge.js — actual: "
        f"{list(dist_dir.glob('*.js')) if dist_dir.exists() else 'no dist dir'}"
    )
    return bridge_js
# ---------------------------------------------------------------------------
# Tests.
# ---------------------------------------------------------------------------
def test_dist_bridge_js_has_no_child_process_spawn(built_bridge_js):
    """REGRESSION TRAP: compiled bridge.js must not reference child_process.

    The build output is scanned for every syntactic form that pulls in the
    ``child_process`` module, with and without the ``node:`` prefix and
    with either quote style:

    - ``from "<module>"``      static ESM import (any named/default import)
    - ``require("<module>")``  CommonJS form, if the module target changes
    - ``import("<module>")``   dynamic ESM import

    plus the literal ``child_process.spawn`` (a ``.spawn`` access on a
    module namespace import).

    All forms are checked because tsc's exact output depends on tsconfig
    (module=ESNext vs CommonJS), and a future config change must NOT
    silently allow spawn back in.
    """
    text = built_bridge_js.read_text(encoding="utf-8")

    forbidden_substrings = ["child_process.spawn"]
    for module_name in ("node:child_process", "child_process"):
        for quote in ('"', "'"):
            quoted = f"{quote}{module_name}{quote}"
            forbidden_substrings.extend(
                [
                    f"from {quoted}",
                    f"require({quoted})",
                    f"import({quoted})",
                ]
            )

    found = [s for s in forbidden_substrings if s in text]
    assert not found, (
        "REGRESSION: dist/bridge.js contains spawn-related substring(s) "
        f"that Phase 7.1 explicitly removed: {found}. "
        "Someone has re-introduced the TOCTOU spawn race that Phase 7.1's "
        "pure-connector refactor eliminated. Re-read 07.1-CONTEXT.md "
        "D7.1-07 (bridge.ts spawn-removal scope) before pushing."
    )
def test_dist_bridge_js_has_DaemonUnreachableError(built_bridge_js):
    """The compiled bridge.js must still mention DaemonUnreachableError twice.

    This guards the fail-loud path: if a refactor made start() swallow the
    connect failure, wrappers would boot without a daemon and every
    tools/call would return daemon_unreachable. Two or more occurrences of
    the identifier are taken as evidence that the class definition and at
    least one throw-site survived compilation (the Plan 07.1-04 Task 1
    done-criteria for the source, applied to the build output).
    """
    compiled = built_bridge_js.read_text(encoding="utf-8")
    count = compiled.count("DaemonUnreachableError")
    seen_often_enough = count >= 2
    assert seen_often_enough, (
        f"REGRESSION: dist/bridge.js contains DaemonUnreachableError "
        f"only {count} times (expected >=2: class definition + at least "
        f"one throw-site). The fail-loud error path may have been "
        f"removed or renamed."
    )
def test_dist_bridge_js_has_5000_socket_timeout(built_bridge_js):
    """Assert SOCKET_CONNECT_TIMEOUT_MS is declared as exactly 5000.

    The value was raised from 250ms (pre-7.1) to cover the launchd
    socket-activation cold-start window. The check is anchored to the
    named constant rather than a bare ``5000``, which could match
    unrelated literals in the compiled output.

    The declaration only counts when ``5000`` is the whole literal: a
    plain substring test would also accept ``= 50000`` or ``= 5000.5``,
    so the character following the match must not extend the number.

    If this test fails:
    - The constant was renamed: update the assertion AND verify the new
      name is the connect timeout (not idle-shutdown / heartbeat).
    - The value was changed (e.g., back to 250): re-read CONTEXT.md
      D7.1-07. A lower timeout will spuriously throw
      DaemonUnreachableError on legitimate cold-starts.
    """
    text = built_bridge_js.read_text(encoding="utf-8")
    needle = "SOCKET_CONNECT_TIMEOUT_MS = 5000"

    declared = False
    start = text.find(needle)
    while start != -1:
        end = start + len(needle)
        following = text[end:end + 1]  # "" at end of file
        # Reject longer literals such as 50000, 5000.5 or 5000_0.
        if not (following.isalnum() or following in ("_", ".")):
            declared = True
            break
        start = text.find(needle, start + 1)

    assert declared, (
        "REGRESSION: dist/bridge.js does not contain "
        "'SOCKET_CONNECT_TIMEOUT_MS = 5000'. Either the constant was "
        "renamed, the value was changed, or tsc minification was "
        "enabled (which would also break the source-level grep done "
        "criteria in Task 1). Phase 7.1 requires 5000ms to cover "
        "launchd socket-activation cold-start window — see "
        "07.1-CONTEXT.md D7.1-07."
    )

View file

@ -0,0 +1,541 @@
"""Plan 07.1-04 R2/A6 acceptance — bridge.ts is a pure connector (no spawn).
# History
This file was renamed-in-place from the pre-Phase-7.1 test of the same
name. The pre-Phase-7.1 file asserted spawn-fallback
behavior:
- test_cold_start_spawns_daemon_under_5s asserted that the wrapper
SPAWNS `python -m iai_mcp.daemon` when the socket is missing
(`daemon_delta >= 1`).
- test_warm_start_reuses_daemon_under_250ms relied on wrapper #1 to
bootstrap the daemon via spawn so wrapper #2 could attach.
Phase 7.1 (this plan, 07.1-04) DELETES bridge.ts's spawn capability:
the wrapper now ONLY connects to ~/.iai-mcp/.daemon.sock with a 5s
timeout; on miss it throws `DaemonUnreachableError` (code -32002) and
the wrapper process exits non-zero. Daemon spawning is now launchd's
job (Wave 1 plist + Wave 2 install.sh + Wave 2 LISTEN_FDS branch).
Both pre-7.1 tests therefore had to be restructured:
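- Old `test_cold_start_spawns_daemon_under_5s` is REPLACED by
`test_start_throws_DaemonUnreachableError_when_socket_missing`
which asserts the inverse: NO daemon spawned. (Originally it also
asserted that the wrapper exits non-zero with the new error in
stderr; since the 2026-05-02 mcp-tools-list-empty-cache fix the
wrapper must instead stay alive, serve tools/list, and surface
daemon_unreachable per tools/call — see that test's docstring.)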
- Old `test_warm_start_reuses_daemon_under_250ms` is REPLACED by
`test_start_succeeds_with_warm_daemon_no_extra_spawn` which
pre-starts a daemon manually (subprocess.Popen of
`python -m iai_mcp.daemon`), waits for socket bind, then spawns
the wrapper and asserts initialize handshake succeeds AND
daemon process count delta == 0 (the wrapper did NOT spawn a
second daemon).
# Test isolation strategy
Both tests use IAI_DAEMON_SOCKET_PATH env override (HIGH-4 lock at
bridge.ts module top verified preserved through Plan 07.1-04 Task 1
edit) so they target a tmp socket and never touch the user's real
~/.iai-mcp/.daemon.sock the production daemon (if any) is not
disturbed.
Delta-snapshot psutil pattern (lesson from the 07-04
SUMMARYs): we count `iai_mcp.daemon` processes BEFORE and AFTER the
wrapper boot and assert the DELTA, not the absolute. On a developer
machine with a live production daemon, `before["daemon"] >= 1`; an
absolute `assert after["daemon"] == 1` would falsely fail.
# Pattern reuse
Helpers (`_count_iai_mcp_processes`, `_kill_test_daemons`,
`_spawn_wrapper`, `_initialize`, `_call_memory_recall`,
`_wait_for_daemon_socket`) and the `built_wrapper` fixture are kept
verbatim from the pre-7.1 file they remain valid scaffolding.
The `_count_iai_mcp_processes` shape mirrors
`tests/test_socket_subagent_reuse.py` and `tests/test_socket_fail_loud.py`.
"""
from __future__ import annotations
import json
import os
import signal
import subprocess
import sys
import time
from pathlib import Path
import psutil
import pytest
# Two directory levels above this test file.
REPO = Path(__file__).resolve().parent.parent
# Directory that the build fixture runs npm in and that holds dist/.
WRAPPER = REPO / "mcp-wrapper"
# ---------------------------------------------------------------------------
# Fixture: built wrapper (npm install + npm run build once per module).
# ---------------------------------------------------------------------------
@pytest.fixture(scope="module")
def built_wrapper() -> Path:
    """Compile the TypeScript wrapper once per module; return ``dist/index.js``.

    ``npm install`` is skipped when ``node_modules`` already exists;
    ``npm run build`` always runs. A failing npm command raises
    ``subprocess.CalledProcessError`` (``check=True``).
    """
    build_steps = [["npm", "run", "build"]]
    if not (WRAPPER / "node_modules").exists():
        build_steps.insert(0, ["npm", "install"])
    for argv in build_steps:
        subprocess.run(argv, cwd=WRAPPER, check=True)

    entry_point = WRAPPER / "dist" / "index.js"
    assert entry_point.exists(), "npm run build should have produced dist/index.js"
    return entry_point
# ---------------------------------------------------------------------------
# Helpers: psutil snapshot, wrapper spawn, MCP handshake + recall round-trip.
# ---------------------------------------------------------------------------
def _count_iai_mcp_processes() -> dict[str, int]:
    """Count running processes whose command line mentions iai_mcp.

    Returns ``{"core": n, "daemon": m}``: a process is counted under a key
    when its space-joined command line contains ``iai_mcp.core`` /
    ``iai_mcp.daemon`` (one process can count under both). Processes with
    an empty or unreadable command line are ignored. Callers compare two
    snapshots and assert on the difference.
    """
    markers = (("core", "iai_mcp.core"), ("daemon", "iai_mcp.daemon"))
    tally = {"core": 0, "daemon": 0}
    for proc in psutil.process_iter(["cmdline"]):
        try:
            argv = proc.info.get("cmdline") or []
            if argv:
                command_line = " ".join(part or "" for part in argv)
                for key, marker in markers:
                    if marker in command_line:
                        tally[key] += 1
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            continue
    return tally
def _kill_test_daemons(sock_path: Path) -> None:
    """Send SIGTERM to iai_mcp.daemon processes bound to the test socket.

    A process is targeted only when its command line contains
    ``iai_mcp.daemon`` AND its ``IAI_DAEMON_SOCKET_PATH`` environment
    variable equals ``str(sock_path)``, so a daemon on any other socket
    path is left alone. The function does not wait for the processes to
    exit.
    """
    wanted_socket = str(sock_path)
    for candidate in psutil.process_iter(["cmdline", "environ"]):
        try:
            info = candidate.info
            command_line = " ".join(info.get("cmdline") or [])
            if "iai_mcp.daemon" in command_line:
                environment = info.get("environ") or {}
                if environment.get("IAI_DAEMON_SOCKET_PATH") == wanted_socket:
                    candidate.send_signal(signal.SIGTERM)
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            continue
def _spawn_wrapper(
    built_wrapper: Path,
    env_overrides: dict[str, str] | None = None,
) -> subprocess.Popen:
    """Launch ``node <built_wrapper>`` with piped stdin/stdout/stderr.

    The child inherits the current environment plus ``IAI_MCP_PYTHON``
    (this interpreter) and a ``PYTHONPATH`` with ``<REPO>/src`` prepended;
    ``env_overrides`` is applied last and wins over all of those. The
    working directory is the repository root.
    """
    python_path = str(REPO / "src") + os.pathsep + os.environ.get("PYTHONPATH", "")
    child_env = {
        **os.environ,
        "IAI_MCP_PYTHON": sys.executable,
        "PYTHONPATH": python_path,
        **(env_overrides or {}),
    }
    command = ["node", str(built_wrapper)]
    return subprocess.Popen(
        command,
        cwd=str(REPO),
        env=child_env,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
def _spawn_daemon_in_background(
    sock_path: Path, store_dir: Path
) -> subprocess.Popen:
    """Start ``python -m iai_mcp.daemon`` as a background child process.

    Phase 7.1 wrappers no longer spawn the daemon themselves, so the test
    starts one by hand. No LISTEN_FDS variable is added here, i.e. this
    uses the manual-run code path (D7.1-09 backward compat per the
    original note).

    The child gets the current environment plus the test socket path, the
    test store directory, a 120s idle-shutdown setting and a ``PYTHONPATH``
    with ``<REPO>/src`` prepended. Its stdout and stderr are discarded.
    """
    python_path = str(REPO / "src") + os.pathsep + os.environ.get("PYTHONPATH", "")
    daemon_env = {
        **os.environ,
        "IAI_DAEMON_SOCKET_PATH": str(sock_path),
        "IAI_MCP_STORE": str(store_dir),
        "IAI_DAEMON_IDLE_SHUTDOWN_SECS": "120",
        "PYTHONPATH": python_path,
    }
    command = [sys.executable, "-m", "iai_mcp.daemon"]
    return subprocess.Popen(
        command,
        cwd=str(REPO),
        env=daemon_env,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
def _initialize(proc: subprocess.Popen, rpc_id: int = 1) -> dict:
"""MCP initialize handshake — required before tools/call works."""
assert proc.stdin is not None and proc.stdout is not None
init = {
"jsonrpc": "2.0",
"id": rpc_id,
"method": "initialize",
"params": {
"protocolVersion": "2025-03-26",
"capabilities": {},
"clientInfo": {"name": "iai-mcp-bridge-no-spawn-test", "version": "0.1.0"},
},
}
proc.stdin.write((json.dumps(init) + "\n").encode("utf-8"))
proc.stdin.flush()
line = proc.stdout.readline()
if not line:
raise RuntimeError("wrapper closed stdout before initialize reply")
resp = json.loads(line.decode("utf-8"))
note = {"jsonrpc": "2.0", "method": "notifications/initialized"}
proc.stdin.write((json.dumps(note) + "\n").encode("utf-8"))
proc.stdin.flush()
return resp
def _call_memory_recall(
proc: subprocess.Popen,
cue: str,
rpc_id: int = 2,
*,
timeout_sec: float = 10.0,
) -> tuple[float, dict]:
"""Send tools/call memory_recall + return (wall-clock-elapsed, response)."""
assert proc.stdin is not None and proc.stdout is not None
req = {
"jsonrpc": "2.0",
"id": rpc_id,
"method": "tools/call",
"params": {
"name": "memory_recall",
"arguments": {"cue": cue, "budget_tokens": 100},
},
}
t0 = time.monotonic()
proc.stdin.write((json.dumps(req) + "\n").encode("utf-8"))
proc.stdin.flush()
import select
deadline = time.monotonic() + timeout_sec
line = b""
while time.monotonic() < deadline:
readable, _, _ = select.select([proc.stdout], [], [], 0.5)
if readable:
line = proc.stdout.readline()
break
elapsed = time.monotonic() - t0
if not line:
raise RuntimeError(
f"no response within {timeout_sec}s "
f"(stderr: {proc.stderr.read1(2000) if proc.stderr else b'?'!r})"
)
return elapsed, json.loads(line.decode("utf-8"))
def _wait_for_daemon_socket(sock_path: Path, timeout_sec: float = 30.0) -> bool:
"""Poll for sock_path existence at 0.1s cadence; True on bind."""
deadline = time.monotonic() + timeout_sec
while time.monotonic() < deadline:
if sock_path.exists():
return True
time.sleep(0.1)
return False
# ---------------------------------------------------------------------------
# Tests — contract: wrappers are pure connectors, no spawn.
# ---------------------------------------------------------------------------
def test_start_throws_DaemonUnreachableError_when_socket_missing(
    built_wrapper, tmp_path
):
    """Phase 7.1 + mcp-tools-list-empty-cache (2026-05-02): with no daemon
    on the test socket, the wrapper MUST stay alive and MUST serve
    tools/list from the static registry within an MCP-client-friendly
    timeout. tools/call MUST surface daemon_unreachable as an isError
    response (fail-loud at the right layer).

    History (same test slot; replaces the pre-2026-05-02 contract that
    asserted "wrapper exits non-zero on daemon miss"):

    - Pre-fix the wrapper had a top-level `await bridge.start()` BEFORE
      `server.connect(transport)`. On a missing/slow daemon socket the
      Node process either exited non-zero (after the 5s timeout) OR, the
      bug being fixed, replied to MCP `initialize` after a long delay
      with no tools/list ever cached, making `mcp__iai-mcp__*` invisible
      for the entire client session. The old assertions (non-zero exit,
      DaemonUnreachableError on stderr) encoded the consequence of that
      ordering, not the architectural contract.
    - Post-fix `server.connect(transport)` runs FIRST; bridge.start() is
      fire-and-forget; tools/list is independent of daemon state;
      tools/call lazy-awaits bridge readiness and surfaces
      daemon_unreachable as a structured tool-result error.

    The load-bearing invariant `daemon_delta == 0` is UNCHANGED: the
    wrapper must NOT spawn the daemon under any condition.
    """
    import select as _select
    import shutil

    # NOTE(review): presumably a short /tmp path (rather than tmp_path) is
    # used to keep the socket path short enough for AF_UNIX — confirm.
    sock_dir = Path(f"/tmp/iai-7.1-noconn-{os.getpid()}-{id(tmp_path)}")
    sock_dir.mkdir(parents=True, exist_ok=True)
    sock_path = sock_dir / "d.sock"
    store_dir = sock_dir / "store"
    store_dir.mkdir(parents=True, exist_ok=True)

    # Verify clean state — no socket file at our tmp path.
    assert not sock_path.exists(), f"tmp socket pre-exists: {sock_path}"

    # Baseline snapshot. The user's production daemon may exist on the
    # host (different socket path); we count globally and assert delta.
    baseline = _count_iai_mcp_processes()
    daemon_baseline = baseline["daemon"]
    core_baseline = baseline["core"]

    env_overrides = {
        "IAI_DAEMON_SOCKET_PATH": str(sock_path),
        "IAI_MCP_STORE": str(store_dir),
    }
    wrapper_proc = _spawn_wrapper(built_wrapper, env_overrides)
    try:
        # ---- Assertion 1 (NEW contract): wrapper serves MCP with no daemon ----
        # initialize and tools/list must both be answered even though
        # nothing is listening on the daemon socket.
        init_resp = _initialize(wrapper_proc, rpc_id=1)
        assert "result" in init_resp, f"initialize failed: {init_resp}"

        # tools/list — must respond from the static registry within the
        # MCP-client tools/list timeout window (~3s observed; we allow
        # 4s for CI overhead).
        list_req = {
            "jsonrpc": "2.0",
            "id": 2,
            "method": "tools/list",
            "params": {},
        }
        list_t0 = time.monotonic()
        wrapper_proc.stdin.write((json.dumps(list_req) + "\n").encode("utf-8"))
        wrapper_proc.stdin.flush()
        # Bounded wait so a wrapper that never answers fails the test
        # instead of blocking readline() forever.
        readable, _, _ = _select.select([wrapper_proc.stdout], [], [], 10.0)
        assert readable, "wrapper did not respond to tools/list within 10s"
        line = wrapper_proc.stdout.readline()
        list_elapsed = time.monotonic() - list_t0
        assert line, "wrapper closed stdout before tools/list reply"
        list_resp = json.loads(line.decode("utf-8"))
        assert "result" in list_resp, f"tools/list error: {list_resp}"
        tools = list_resp["result"]["tools"]
        names = {t["name"] for t in tools}
        assert len(names) == 12, (
            f"tools/list returned {len(names)} tools, expected 12. "
            f"names={sorted(names)}"
        )
        assert list_elapsed < 4.0, (
            f"tools/list took {list_elapsed:.2f}s with no daemon — "
            f"regression: wrapper is blocking server.connect on "
            f"bridge.start (the mcp-tools-list-empty-cache bug)."
        )

        # ---- Assertion 2 (NEW contract): wrapper survives the bridge timeout ----
        # 5s SOCKET_CONNECT_TIMEOUT_MS in bridge.ts means the in-flight
        # bridge.start() promise rejects ~5s after wrapper boot. The
        # `.catch(() => {})` on the fire-and-forget chain in index.ts
        # MUST swallow this rejection — wrapper must remain alive.
        # 7s budget = 5s timeout + 2s slack for slow Node startup.
        time.sleep(7.0)
        assert wrapper_proc.poll() is None, (
            f"wrapper exited (rc={wrapper_proc.returncode}) past the "
            f"5s bridge connect timeout — fire-and-forget bridge.start "
            f"chain is leaking the rejection. The .catch(() => {{}}) on "
            f"the top-level chain in index.ts must absorb "
            f"DaemonUnreachableError."
        )

        # ---- Assertion 3 (fail-loud at right layer): tools/call surfaces error ----
        # Daemon-down failures must NOT be silent. Pre-fix the symptom
        # was an empty tools list (silent). Post-fix the wrapper serves
        # tools/list, but tools/call MUST report the failure so the
        # user sees what happened.
        call_req = {
            "jsonrpc": "2.0",
            "id": 3,
            "method": "tools/call",
            "params": {
                "name": "memory_recall",
                "arguments": {"cue": "no-daemon test"},
            },
        }
        wrapper_proc.stdin.write((json.dumps(call_req) + "\n").encode("utf-8"))
        wrapper_proc.stdin.flush()
        # The bridge.start() lazy-await inside the call handler will hit
        # the 5s connect timeout again. Allow 12s.
        deadline = time.monotonic() + 12.0
        call_line = b""
        while time.monotonic() < deadline:
            readable, _, _ = _select.select([wrapper_proc.stdout], [], [], 0.5)
            if readable:
                call_line = wrapper_proc.stdout.readline()
                break
        assert call_line, "wrapper did not respond to tools/call within 12s"
        call_resp = json.loads(call_line.decode("utf-8"))
        assert "result" in call_resp, f"tools/call missing result: {call_resp}"
        result = call_resp["result"]
        # The wrapper renders bridge errors as a tool result with
        # isError=True (see CallToolRequestSchema handler in index.ts).
        # A bare JSON-RPC `error` envelope is rejected by the assert
        # above; silent success is rejected by the assert below.
        is_error = result.get("isError") is True
        content_text = ""
        if isinstance(result.get("content"), list) and result["content"]:
            content_text = result["content"][0].get("text", "") or ""
        assert is_error or "daemon_unreachable" in content_text.lower() \
            or "daemonunreachable" in content_text.lower(), (
            f"tools/call did NOT surface daemon_unreachable when daemon "
            f"is missing — fail-loud invariant violated. result={result}"
        )

        # ---- Assertion 4 (UNCHANGED invariant): no spawn ----
        # Give any (hypothetically) spawned-but-detached daemon 1s to
        # surface in psutil.
        time.sleep(1.0)
        after = _count_iai_mcp_processes()
        daemon_delta = after["daemon"] - daemon_baseline
        assert daemon_delta == 0, (
            f"REGRESSION: wrapper spawned {daemon_delta} new iai_mcp.daemon "
            f"process(es) (baseline={daemon_baseline}, after={after['daemon']}). "
            f"Phase 7.1 wrappers MUST NOT spawn the daemon — the spawn-fallback "
            f"chain in bridge.ts has been re-introduced."
        )
        core_delta = after["core"] - core_baseline
        assert core_delta == 0, (
            f"wrapper spawned {core_delta} iai_mcp.core process(es) "
            f"(baseline={core_baseline}, after={after['core']})"
        )
    finally:
        if wrapper_proc.poll() is None:
            try:
                wrapper_proc.terminate()
                wrapper_proc.wait(timeout=5)
            except subprocess.TimeoutExpired:
                wrapper_proc.kill()
                wrapper_proc.wait()  # reap the child so no zombie is left
        _kill_test_daemons(sock_path)
        time.sleep(0.3)
        # Remove the socket, the store and the scratch directory itself;
        # previously only the socket file was unlinked.
        shutil.rmtree(sock_dir, ignore_errors=True)
def test_start_succeeds_with_warm_daemon_no_extra_spawn(built_wrapper, tmp_path):
    """R2 happy path: with a daemon ALREADY running on the test socket
    (started manually by the test, mimicking what launchd does in
    production), the wrapper must connect successfully, complete the MCP
    initialize handshake, run a memory_recall round-trip, AND NOT spawn a
    second daemon.

    This proves:
    (a) bridge.ts:start() still works against a warm socket (no
        regression in the connect path).
    (b) The wrapper does NOT spawn a second daemon when one already
        exists (the singleton property; in 7.1 this is trivially true
        because the spawn code is GONE).
    """
    import shutil

    sock_dir = Path(f"/tmp/iai-7.1-warm-{os.getpid()}-{id(tmp_path)}")
    sock_dir.mkdir(parents=True, exist_ok=True)
    sock_path = sock_dir / "d.sock"
    store_dir = sock_dir / "store"
    store_dir.mkdir(parents=True, exist_ok=True)
    assert not sock_path.exists()

    # Pre-start a daemon manually (mimics launchd socket-activated spawn
    # in production; in tests we use the manual-run code path per
    # D7.1-09 backward compat).
    daemon_proc = _spawn_daemon_in_background(sock_path, store_dir)
    try:
        # Wait for the daemon to bind. Cold-start (bge-small load +
        # LanceDB open + asyncio.start_unix_server) is empirically
        # 3-10s on macOS.
        assert _wait_for_daemon_socket(sock_path, timeout_sec=30.0), (
            f"daemon did not bind socket {sock_path} within 30s "
            f"(daemon exit code: {daemon_proc.poll()})"
        )

        # Snapshot AFTER daemon is up but BEFORE wrapper spawns. Any
        # new daemon during wrapper boot = singleton-violation regression.
        baseline = _count_iai_mcp_processes()
        daemon_baseline = baseline["daemon"]
        core_baseline = baseline["core"]

        env_overrides = {
            "IAI_DAEMON_SOCKET_PATH": str(sock_path),
            "IAI_MCP_STORE": str(store_dir),
        }
        wrapper_proc = _spawn_wrapper(built_wrapper, env_overrides)
        try:
            # MCP initialize handshake — wrapper must connect to the
            # warm daemon and reply.
            init_resp = _initialize(wrapper_proc, rpc_id=1)
            assert "result" in init_resp, f"initialize failed: {init_resp}"

            # memory_recall round-trip — proves the JSON-RPC wire path
            # over the socket works end-to-end.
            elapsed, recall_resp = _call_memory_recall(
                wrapper_proc, cue="phase 7.1 warm-daemon test",
                rpc_id=2, timeout_sec=10.0,
            )
            # Either a result (recall hit/miss) or an error envelope is
            # acceptable — what we care about is that JSON-RPC came back.
            assert "result" in recall_resp or "error" in recall_resp, recall_resp

            # Round-trip should be sub-second on a warm daemon. Generous
            # 2s budget against test-harness overhead (subprocess startup,
            # MCP handshake jitter); the SPEC A6 250ms budget is verified
            # in Wave 6 acceptance against the production daemon.
            assert elapsed < 2.0, (
                f"warm-daemon memory_recall took {elapsed:.2f}s, exceeds "
                f"2.0s safety budget"
            )

            # Give any (hypothetically) spawned daemon 0.5s to surface.
            time.sleep(0.5)
            after = _count_iai_mcp_processes()

            # No new daemon — singleton property holds (trivially in 7.1
            # because the spawn code is gone).
            daemon_delta = after["daemon"] - daemon_baseline
            assert daemon_delta == 0, (
                f"REGRESSION: wrapper spawned a second daemon during boot "
                f"(baseline={daemon_baseline}, after={after['daemon']}, "
                f"delta={daemon_delta}). wrappers MUST be pure "
                f"connectors."
            )
            core_delta = after["core"] - core_baseline
            assert core_delta == 0, (
                f"wrapper spawned iai_mcp.core (delta={core_delta})"
            )
        finally:
            try:
                wrapper_proc.terminate()
                wrapper_proc.wait(timeout=5)
            except subprocess.TimeoutExpired:
                wrapper_proc.kill()
                wrapper_proc.wait()  # reap the child so no zombie is left
    finally:
        # Stop the test daemon (we started it; we stop it).
        try:
            daemon_proc.terminate()
            daemon_proc.wait(timeout=10)
        except subprocess.TimeoutExpired:
            daemon_proc.kill()
            daemon_proc.wait()  # reap the child so no zombie is left
        _kill_test_daemons(sock_path)
        time.sleep(0.3)
        # Remove the socket, the store and the scratch directory itself;
        # previously only the socket file was unlinked.
        shutil.rmtree(sock_dir, ignore_errors=True)

View file

@ -0,0 +1,175 @@
"""Plan 03-03 Task 1 RED + Task 2 GREEN — camouflaging detector.
Constitutional guard: detector observes user SURFACE formality trajectory (D-AUTIST13-01,
D-AUTIST13-03). When an over-formal sliding-5 weekly trajectory is confirmed, the system
adjusts OUR register (D-AUTIST13-02) never pushes the user to change. Masking
modeling is forbidden (Cook 2021 / Raymaker 2020).
"""
from __future__ import annotations
from datetime import datetime, timedelta, timezone
import pytest
from iai_mcp.events import query_events, write_event
from iai_mcp.store import MemoryStore
def _seed_weekly_scores(store, values: list[float]) -> None:
    """Write one ``formality_score_weekly`` event per entry of ``values``.

    Events are written in list order. Each ``week_iso`` is spaced 7 days
    after the previous one, starting ``7 * len(values)`` days before now
    (UTC). Every event carries ``lang="en"`` and ``samples=10``.
    """
    one_week = timedelta(days=7)
    first_week = datetime.now(timezone.utc) - timedelta(days=7 * len(values))
    for week_index, score in enumerate(values):
        week_start = first_week + week_index * one_week
        payload = {
            "score": float(score),
            "lang": "en",
            "week_iso": week_start.isoformat(),
            "samples": 10,
        }
        write_event(
            store,
            kind="formality_score_weekly",
            data=payload,
            severity="info",
        )
# ------------------------------------------------------------- detector
def test_detect_camouflaging_rising_trajectory(tmp_path):
    """Five rising weekly scores with a high mean are reported as detected.

    Expects slope > 0.05 and mean > 0.6 in the returned dict.
    """
    from iai_mcp.camouflaging import detect_camouflaging

    rising_scores = [0.4, 0.55, 0.65, 0.75, 0.85]
    memory = MemoryStore(path=tmp_path)
    _seed_weekly_scores(memory, rising_scores)

    verdict = detect_camouflaging(memory)

    assert verdict["detected"] is True
    assert verdict["trajectory_slope"] > 0.05
    assert verdict["current_mean"] > 0.6
def test_detect_camouflaging_flat_trajectory(tmp_path):
    """Five identical scores of 0.5 (no slope, mean 0.5) are not detected."""
    from iai_mcp.camouflaging import detect_camouflaging

    flat_scores = [0.5] * 5
    memory = MemoryStore(path=tmp_path)
    _seed_weekly_scores(memory, flat_scores)

    verdict = detect_camouflaging(memory)

    assert verdict["detected"] is False
def test_detect_camouflaging_insufficient_samples(tmp_path):
    """Only two weekly scores: not detected, and sample_count reports 2."""
    from iai_mcp.camouflaging import detect_camouflaging

    too_few_scores = [0.3, 0.5]
    memory = MemoryStore(path=tmp_path)
    _seed_weekly_scores(memory, too_few_scores)

    verdict = detect_camouflaging(memory)

    assert verdict["detected"] is False
    assert verdict["sample_count"] == 2
def test_detect_camouflaging_high_mean_but_flat_no_detect(tmp_path):
    """A mean above 0.6 without any slope is not enough: BOTH are required."""
    from iai_mcp.camouflaging import detect_camouflaging

    high_flat_scores = [0.7] * 5
    memory = MemoryStore(path=tmp_path)
    _seed_weekly_scores(memory, high_flat_scores)

    verdict = detect_camouflaging(memory)

    # No slope, so the high mean alone must not trigger detection.
    assert verdict["detected"] is False
def test_detect_camouflaging_rising_but_low_mean_no_detect(tmp_path):
    """A rising trajectory whose mean stays under 0.6 is not detected."""
    from iai_mcp.camouflaging import detect_camouflaging

    low_rising_scores = [0.1, 0.15, 0.2, 0.3, 0.4]
    memory = MemoryStore(path=tmp_path)
    _seed_weekly_scores(memory, low_rising_scores)

    verdict = detect_camouflaging(memory)

    assert verdict["detected"] is False
# ------------------------------------------------------------- weekly pass
def test_run_weekly_pass_emits_events_and_bumps_knob(tmp_path, monkeypatch):
    """On a detected trajectory the weekly pass emits events and bumps the knob.

    Expects at least one ``camouflaging_detected`` and one
    ``register_relaxed`` event, and ``camouflaging_relaxation`` above 0.0.
    """
    from iai_mcp.camouflaging import run_weekly_pass
    import iai_mcp.core as core

    # Start from 0.0 regardless of earlier tests. monkeypatch restores the
    # previous value at teardown, so this test's bump does not leak into
    # later tests in the same process.
    monkeypatch.setitem(core._profile_state, "camouflaging_relaxation", 0.0)

    store = MemoryStore(path=tmp_path)
    _seed_weekly_scores(store, [0.4, 0.55, 0.65, 0.75, 0.85])
    run_weekly_pass(store)

    detected = query_events(store, kind="camouflaging_detected", limit=5)
    relaxed = query_events(store, kind="register_relaxed", limit=5)
    assert len(detected) >= 1
    assert len(relaxed) >= 1

    # Knob moved up from 0.0.
    value = core._profile_state["camouflaging_relaxation"]
    assert value > 0.0
def test_run_weekly_pass_flat_no_events(tmp_path, monkeypatch):
    """A flat trajectory emits no events and leaves the knob at 0.0."""
    from iai_mcp.camouflaging import run_weekly_pass
    import iai_mcp.core as core

    # Set the knob through monkeypatch so the previous value is restored
    # at teardown instead of leaking into later tests.
    monkeypatch.setitem(core._profile_state, "camouflaging_relaxation", 0.0)

    store = MemoryStore(path=tmp_path)
    _seed_weekly_scores(store, [0.5, 0.5, 0.5, 0.5, 0.5])
    run_weekly_pass(store)

    detected = query_events(store, kind="camouflaging_detected", limit=5)
    relaxed = query_events(store, kind="register_relaxed", limit=5)
    assert detected == []
    assert relaxed == []
    assert core._profile_state["camouflaging_relaxation"] == 0.0
# ------------------------------------------------------------- record + relax
def test_record_user_formality_writes_weekly_event(tmp_path):
    """One call to record_user_formality yields one formality_score_weekly event.

    The event's data must carry a ``score`` within [0.0, 1.0].
    """
    from iai_mcp.camouflaging import record_user_formality

    memory = MemoryStore(path=tmp_path)
    sample_text = "The proposal is, therefore, accepted."
    record_user_formality(memory, sample_text, "en")

    weekly_events = query_events(memory, kind="formality_score_weekly", limit=5)
    assert len(weekly_events) == 1
    payload = weekly_events[0]["data"]
    assert "score" in payload
    assert 0.0 <= payload["score"] <= 1.0
def test_relax_register_bumps_and_emits(tmp_path, monkeypatch):
    """relax_register raises the knob by ``delta`` and logs the change.

    Expects the knob to go from 0.0 to 0.25 and exactly one
    ``register_relaxed`` event recording ``delta``, ``from`` and ``to``.
    """
    from iai_mcp.camouflaging import relax_register
    import iai_mcp.core as core

    # Set the knob through monkeypatch so the previous value is restored
    # at teardown instead of leaking into later tests.
    monkeypatch.setitem(core._profile_state, "camouflaging_relaxation", 0.0)

    store = MemoryStore(path=tmp_path)
    relax_register(store, delta=0.25)
    assert abs(core._profile_state["camouflaging_relaxation"] - 0.25) < 1e-9

    events = query_events(store, kind="register_relaxed", limit=5)
    assert len(events) == 1
    assert abs(events[0]["data"]["delta"] - 0.25) < 1e-9
    assert abs(events[0]["data"]["from"] - 0.0) < 1e-9
    assert abs(events[0]["data"]["to"] - 0.25) < 1e-9
def test_relax_register_caps_at_one(tmp_path, monkeypatch):
    """The knob is capped at 1.0: 0.95 plus a delta of 0.5 ends at 1.0."""
    from iai_mcp.camouflaging import relax_register
    import iai_mcp.core as core

    # Set the knob through monkeypatch so the previous value is restored
    # at teardown; otherwise later tests would start with it at 1.0.
    monkeypatch.setitem(core._profile_state, "camouflaging_relaxation", 0.95)

    store = MemoryStore(path=tmp_path)
    relax_register(store, delta=0.5)
    assert core._profile_state["camouflaging_relaxation"] == 1.0

View file

@ -0,0 +1,207 @@
"""Phase 07.11 Plan 01 / — `memory_capture` dedup contract.
These four regression tests are the executable specification for D-01:
* `test_query_similar_accepts_tier_kwarg` `query_similar` must accept a
`tier` kwarg, must filter at the LanceDB where-layer when it is given, and
must `ValueError` BEFORE any I/O on bad tier values.
* `test_capture_turn_dedups_on_high_cos_match` capturing the same cue twice
yields one inserted + one reinforced; the dedup branch is reachable.
* `test_capture_turn_inserts_on_low_cos` distinct cues both insert; no
false dedup.
* `test_reinforce_record_increments_edge_weight` the new
`store.reinforce_record` typed wrapper is a thin `boost_edges` delegate
whose self-loop weight increases monotonically across calls.
Honesty constraint: every test below MUST fail on `git stash` of the
plan's source diffs and pass on `git stash pop`. RED-witness ran 2026-04-30
on un-fixed source: tier-kwarg + reinforce_record cases TypeError before the
fix; dedup cases fail because the dedup branch is unreachable dead code.
"""
from __future__ import annotations
from datetime import datetime, timezone
from pathlib import Path
from uuid import UUID, uuid4
import pytest
from iai_mcp.capture import capture_turn
from iai_mcp.store import MemoryStore
from iai_mcp.types import EMBED_DIM, MemoryRecord
# --------------------------------------------------------------------------- fixtures
# Pattern copied verbatim from tests/test_pipeline_anti_hits_malformed.py:33-50
# (`_isolated_keyring` autouse fixture is the project canon for tests touching
# encrypted records on the construction host where the real keyring is absent
# or hangs).
@pytest.fixture(autouse=True)
def _isolated_keyring(monkeypatch: pytest.MonkeyPatch):
    """Replace the `keyring` get/set/delete password functions with dict-backed stubs.

    Applied automatically to every test in this module. Yields the backing
    dict, which is keyed by the ``(service, username)`` pair.
    """
    import keyring as _keyring

    vault: dict[tuple[str, str], str] = {}

    def _get(s, u):
        return vault.get((s, u))

    def _set(s, u, p):
        vault[(s, u)] = p

    def _delete(s, u):
        # Deleting an entry that was never stored is a silent no-op.
        return vault.pop((s, u), None)

    for attr, stub in (
        ("get_password", _get),
        ("set_password", _set),
        ("delete_password", _delete),
    ):
        monkeypatch.setattr(_keyring, attr, stub)

    yield vault
@pytest.fixture
def store(tmp_path: Path) -> MemoryStore:
    """Provide a `MemoryStore` rooted at ``<tmp_path>/lancedb`` for one test."""
    db_dir = tmp_path / "lancedb"
    return MemoryStore(path=db_dir)
def _make_record(
    rid: UUID,
    surface: str = "topic",
    *,
    tier: str = "episodic",
    embedding: list[float] | None = None,
) -> MemoryRecord:
    """Build a `MemoryRecord` with neutral defaults for every field.

    Only the id, surface text, tier and embedding are caller-controlled.
    When no embedding is supplied the constant vector ``[0.1] * EMBED_DIM``
    is used, so every record built with the default shares the same vector
    (the dedup tests rely on that). A supplied embedding is copied into a
    fresh list. `created_at` and `updated_at` share one UTC timestamp.
    """
    if embedding is None:
        vector = [0.1] * EMBED_DIM
    else:
        vector = list(embedding)
    stamp = datetime.now(timezone.utc)

    fields = dict(
        id=rid,
        tier=tier,
        literal_surface=surface,
        aaak_index="",
        embedding=vector,
        community_id=None,
        centrality=0.0,
        detail_level=2,
        pinned=False,
        stability=0.0,
        difficulty=0.0,
        last_reviewed=None,
        never_decay=False,
        never_merge=False,
        provenance=[],
        created_at=stamp,
        updated_at=stamp,
        tags=[],
        language="en",
    )
    return MemoryRecord(**fields)
# --------------------------------------------------------------------------- tests
def test_query_similar_accepts_tier_kwarg(store):
    """D-01 step 1: `query_similar` honours a `tier` keyword.

    Three behaviours are checked against a store holding one episodic and
    one semantic record with identical embeddings:

    * ``tier="episodic"`` returns the episodic record and not the semantic one;
    * an unknown tier raises `ValueError` (the spec wants this before any
      I/O; this test only observes the exception);
    * ``tier=None`` keeps the legacy behaviour and returns both records.

    Per the plan notes, un-fixed source fails here with a `TypeError` for
    the unexpected `tier` keyword.
    """
    episodic_id, semantic_id = uuid4(), uuid4()
    store.insert(_make_record(episodic_id, "episodic-cue", tier="episodic"))
    store.insert(_make_record(semantic_id, "semantic-cue", tier="semantic"))
    probe = [0.1] * EMBED_DIM

    # Tier filter applied.
    filtered = store.query_similar(probe, k=10, tier="episodic")
    filtered_ids = {rec.id for rec, _ in filtered}
    assert episodic_id in filtered_ids, "episodic record should be returned by tier='episodic'"
    assert semantic_id not in filtered_ids, "semantic record must be filtered out by tier='episodic'"

    # Invalid tier is rejected.
    with pytest.raises(ValueError):
        store.query_similar(probe, k=10, tier="bogus")

    # No tier: both rows come back from the plain similarity query.
    unfiltered = store.query_similar(probe, k=10, tier=None)
    unfiltered_ids = {rec.id for rec, _ in unfiltered}
    assert episodic_id in unfiltered_ids and semantic_id in unfiltered_ids
def test_capture_turn_dedups_on_high_cos_match(store):
    """D-01 step 3: capturing an identical turn twice reinforces instead of inserting.

    The first `capture_turn` call must report ``status="inserted"``; the
    second, with the same arguments, must report ``status="reinforced"``
    with a reason containing ``"cos="``. The store must still hold exactly
    one episodic record afterwards.

    Per the plan notes, the dedup branch was unreachable before the fix
    (three stacked bugs: a swallowed `TypeError` on the `tier` kwarg, a
    score lookup on a tuple, and a single-UUID list handed to
    `boost_edges`), so every capture inserted.
    """
    turn = dict(
        store=store,
        text="the user prefers Russian on the surface; English in storage",
        cue="lang preference",
        tier="episodic",
        session_id="s1",
        role="user",
    )

    first = capture_turn(**turn)
    assert first["status"] == "inserted", f"first capture should insert, got {first}"

    second = capture_turn(**turn)
    assert second["status"] == "reinforced", f"second capture should reinforce, got {second}"
    assert "cos=" in second["reason"], f"reason should record cosine score, got {second}"

    # The duplicate must not have produced a second episodic row.
    episodic_rows = [row for row in store.iter_records() if row.tier == "episodic"]
    assert len(episodic_rows) == 1
def test_capture_turn_inserts_on_low_cos(store):
    """Two unrelated turns must both be inserted (no false dedup).

    Counterpart to the dedup test: it catches a fix that makes the dedup
    branch fire on every capture.
    """
    shared = dict(store=store, tier="episodic", session_id="s1", role="user")

    apple = capture_turn(text="apples are red", cue="apple", **shared)
    physics = capture_turn(
        text="quantum chromodynamics describes the strong force",
        cue="qcd",
        **shared,
    )

    assert apple["status"] == "inserted", f"first insert expected, got {apple}"
    assert physics["status"] == "inserted", f"second insert expected, got {physics}"

    episodic_rows = [row for row in store.iter_records() if row.tier == "episodic"]
    assert len(episodic_rows) == 2
def test_reinforce_record_increments_edge_weight(store):
    """D-01 step 2: each `reinforce_record` call raises the self-loop weight.

    Both calls are expected to return a mapping that contains the key
    ``(str(rid), str(rid))``, and the value under that key must be strictly
    larger after the second call. Per the plan notes, un-fixed source
    fails with `AttributeError` because `reinforce_record` does not exist.
    """
    rid = uuid4()
    store.insert(_make_record(rid, "anchor-record"))

    first_weights = store.reinforce_record(rid)
    second_weights = store.reinforce_record(rid)

    # The self-loop is keyed by the stringified id paired with itself.
    loop_key = (str(rid), str(rid))
    assert loop_key in first_weights, f"self-loop key missing from first call: {first_weights}"
    assert loop_key in second_weights, f"self-loop key missing from second call: {second_weights}"

    before = first_weights[loop_key]
    after = second_weights[loop_key]
    assert after > before, (
        f"weight must strictly increase across calls: w1={before} w2={after}"
    )

428
tests/test_capture_queue.py Normal file
View file

@ -0,0 +1,428 @@
"""Phase 10.2 Plan 10.2-01 Task 1.2 -- capture_queue.py test suite.
Covers atomic append (incl. crash simulation), 50-thread concurrent
append, idempotent ingest with mid-handler crash, lock-skip semantics,
overflow + audit log, verbatim Unicode round-trip, list_pending sort
order, schema-version mismatch, empty-queue ingest, ULID lex<->time
order, and lock-file cleanup on success/failure.
All tests use ``tmp_path`` -- no production queue at ``~/.iai-mcp/pending/``
is touched.
"""
from __future__ import annotations
import errno
import fcntl
import json
import os
import threading
import time
from pathlib import Path
from typing import Any
import pytest
from iai_mcp.capture_queue import (
DEFAULT_MAX_SIZE,
SCHEMA_VERSION,
CaptureQueue,
CaptureQueueSchemaError,
generate_ulid,
)
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _sample_record(i: int = 0, surface: str | None = None) -> dict:
"""Return a minimally valid record envelope dict."""
return {
"surface": surface if surface is not None else f"sample text {i}",
"cue": f"cue {i}",
"tier": "episodic",
"session_id": "test-session",
"role": "user",
}
def _write_envelope_directly(
    queue_dir: Path,
    ulid: str,
    record: dict,
    *,
    schema_version: int = SCHEMA_VERSION,
    appended_at: str = "2026-05-02T15:00:00+00:00",
) -> Path:
    """Write ``pending-<ulid>.json`` into `queue_dir` without using `CaptureQueue.append`.

    Lets a test seed an envelope with arbitrary `schema_version` or
    `appended_at` values. The envelope is serialised as compact,
    non-ASCII-escaped JSON and written as UTF-8. Returns the written path.
    """
    envelope = dict(
        ulid=ulid,
        appended_at=appended_at,
        record=record,
        schema_version=schema_version,
    )
    serialized = json.dumps(envelope, ensure_ascii=False, separators=(",", ":"))
    target = queue_dir / f"pending-{ulid}.json"
    target.write_text(serialized, encoding="utf-8")
    return target
# ---------------------------------------------------------------------------
# 1. Basic append + file creation
# ---------------------------------------------------------------------------
def test_append_returns_ulid_and_creates_file(tmp_path):
    """`append` returns a 26-character string id and writes a matching envelope file."""
    from datetime import datetime

    queue = CaptureQueue(queue_dir=tmp_path)
    new_ulid = queue.append(_sample_record(0))

    assert isinstance(new_ulid, str)
    assert len(new_ulid) == 26

    committed = tmp_path / f"pending-{new_ulid}.json"
    assert committed.exists()

    body = json.loads(committed.read_text(encoding="utf-8"))
    assert body["ulid"] == new_ulid
    assert body["schema_version"] == SCHEMA_VERSION
    assert body["record"]["surface"] == "sample text 0"
    # Raises ValueError if the timestamp is not ISO-8601 parseable.
    datetime.fromisoformat(body["appended_at"])

    assert queue.pending_count() == 1
# ---------------------------------------------------------------------------
# 2. Atomic append under simulated crash (os.replace patched to raise)
# ---------------------------------------------------------------------------
def test_append_atomic_under_crash_simulation(tmp_path, monkeypatch):
    """A failing `os.replace` must not leave a committed pending file behind.

    `os.replace` (as reached through `iai_mcp.capture_queue`) is patched to
    raise `OSError(EIO)`. The append must propagate the error, and neither
    `pending_count` nor a directory glob may show a ``pending-*.json``
    file. A leftover temp file is tolerated. After the real function is put
    back, a normal append must succeed.
    """
    patch_target = "iai_mcp.capture_queue.os.replace"
    queue = CaptureQueue(queue_dir=tmp_path)
    genuine_replace = os.replace

    def _failing_replace(src, dst):
        raise OSError(errno.EIO, "simulated crash mid-rename")

    monkeypatch.setattr(patch_target, _failing_replace)
    with pytest.raises(OSError):
        queue.append(_sample_record(0))

    # Nothing was published.
    assert queue.pending_count() == 0
    published = [
        path
        for path in tmp_path.glob("pending-*.json")
        if not path.name.endswith(".tmp")
    ]
    assert published == []

    # With the real rename restored the queue works again.
    monkeypatch.setattr(patch_target, genuine_replace)
    queue.append(_sample_record(1))
    assert queue.pending_count() == 1
# ---------------------------------------------------------------------------
# 3. Concurrent append (50 threads * 10 records each = 500)
# ---------------------------------------------------------------------------
def test_concurrent_append_50_threads(tmp_path):
    """50 threads each append 10 records; all 500 must land intact with unique ids.

    Any exception raised inside a worker is collected and surfaced through
    an assertion on the main thread. A worker still alive after its
    30-second join fails the test as hung.
    """
    queue = CaptureQueue(queue_dir=tmp_path)
    thread_count, per_thread = 50, 10
    expected_total = thread_count * per_thread

    failures: list[BaseException] = []
    collected: list[str] = []
    collected_guard = threading.Lock()

    def worker(tid: int) -> None:
        try:
            batch = [
                queue.append(_sample_record(i, f"thread-{tid}-record-{i}"))
                for i in range(per_thread)
            ]
            with collected_guard:
                collected.extend(batch)
        except BaseException as exc:  # pragma: no cover - surfaced via assertion
            failures.append(exc)

    pool = [threading.Thread(target=worker, args=(tid,)) for tid in range(thread_count)]
    for thread in pool:
        thread.start()
    for thread in pool:
        thread.join(timeout=30)
        assert not thread.is_alive(), "worker thread hung"

    assert failures == [], f"workers raised: {failures!r}"
    assert len(collected) == expected_total
    # Every returned id is distinct.
    assert len(set(collected)) == len(collected)

    # Every committed file parses and carries the expected payload shape.
    on_disk = queue.list_pending()
    assert len(on_disk) == expected_total
    for path in on_disk:
        body = json.loads(path.read_text(encoding="utf-8"))
        assert body["schema_version"] == SCHEMA_VERSION
        assert body["record"]["surface"].startswith("thread-")
# ---------------------------------------------------------------------------
# 4. Idempotent ingest -- crash mid-handler leaves both files, retry works
# ---------------------------------------------------------------------------
def test_idempotent_ingest_crash_mid_handler(tmp_path):
    """A handler exception leaves the record queued; a later ingest drains it.

    After the failing pass both the pending file and its ``.lock`` sibling
    must still exist and the queue must still count one record. After a
    retry with a working handler the record is delivered once and both
    files are gone.
    """
    queue = CaptureQueue(queue_dir=tmp_path)
    ulid = queue.append(_sample_record(42, surface="payload-42"))
    stem = f"pending-{ulid}"
    pending_file = tmp_path / f"{stem}.json"
    lock_file = tmp_path / f"{stem}.lock"

    def _explode(_record: dict) -> None:
        raise RuntimeError("handler exploded")

    with pytest.raises(RuntimeError):
        queue.ingest_pending(_explode)

    # First pass: nothing consumed, crash marker left in place.
    assert pending_file.exists(), "pending file must remain after handler exception"
    assert lock_file.exists(), "lock file must remain to mark mid-flight crash"
    assert queue.pending_count() == 1

    delivered: list[dict] = []

    def _collect(record: dict) -> None:
        delivered.append(record)

    processed = queue.ingest_pending(_collect)

    # Second pass: delivered exactly once and fully cleaned up.
    assert processed == 1
    assert len(delivered) == 1
    assert delivered[0]["surface"] == "payload-42"
    assert not pending_file.exists()
    assert not lock_file.exists()
    assert queue.pending_count() == 0
# ---------------------------------------------------------------------------
# 5. Lock contention -- A held externally, B and C still ingest
# ---------------------------------------------------------------------------
def test_idempotent_ingest_lock_skipped(tmp_path):
    """A record whose lock file is held elsewhere is skipped; the others are ingested.

    The test takes a non-blocking exclusive `flock` on record A's lock
    file, then runs an ingest pass. Only B and C may reach the handler, and
    only A's pending file may survive.
    """
    queue = CaptureQueue(queue_dir=tmp_path)
    ulid_a, ulid_b, ulid_c = (
        queue.append(_sample_record(idx, surface=tag))
        for idx, tag in ((1, "A"), (2, "B"), (3, "C"))
    )

    def pending_file(ulid: str) -> Path:
        return tmp_path / f"pending-{ulid}.json"

    # Hold A's lock from outside the queue for the duration of the ingest.
    held_lock = tmp_path / f"pending-{ulid_a}.lock"
    fd = os.open(str(held_lock), os.O_WRONLY | os.O_CREAT, 0o600)
    try:
        fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)

        surfaces: list[str] = []

        def handler(record: dict) -> None:
            surfaces.append(record["surface"])

        processed = queue.ingest_pending(handler)

        assert processed == 2
        assert sorted(surfaces) == ["B", "C"]
        assert pending_file(ulid_a).exists()
        assert not pending_file(ulid_b).exists()
        assert not pending_file(ulid_c).exists()
    finally:
        # Best-effort unlock; the descriptor is closed regardless.
        try:
            fcntl.flock(fd, fcntl.LOCK_UN)
        except OSError:
            pass
        os.close(fd)
# ---------------------------------------------------------------------------
# 6. Overflow -- exceed max, oldest pruned, audit log populated
# ---------------------------------------------------------------------------
def test_overflow_prune_oldest(tmp_path):
    """Appending 110 records to a queue capped at 100 prunes and audits the excess.

    The exact number of records kept depends on the prune batch size inside
    `capture_queue`, so it is not pinned here. The invariants asserted are:

    1. The final `pending_count` is at most `max_size`.
    2. At least one record was dropped.
    3. ``.overflow-audit.log`` holds exactly one JSON line per dropped
       record, each with ``reason == "queue_overflow"``, a `dropped_ulid`
       that this test appended, and an integer `queue_size_before_prune`.
    """
    cap, total = 100, 110
    queue = CaptureQueue(queue_dir=tmp_path, max_size=cap)
    appended_ulids: list[str] = [queue.append(_sample_record(i)) for i in range(total)]

    kept = queue.pending_count()
    assert kept <= cap

    audit_log = tmp_path / ".overflow-audit.log"
    assert audit_log.exists(), "audit log must exist after overflow"

    entries = [
        json.loads(raw)
        for raw in audit_log.read_text(encoding="utf-8").splitlines()
        if raw
    ]

    # Whatever is no longer pending must have been dropped.
    dropped = total - kept
    assert dropped > 0, "at least one record must have been dropped on overflow"
    assert len(entries) == dropped, (
        f"expected {dropped} audit entries, got {len(entries)}"
    )
    for entry in entries:
        assert entry["reason"] == "queue_overflow"
        assert entry["dropped_ulid"] in appended_ulids
        assert isinstance(entry["queue_size_before_prune"], int)
# ---------------------------------------------------------------------------
# 7. Verbatim round-trip -- Russian + English + emoji + Greek + symbols
# ---------------------------------------------------------------------------
def test_verbatim_round_trip_unicode(tmp_path):
    """Mixed-script text survives append + ingest with identical UTF-8 bytes."""
    original = "Привет, world! 🧠 Δ ∑ — combining é vs é"
    queue = CaptureQueue(queue_dir=tmp_path)
    queue.append(_sample_record(0, surface=original))

    received: list[str] = []

    def handler(record: dict) -> None:
        received.append(record["surface"])

    processed = queue.ingest_pending(handler)

    assert processed == 1
    assert len(received) == 1
    # Compared both as text and as encoded bytes.
    assert received[0] == original
    assert received[0].encode("utf-8") == original.encode("utf-8")
# ---------------------------------------------------------------------------
# 8. list_pending sort order is oldest-first
# ---------------------------------------------------------------------------
def test_list_pending_sort_order(tmp_path):
    """`list_pending` returns files in the order their records were appended.

    The ids recovered from the listed paths (via `_ulid_from_path`) must
    equal the sequence of ids returned by 20 consecutive appends.
    """
    queue = CaptureQueue(queue_dir=tmp_path)

    appended = []
    for i in range(20):
        appended.append(queue.append(_sample_record(i)))

    listed = list(map(queue._ulid_from_path, queue.list_pending()))
    assert listed == appended, "list_pending must be oldest-first"
# ---------------------------------------------------------------------------
# 9. Schema-version mismatch raises CaptureQueueSchemaError
# ---------------------------------------------------------------------------
def test_schema_version_mismatch_raises(tmp_path):
    """An envelope with schema version 99 makes ingest raise `CaptureQueueSchemaError`.

    The envelope is seeded directly on disk. The handler must never run,
    and the error text must mention both ``schema_version`` and ``99``.
    """
    queue = CaptureQueue(queue_dir=tmp_path)
    _write_envelope_directly(
        tmp_path,
        ulid="01HZQTESTBADSCHEMA00000000",
        record=_sample_record(0),
        schema_version=99,
    )
    # The seeded file is visible to the queue as a pending record.
    assert queue.pending_count() == 1

    def handler(_record: dict) -> None:  # pragma: no cover -- never called
        pytest.fail("handler must not be called on schema mismatch")

    with pytest.raises(CaptureQueueSchemaError) as excinfo:
        queue.ingest_pending(handler)

    message = str(excinfo.value)
    assert "schema_version" in message
    assert "99" in message
# ---------------------------------------------------------------------------
# 10. Empty queue -- ingest returns 0, no errors
# ---------------------------------------------------------------------------
def test_empty_queue_ingest_returns_zero(tmp_path):
    """Ingesting an empty queue returns 0 and never invokes the handler."""
    queue = CaptureQueue(queue_dir=tmp_path)
    assert queue.pending_count() == 0

    # One-element list so the nested handler can flip the flag in place.
    invoked = [False]

    def handler(_record: dict) -> None:  # pragma: no cover -- never called
        invoked[0] = True

    processed = queue.ingest_pending(handler)

    assert processed == 0
    assert invoked[0] is False
# ---------------------------------------------------------------------------
# 11. ULID lex sort matches generation/time order over many samples
# ---------------------------------------------------------------------------
def test_ulid_lexicographic_sort_matches_time_order():
    """1000 back-to-back ULIDs are unique and already in sorted string order.

    Generating them in a tight loop means many may share a timestamp, so
    this exercises whatever tie-breaking `generate_ulid` applies.
    """
    count = 1000
    generated = []
    for _ in range(count):
        generated.append(generate_ulid())

    assert len(set(generated)) == count, "no ULID collisions allowed"
    assert sorted(generated) == generated, "lex sort must equal generation order"
# ---------------------------------------------------------------------------
# 12. Lock file cleaned up on handler success
# ---------------------------------------------------------------------------
def test_lock_file_cleanup_on_handler_success(tmp_path):
    """A successful ingest removes both the pending file and its lock file."""
    queue = CaptureQueue(queue_dir=tmp_path)
    ulid = queue.append(_sample_record(0))
    stem = f"pending-{ulid}"

    def handler(_record: dict) -> None:
        # Intentionally empty: the lock state during the call is not
        # inspected here, only what is left on disk afterwards.
        pass

    processed = queue.ingest_pending(handler)

    assert processed == 1
    assert not (tmp_path / f"{stem}.lock").exists(), "lock file must be cleaned on success"
    assert not (tmp_path / f"{stem}.json").exists()
# ---------------------------------------------------------------------------
# 13. Lock file persists on handler exception (mid-flight crash marker)
# ---------------------------------------------------------------------------
def test_lock_file_persists_on_handler_exception(tmp_path):
    """When the handler raises, the error propagates and both files stay on disk."""
    queue = CaptureQueue(queue_dir=tmp_path)
    ulid = queue.append(_sample_record(0))
    stem = f"pending-{ulid}"
    pending_file = tmp_path / f"{stem}.json"
    lock_file = tmp_path / f"{stem}.lock"

    def handler(_record: dict) -> None:
        raise ValueError("simulated mid-handler crash")

    with pytest.raises(ValueError):
        queue.ingest_pending(handler)

    assert pending_file.exists(), "pending must remain after handler exception"
    assert lock_file.exists(), "lock must remain to mark mid-flight crash"

View file

@ -0,0 +1,332 @@
"""Phase 7.1 Plan 05 / R3 acceptance — `iai-mcp capture-transcript --no-spawn`.
Eliminates the third spawn vector from forensic anomaly #3 (Stop-hook
spawning iai_mcp.daemon under N-session race). When 3 Claude sessions close
within seconds, 3 hooks each fire `iai-mcp capture-transcript --no-spawn`;
ZERO daemons get spawned. Each invocation either (a) talks to the existing
daemon if one is up, or (b) writes a JSONL deferral file and exits 0 within
2s. The hook never blocks session teardown.
This module covers:
- Test A: writes deferred file when daemon is unreachable
- Test B: completes in under 2s wall-clock (R3 budget)
- Test C: spawns ZERO new iai_mcp.* processes
- Test D: --no-spawn surfaces in --help; default (no flag) keeps Phase 6
behavior (exit 0 + stdout JSON, no deferred file)
- Test E: deferred JSONL v1 header + per-turn event lines (D7.1-04)
- Test F: missing transcript -> header-only file, no exception
Test isolation:
- HOME=tmp_path so `Path.home()` resolves to a fresh dir; the user's
real ~/.iai-mcp/.deferred-captures/ is never touched.
- IAI_DAEMON_SOCKET_PATH=/tmp/iai-no-spawn-<pid>-<n>/d.sock so the
250ms socket probe never hits the user's real daemon.
- Subprocess invocation: `[sys.executable, '-m', 'iai_mcp.cli', ...]`
with PYTHONPATH set; we don't depend on the `iai-mcp` console script
being on PATH (test_socket_subagent_reuse.py:115-116 pattern).
"""
from __future__ import annotations
import json
import os
import platform
import subprocess
import sys
import time
from pathlib import Path
import psutil
import pytest
# Two directory levels above this file; presumably the repository root
# (the helpers below append "src" to it to build PYTHONPATH).
REPO = Path(__file__).resolve().parent.parent
# POSIX-only: subprocess + AF_UNIX socket probe; fork-style daemon counts.
# Module-level mark: every test in this file is skipped when the platform
# reports "Windows".
pytestmark = pytest.mark.skipif(
platform.system() == "Windows",
reason="POSIX subprocess + AF_UNIX",
)
# ---------------------------------------------------------------------------
# Helpers (copied from test_socket_subagent_reuse.py to keep this module
# standalone — that test owns the canonical pattern, but cross-importing
# would couple two unrelated test modules).
# ---------------------------------------------------------------------------
def _count_iai_mcp_processes() -> dict[str, int]:
    """Count host processes whose command line mentions iai_mcp.core / iai_mcp.daemon.

    Returns ``{"core": n, "daemon": m}``. A single process is counted under
    both keys if its command line contains both module names. Processes
    with an empty command line, or that raise `NoSuchProcess` or
    `AccessDenied` during inspection, are ignored.
    """
    tally = {"core": 0, "daemon": 0}
    for proc in psutil.process_iter(["cmdline"]):
        try:
            argv = proc.info.get("cmdline") or []
            if argv:
                # Individual argv entries may be None; treat them as empty.
                flat = " ".join(part or "" for part in argv)
                for label in ("core", "daemon"):
                    if f"iai_mcp.{label}" in flat:
                        tally[label] += 1
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            continue
    return tally
def _isolated_env(tmp_path: Path) -> tuple[dict[str, str], Path]:
    """Build a subprocess environment that is isolated from the real user state.

    Returns ``(env, deferred_dir)``:

    * `env` is a copy of `os.environ` with HOME pointed at `tmp_path`, the
      daemon socket path pointed at a per-process directory under ``/tmp``,
      the keyring backend forced to the fail backend, a fixed crypto
      passphrase, and ``<REPO>/src`` prepended to PYTHONPATH.
    * `deferred_dir` is ``<tmp_path>/.iai-mcp/.deferred-captures``. It is not
      created here; only its parent ``.iai-mcp`` directory is.
    """
    socket_dir = Path(f"/tmp/iai-no-spawn-{os.getpid()}-{id(tmp_path)}")
    socket_dir.mkdir(parents=True, exist_ok=True)

    iai_home = tmp_path / ".iai-mcp"
    iai_home.mkdir(parents=True, exist_ok=True)

    env = os.environ.copy()
    # Lets the spawned interpreter import iai_mcp without an editable install.
    python_path = str(REPO / "src") + os.pathsep + env.get("PYTHONPATH", "")
    env.update(
        {
            "HOME": str(tmp_path),
            "IAI_DAEMON_SOCKET_PATH": str(socket_dir / "d.sock"),
            # Defense-in-depth: should the inline path run anyway, the fail
            # backend avoids hanging on a real keychain prompt.
            "PYTHON_KEYRING_BACKEND": "keyring.backends.fail.Keyring",
            "IAI_MCP_CRYPTO_PASSPHRASE": "test-no-spawn-pass",
            "PYTHONPATH": python_path,
        }
    )
    return env, iai_home / ".deferred-captures"
def _make_transcript(tmp_path: Path) -> Path:
    """Write a three-turn JSONL transcript to ``<tmp_path>/transcript.jsonl``.

    The turns are user, assistant, user, one JSON object per line, each
    line newline-terminated. Returns the path of the written file.
    """
    script = (
        ("user", "hello world"),
        ("assistant", "hi back at you"),
        ("user", "third turn here"),
    )
    rendered = "".join(
        json.dumps({"type": who, "message": {"role": who, "content": said}}) + "\n"
        for who, said in script
    )
    destination = tmp_path / "transcript.jsonl"
    destination.write_text(rendered)
    return destination
def _run_no_spawn(env: dict[str, str], transcript_path: Path) -> subprocess.CompletedProcess:
    """Run ``capture-transcript --no-spawn`` on `transcript_path` in a child interpreter.

    The CLI is invoked as ``python -m iai_mcp.cli`` with session id
    ``test-r3``. stdout and stderr are captured as text. The 5-second
    timeout is deliberately looser than the 2-second contract the
    implementation is expected to meet.
    """
    argv = [sys.executable, "-m", "iai_mcp.cli"]
    argv += ["capture-transcript", "--no-spawn"]
    argv += ["--session-id", "test-r3"]
    argv.append(str(transcript_path))
    return subprocess.run(
        argv,
        env=env,
        capture_output=True,
        text=True,
        timeout=5,
    )
# ---------------------------------------------------------------------------
# Subprocess tests (Tests A-D).
# ---------------------------------------------------------------------------
def test_no_spawn_writes_deferred_when_daemon_down(tmp_path):
    """Test A: with no daemon reachable, --no-spawn writes one JSONL deferral file.

    Checks the exit code, the ``{"status": "deferred"}`` reply on stdout,
    the v1 header line, and that every following event line carries
    non-empty text, the episodic tier and a user/assistant role.
    """
    env, deferred_dir = _isolated_env(tmp_path)
    result = _run_no_spawn(env, _make_transcript(tmp_path))

    assert result.returncode == 0, f"stderr={result.stderr!r} stdout={result.stdout!r}"
    reply = json.loads(result.stdout.strip())
    assert reply.get("status") == "deferred", reply

    deferral_files = sorted(deferred_dir.glob("*.jsonl"))
    assert len(deferral_files) == 1, f"expected 1 deferral file, got {deferral_files}"

    lines = deferral_files[0].read_text().splitlines()
    assert len(lines) >= 2, f"expected header + ≥1 event, got {lines}"
    raw_header, *raw_events = lines

    header = json.loads(raw_header)
    assert header["version"] == 1, header
    assert header["session_id"] == "test-r3", header
    assert "deferred_at" in header
    assert "cwd" in header

    # Everything after the header is a captured turn.
    for raw in raw_events:
        event = json.loads(raw)
        assert "text" in event and event["text"], event
        assert event["tier"] == "episodic", event
        assert event["role"] in {"user", "assistant"}, event
def test_no_spawn_completes_in_under_2s(tmp_path):
    """Test B: R3 acceptance — the --no-spawn invocation finishes in under 2s.

    Elapsed time is measured with `time.monotonic()` rather than
    `time.time()`: the system clock can be adjusted while the child process
    runs, which would make a wall-clock difference spuriously large or
    small (even negative) and turn the budget check into a flaky pass or
    fail.
    """
    env, _ = _isolated_env(tmp_path)
    transcript = _make_transcript(tmp_path)

    started = time.monotonic()
    proc = _run_no_spawn(env, transcript)
    duration = time.monotonic() - started

    assert proc.returncode == 0, f"stderr={proc.stderr!r}"
    assert duration < 2.0, (
        f"--no-spawn took {duration:.3f}s; R3 budget is <2.0s. "
        f"Hook would block session teardown."
    )
def test_no_spawn_does_not_spawn_daemon(tmp_path):
    """Test C: the invocation leaves no additional iai_mcp core/daemon processes.

    Process counts are snapshotted before the run and again half a second
    after it; neither count may have grown.
    """
    env, _ = _isolated_env(tmp_path)
    transcript = _make_transcript(tmp_path)

    before = _count_iai_mcp_processes()
    proc = _run_no_spawn(env, transcript)
    # Short settle window so a would-be spawned process has time to show up.
    time.sleep(0.5)
    after = _count_iai_mcp_processes()

    assert proc.returncode == 0, f"stderr={proc.stderr!r}"

    growth = {kind: after[kind] - before[kind] for kind in ("daemon", "core")}
    for kind in ("daemon", "core"):
        assert growth[kind] <= 0, (
            f"--no-spawn spawned {growth[kind]} new {kind}(s); R3 violated. "
            f"before={before} after={after}"
        )
def test_no_spawn_flag_default_false(tmp_path):
    """Test D: --no-spawn is advertised in --help and is off by default.

    Two child invocations are made:

    1. ``capture-transcript --help`` must exit 0 and mention ``--no-spawn``.
    2. ``capture-transcript <missing file>`` without the flag must exit 0,
       print JSON on stdout containing an ``errors`` or ``inserted`` key,
       and must not leave any ``*.jsonl`` file in the deferred-captures
       directory.
    """
    env, deferred_dir = _isolated_env(tmp_path)
    cli = [sys.executable, "-m", "iai_mcp.cli", "capture-transcript"]

    def _invoke(extra_args, limit):
        return subprocess.run(
            cli + extra_args,
            env=env,
            capture_output=True,
            text=True,
            timeout=limit,
        )

    # 1) The flag is documented.
    help_run = _invoke(["--help"], 5)
    assert help_run.returncode == 0, help_run.stderr
    assert "--no-spawn" in help_run.stdout, help_run.stdout

    # 2) Default path against a transcript that does not exist.
    default_run = _invoke([str(tmp_path / "no-such-file.jsonl")], 10)
    assert default_run.returncode == 0, default_run.stderr
    # The result JSON is expected on stdout, not stderr.
    payload = json.loads(default_run.stdout.strip())
    assert "errors" in payload or "inserted" in payload, payload

    # Only --no-spawn is allowed to produce deferral files.
    if deferred_dir.exists():
        stray = list(deferred_dir.glob("*.jsonl"))
        assert not stray, (
            f"default capture-transcript must not write deferred files; got "
            f"{stray}"
        )
# ---------------------------------------------------------------------------
# Pure unit tests of write_deferred_captures (Tests E and F).
# ---------------------------------------------------------------------------
def test_deferred_jsonl_format_v1_header(tmp_path, monkeypatch):
    """Test E: `write_deferred_captures` writes a v1 header plus one line per turn.

    With HOME redirected to `tmp_path`, the output must land in
    ``<HOME>/.iai-mcp/.deferred-captures`` with a name starting with the
    session id and a ``.jsonl`` suffix. The three-turn transcript must
    yield four lines.
    """
    monkeypatch.setenv("HOME", str(tmp_path))
    transcript = _make_transcript(tmp_path)
    from iai_mcp.capture import write_deferred_captures

    written = write_deferred_captures(
        session_id="unit-e",
        transcript_path=transcript,
        cwd="/some/cwd",
    )

    assert written.exists()
    assert written.parent == tmp_path / ".iai-mcp" / ".deferred-captures"
    # File name starts with the session id and ends in .jsonl.
    assert written.name.startswith("unit-e-"), written.name
    assert written.suffix == ".jsonl", written.name

    lines = written.read_text().splitlines()
    # One header line followed by the three transcript turns.
    assert len(lines) == 4, lines

    header = json.loads(lines[0])
    assert header["version"] == 1
    assert header["session_id"] == "unit-e"
    assert header["cwd"] == "/some/cwd"
    assert "deferred_at" in header

    required_keys = {"text", "cue", "tier", "role", "ts"}
    known_texts = {"hello world", "hi back at you", "third turn here"}
    for raw in lines[1:]:
        event = json.loads(raw)
        assert required_keys.issubset(event.keys()), event.keys()
        assert event["tier"] == "episodic"
        assert event["role"] in {"user", "assistant"}
        assert event["text"] in known_texts
def test_deferred_jsonl_handles_missing_transcript(tmp_path, monkeypatch):
    """Test F: a missing transcript yields a header-only deferral file, not an error.

    `cwd` is not passed, so the header is expected to carry some non-empty
    string for it.
    """
    monkeypatch.setenv("HOME", str(tmp_path))
    from iai_mcp.capture import write_deferred_captures

    absent = tmp_path / "does-not-exist.jsonl"
    # Must return a path rather than raise.
    written = write_deferred_captures(session_id="unit-f", transcript_path=absent)

    assert written.exists()
    lines = written.read_text().splitlines()
    assert len(lines) == 1, f"expected header-only, got {lines}"

    header = json.loads(lines[0])
    assert header["version"] == 1
    assert header["session_id"] == "unit-f"
    assert isinstance(header.get("cwd"), str) and header["cwd"], header

View file

@ -0,0 +1,360 @@
"""Phase 7.5 acceptance — `iai-mcp capture-transcript --no-spawn` ALWAYS defers.
Closes the embedder cold-load amplification documented in SPEC 07.5: every
Stop-hook invocation (286/day on 2026-04-27) was loading bge-small-en-v1.5
in a brand-new Python subprocess on the daemon-reachable path. Forensic
evidence: stderr `Loading weights: 0%|...| 0/391 ...|| 391/391` × 10 +
`leaked semaphore objects at shutdown` × 7.
Fix: `cmd_capture_transcript` `--no-spawn` branch in `src/iai_mcp/cli.py`
no longer probes the socket and no longer imports
`iai_mcp.capture.capture_transcript` / `iai_mcp.store.MemoryStore`. It
unconditionally calls `write_deferred_captures(...)` and prints
`{"status": "deferred", "path": "..."}`. The daemon's WAKE drain (Phase
7.1 R3 / Plan 07.1-06) consumes deferred files with the daemon's
already-loaded embedder.
Test matrix:
- Test 1: subprocess + reachable mock socket (real AF_UNIX listener) ->
  status="deferred", stderr has ZERO `Loading weights` and ZERO
  `sentence_transformers` mentions. The reachable case used to inline-embed;
  now it must defer just like the unreachable case.
- Test 2: subprocess + unreachable socket (back-compat) -> identical output.
  Locks down that the new always-defer path doesn't regress the existing
  unreachable behaviour.
- Test 3: subprocess + fresh interpreter introspects `sys.modules` AFTER the
  CLI handler runs end-to-end -> asserts `iai_mcp.embed` and
  `sentence_transformers` are NOT loaded. Subprocess required because other
  pytest tests in the same session may pre-load `iai_mcp.embed`, which
  poisons in-process `sys.modules` checks.
- Test 4: in-process source-string scan of the modified function body
asserts the `--no-spawn` block contains zero `capture_transcript` /
`MemoryStore` import statements. Cheap structural lockdown so the inline
path can't be reintroduced without breaking a test (SPEC A1).
Test isolation:
- HOME=tmp_path so `Path.home()` resolves to a fresh dir; the user's
real ~/.iai-mcp/.deferred-captures/ is never touched.
- IAI_DAEMON_SOCKET_PATH=/tmp/iai-no-spawn-defer-<pid>-<n>/d.sock so the
reachable case binds a real listener and the unreachable case points to
a non-existent path.
- Subprocess invocation: `[sys.executable, '-m', 'iai_mcp.cli', ...]` with
PYTHONPATH set; we don't depend on the `iai-mcp` console script being on
PATH (matches the test_capture_transcript_no_spawn.py pattern).
"""
from __future__ import annotations
import json
import os
import platform
import re
import socket
import subprocess
import sys
from pathlib import Path
import pytest
REPO = Path(__file__).resolve().parent.parent
# POSIX-only: subprocess + AF_UNIX socket; matches the existing module's gate.
pytestmark = pytest.mark.skipif(
platform.system() == "Windows",
reason="POSIX subprocess + AF_UNIX",
)
# ---------------------------------------------------------------------------
# Shared helpers (kept local to keep this module standalone — the canonical
# pattern lives in test_capture_transcript_no_spawn.py but cross-importing
# would couple two unrelated test modules).
# ---------------------------------------------------------------------------
def _isolated_env(tmp_path: Path) -> tuple[dict[str, str], Path, Path]:
    """Build a subprocess environment that isolates HOME and the daemon socket path.

    Returns ``(env_dict, deferred_dir, sock_path)``.

    * ``env_dict`` is a copy of ``os.environ`` with HOME, the socket path,
      the keyring backend, the crypto passphrase and PYTHONPATH overridden.
    * ``deferred_dir`` is ``<tmp_path>/.iai-mcp/.deferred-captures``; only its
      parent ``.iai-mcp`` directory is created here.
    * ``sock_path`` is ``<sock_dir>/d.sock``. Only the parent directory is
      created; the socket file itself is left absent so each test decides
      whether to bind a listener on it.
    """
    # NOTE(review): the socket directory lives directly under /tmp rather than
    # tmp_path — presumably to keep the AF_UNIX path short; confirm. It is not
    # removed by this helper.
    sock_dir = Path(f"/tmp/iai-no-spawn-defer-{os.getpid()}-{id(tmp_path)}")
    sock_dir.mkdir(parents=True, exist_ok=True)
    sock_path = sock_dir / "d.sock"

    iai_dir = tmp_path / ".iai-mcp"
    iai_dir.mkdir(parents=True, exist_ok=True)

    env = os.environ.copy()
    env["HOME"] = str(tmp_path)
    env["IAI_DAEMON_SOCKET_PATH"] = str(sock_path)
    # Defense-in-depth: if the inline path is somehow exercised, force the
    # fail-backend so we don't hang on the real keychain prompt.
    env["PYTHON_KEYRING_BACKEND"] = "keyring.backends.fail.Keyring"
    env["IAI_MCP_CRYPTO_PASSPHRASE"] = "test-no-spawn-defer-pass"

    # Make the spawned python find iai_mcp without an editable install.
    # Only append the inherited PYTHONPATH when it is non-empty: a trailing
    # path separator creates an empty entry, which the interpreter treats as
    # the current working directory.
    src_dir = str(REPO / "src")
    inherited = env.get("PYTHONPATH")
    env["PYTHONPATH"] = (
        os.pathsep.join((src_dir, inherited)) if inherited else src_dir
    )
    return env, iai_dir / ".deferred-captures", sock_path
def _make_transcript(tmp_path: Path) -> Path:
"""Write a 3-turn Claude Code-style JSONL transcript."""
turns = [
{"type": "user", "message": {"role": "user", "content": "hello phase 7 5"}},
{"type": "assistant", "message": {"role": "assistant", "content": "ack always defer"}},
{"type": "user", "message": {"role": "user", "content": "third defer turn"}},
]
transcript_path = tmp_path / "transcript.jsonl"
transcript_path.write_text("\n".join(json.dumps(t) for t in turns) + "\n")
return transcript_path
def _run_no_spawn(env: dict[str, str], transcript_path: Path) -> subprocess.CompletedProcess:
    """Run ``capture-transcript --no-spawn`` through ``python -m iai_mcp.cli``.

    The child uses ``env`` as its environment, has stdout/stderr captured as
    text, and is killed after 5 seconds (the comment in the original notes a
    2s contract for the implementation, so 5s is a comfortable ceiling).
    """
    argv = [sys.executable, "-m", "iai_mcp.cli"]
    argv += ["capture-transcript", "--no-spawn"]
    argv += ["--session-id", "test-phase75"]
    argv.append(str(transcript_path))
    return subprocess.run(
        argv,
        env=env,
        capture_output=True,
        text=True,
        timeout=5,
    )
def _bind_listener(sock_path: Path) -> socket.socket:
"""Bind an AF_UNIX listener at `sock_path` so `_try_short_timeout_connect`
would return True if the OLD code path were reached. Caller must close
the returned socket and unlink the path; use try/finally."""
sock_path.parent.mkdir(parents=True, exist_ok=True)
if sock_path.exists():
sock_path.unlink()
s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
s.bind(str(sock_path))
s.listen(1)
return s
# ---------------------------------------------------------------------------
# Test 1: reachable mock socket — must STILL defer (not inline-insert).
# This is the load-bearing acceptance: the OLD behaviour on this
# branch was inline ingest with embedder cold-load. NEW behaviour: defer.
# ---------------------------------------------------------------------------
def test_no_spawn_reachable_defers_not_inserts(tmp_path):
    """Phase 7.5 R1: a reachable daemon socket must not change the outcome.

    With a live AF_UNIX listener bound at the configured socket path,
    ``--no-spawn`` still exits 0, prints ``status="deferred"``, keeps stderr
    free of embedder-load markers, and leaves exactly one deferred JSONL
    file with a v1 header.
    """
    env, deferred_dir, sock_path = _isolated_env(tmp_path)
    transcript = _make_transcript(tmp_path)

    listener = _bind_listener(sock_path)
    try:
        result = _run_no_spawn(env, transcript)
    finally:
        # Always release the listener and its filesystem entry.
        listener.close()
        try:
            sock_path.unlink()
        except FileNotFoundError:
            pass

    stderr_text = result.stderr
    assert result.returncode == 0, f"stderr={stderr_text!r} stdout={result.stdout!r}"

    # stdout must be a JSON object reporting a deferral, not an insert count.
    payload = json.loads(result.stdout.strip())
    assert payload.get("status") == "deferred", (
        f"reachable case must defer under Phase 7.5; got {payload!r}"
    )
    assert "path" in payload, payload
    assert "inserted" not in payload, (
        f"inline-ingest path must not run under --no-spawn; got {payload!r}"
    )

    # A cold embedder load would show a `Loading weights` progress bar on
    # stderr; neither that nor the library name may appear.
    assert "Loading weights" not in stderr_text, (
        f"embedder cold-loaded on reachable --no-spawn path (Phase 7.5 broken):\n"
        f"{stderr_text}"
    )
    assert "sentence_transformers" not in stderr_text, (
        f"sentence_transformers touched on reachable --no-spawn path:\n"
        f"{stderr_text}"
    )

    # On-disk side effect: a single deferred file whose first line is the header.
    files = sorted(deferred_dir.glob("*.jsonl"))
    assert len(files) == 1, f"expected 1 deferred file, got {files}"
    first_line = files[0].read_text().splitlines()[0]
    header = json.loads(first_line)
    assert header["version"] == 1
    assert header["session_id"] == "test-phase75"
# ---------------------------------------------------------------------------
# Test 2: unreachable socket — back-compat. Same output as Test 1.
# ---------------------------------------------------------------------------
def test_no_spawn_unreachable_still_defers(tmp_path):
    """Back-compat guard: with no listener at the socket path, ``--no-spawn``
    defers exactly as it does in the reachable case."""
    env, deferred_dir, sock_path = _isolated_env(tmp_path)
    transcript = _make_transcript(tmp_path)

    # Precondition: nothing is bound at the socket path.
    assert not sock_path.exists()

    result = _run_no_spawn(env, transcript)
    stderr_text = result.stderr
    assert result.returncode == 0, f"stderr={stderr_text!r} stdout={result.stdout!r}"

    payload = json.loads(result.stdout.strip())
    assert payload.get("status") == "deferred", payload
    assert "inserted" not in payload, payload

    # Same stderr cleanliness invariant as the reachable case.
    assert "Loading weights" not in stderr_text, stderr_text
    assert "sentence_transformers" not in stderr_text, stderr_text

    files = sorted(deferred_dir.glob("*.jsonl"))
    assert len(files) == 1, f"expected 1 deferred file, got {files}"
# ---------------------------------------------------------------------------
# Test 3: fresh subprocess introspects sys.modules to prove no embedder load.
# In-process is unreliable because pytest sessions pre-load iai_mcp.embed via
# other test modules (test_recall_cue_router, test_active_inference_gate,
# test_invariant_anchor_edges, test_schema_instance_of_edges).
# ---------------------------------------------------------------------------
def test_no_spawn_zero_embedder_imports_in_fresh_process(tmp_path):
    """Phase 7.5 R1 (import isolation): run the ``--no-spawn`` handler in a new
    interpreter and check which heavy modules ended up in ``sys.modules``.

    The child calls ``iai_mcp.cli.main`` and then prints one marker line with
    the return code and the loaded embedder/ML module names. The test asserts
    on ``iai_mcp.embed`` and ``sentence_transformers`` only; ``torch`` and
    ``transformers`` names are collected by the child but not asserted on.
    """
    env, deferred_dir, _sock_path = _isolated_env(tmp_path)
    transcript = _make_transcript(tmp_path)

    # Child program: invoke main(), then emit a single tagged JSON line.
    driver = (
        "import sys, json\n"
        "from iai_mcp.cli import main\n"
        "rc = main([\n"
        " 'capture-transcript', '--no-spawn',\n"
        " '--session-id', 'test-phase75-fresh',\n"
        f" {str(transcript)!r},\n"
        "])\n"
        "loaded = sorted(\n"
        " k for k in sys.modules\n"
        " if k == 'iai_mcp.embed' or k.startswith('iai_mcp.embed.')\n"
        " or k == 'sentence_transformers' or k.startswith('sentence_transformers.')\n"
        " or k == 'torch' or k.startswith('torch.')\n"
        " or k == 'transformers' or k.startswith('transformers.')\n"
        ")\n"
        "print('IAIMCP75_DUMP=' + json.dumps({'rc': rc, 'loaded': loaded}))\n"
    )
    result = subprocess.run(
        [sys.executable, "-c", driver],
        env=env,
        capture_output=True,
        text=True,
        timeout=10,
    )
    assert result.returncode == 0, f"driver failed: stderr={result.stderr!r}"

    # The CLI may print its own JSON first, so pick out the tagged line.
    marker = "IAIMCP75_DUMP="
    dump_lines = [ln for ln in result.stdout.splitlines() if ln.startswith(marker)]
    assert len(dump_lines) == 1, f"expected 1 dump line, got {dump_lines!r}"
    dump = json.loads(dump_lines[0][len(marker):])
    assert dump["rc"] == 0, f"main() returned {dump['rc']}"

    # Load-bearing check: the embedder package and sentence_transformers
    # (including submodules) must be absent.
    exact_names = {"iai_mcp.embed", "sentence_transformers"}
    sub_prefixes = ("iai_mcp.embed.", "sentence_transformers.")
    forbidden = {
        mod
        for mod in set(dump["loaded"])
        if mod in exact_names or mod.startswith(sub_prefixes)
    }
    assert not forbidden, (
        f"--no-spawn must not import embedder/ML deps; loaded: {sorted(forbidden)}"
    )

    # Side effect: the fresh interpreter run left a deferred file on disk.
    assert next(deferred_dir.glob("*.jsonl"), None) is not None
# ---------------------------------------------------------------------------
# Test 4: structural lockdown — the modified function body must not contain
# the reintroduced inline imports. Cheap, in-process, regression-proof
# (SPEC A1: "Verified by static grep on the modified function").
# ---------------------------------------------------------------------------
def test_no_spawn_branch_has_no_inline_imports():
    """Phase 7.5 A1 lockdown: static scan of the ``if no_spawn:`` block in
    ``cmd_capture_transcript``.

    Reads ``src/iai_mcp/cli.py`` as text, isolates the function body and then
    the ``if no_spawn:`` branch (up to the ``# Default path`` marker), and
    asserts that the branch mentions ``write_deferred_captures`` but contains
    neither the ``capture_transcript`` / ``MemoryStore`` import statements nor
    a ``_try_short_timeout_connect`` call.
    """
    # Explicit encoding so the scan does not depend on the locale default.
    cli_src = (REPO / "src" / "iai_mcp" / "cli.py").read_text(encoding="utf-8")

    # Function body runs until the next top-level `def`, or to end of file
    # when cmd_capture_transcript happens to be the last definition.
    fn_match = re.search(
        r"^def cmd_capture_transcript\(.*?\n(.*?)(?:^def |\Z)",
        cli_src,
        flags=re.MULTILINE | re.DOTALL,
    )
    assert fn_match, "could not locate cmd_capture_transcript in cli.py"
    fn_body = fn_match.group(1)

    # Slice the `if no_spawn:` branch: everything between that line and the
    # `# Default path` marker. Code below the marker IS allowed to import
    # capture_transcript + MemoryStore.
    # NOTE(review): the leading whitespace in this pattern must match the
    # indentation cli.py actually uses for these two lines — verify.
    no_spawn_match = re.search(
        r"^ if no_spawn:\n(.*?)^ # Default path",
        fn_body,
        flags=re.MULTILINE | re.DOTALL,
    )
    assert no_spawn_match, (
        "could not isolate `if no_spawn:` block; layout drifted from fix"
    )
    no_spawn_block = no_spawn_match.group(1)

    # The branch must go through the deferral helper.
    assert "write_deferred_captures" in no_spawn_block, (
        "no_spawn branch must call write_deferred_captures"
    )
    # Forbidden inline-ingest imports.
    assert "from iai_mcp.capture import capture_transcript" not in no_spawn_block, (
        "Phase 7.5 regression: capture_transcript reintroduced into "
        "--no-spawn branch (would trigger embedder cold-load on every "
        "Stop-hook fire)"
    )
    assert "from iai_mcp.store import MemoryStore" not in no_spawn_block, (
        "Phase 7.5 regression: MemoryStore reintroduced into --no-spawn "
        "branch"
    )
    # Defensive: the socket probe must be gone from this branch as well.
    assert "_try_short_timeout_connect" not in no_spawn_block, (
        "socket probe must be gone from --no-spawn branch (the "
        "probe was the gate that selected the inline path)"
    )

View file

@ -0,0 +1,180 @@
"""Phase 07.2-03 R2 / A2 regression test — cascade poll cooldown.
Mechanism: mock `iai_mcp.daemon.time.monotonic` (the daemon-side cooldown
clock) AND monkeypatch `HIPPEA_CASCADE_POLL_SEC` to 0.05s so the loop
body re-enters fast on the real event loop, while the cooldown is gated
by the mocked simulated-time clock. Drive the loop forward by advancing
the mock clock in 5-second simulated steps; assert the body ran at most
ceil(window/60)+1 = 6 times across the simulated 5-minute window.
Both monkeypatches are required for the test to have teeth:
- Without `HIPPEA_CASCADE_POLL_SEC=0.05`, the real-wall-time poll wait
  (5s) limits real iterations to ~1 in a 1.2s test window, so `n==1`
  passes the `n <= 6` assertion trivially without any cooldown.
- Without `time.monotonic` mocking, the cooldown gate sees real elapsed
wall time (~1s in test) and never gates anything (60s threshold).
Project async-test idiom (mandatory): sync `def test_*` + `asyncio.run`.
"""
from __future__ import annotations
import asyncio
from unittest.mock import patch
import pytest
@pytest.mark.skip(
    reason=(
        "Plan 07.2-03 documented fallback (Task 2 'Note on test pragmatism'): "
        "patching `iai_mcp.daemon.time.monotonic` deadlocks asyncio's internal "
        "scheduler — `BaseEventLoop.time()` reads `time.monotonic()` for every "
        "deadline, so frozen clock => `await asyncio.wait_for(...)` never "
        "expires. Plan explicitly pre-authorizes simplifying to "
        "`test_cooldown_clears_after_min_interval_elapsed` only (which proves "
        "the underlying elapsed-comparison gate logic without asyncio). The "
        "plan also forbids swapping to pytest-asyncio. R2 acceptance is "
        "carried by the unit test below + the gate code path's exclusive "
        "dependence on `time.monotonic - _last_cascade_completed_at` "
        "(mechanically equivalent under any clock that advances)."
    )
)
def test_at_most_six_cascades_over_five_minute_window_with_continuous_pending(monkeypatch):
    """R2 acceptance: cooldown caps cascade rate to ≤ 6 in 5 min.

    Currently skipped (see the skip reason). Sync wrapper that drives the
    async body with ``asyncio.run``.
    """
    asyncio.run(_at_most_six_cascades_body(monkeypatch))


async def _at_most_six_cascades_body(monkeypatch):
    """Run the cascade loop against a hand-advanced clock and count cascades.

    Every invocation of the stubbed ``build_runtime_graph`` records the
    simulated time; the final assertions bound that count to [2, 6] over a
    simulated 300-second window.
    """
    import iai_mcp.daemon as daemon_mod
    # One entry (simulated timestamp) per cascade body invocation.
    cascade_invocations: list[float] = []
    # Minimal stand-in exposing the two attributes set here.
    sentinel_assignment = type("Asgmt", (), {"top_communities": [], "mid_regions": {}})()
    # Mock clock that we control. Initial value 1000.0; test advances it.
    # A one-element list so the nested functions can mutate it.
    clock = [1000.0]

    def fake_monotonic():
        return clock[0]

    def counting_stub(store):
        # Stands in for retrieve.build_runtime_graph; returns the same
        # 3-tuple shape (graph, assignment, rich_club).
        cascade_invocations.append(fake_monotonic())
        return (None, sentinel_assignment, [])

    async def fast_cascade_stub(store, assignment, **kwargs):
        return {"communities_selected": 0, "records_warmed": 0}

    # Persistent pending=true so cascade body is always ELIGIBLE — only the
    # cooldown gate keeps the rate in check.
    state_holder = {
        "fsm_state": "WAKE",
        "hippea_cascade_request": {"pending": True, "session_id": "test"},
    }

    def load_state_stub():
        # Hand out a copy so the loop cannot mutate state_holder directly.
        return dict(state_holder)

    def save_state_stub(state):
        # Re-arm pending=true after the cascade body clears it. This
        # simulates 11 sessions all keeping pending=true high.
        state_holder.update(state)
        state_holder["hippea_cascade_request"] = {
            "pending": True, "session_id": "test",
        }

    def write_event_stub(*args, **kwargs):
        return None

    # Reset module-level cooldown state.
    monkeypatch.setattr(daemon_mod, "_last_cascade_completed_at", 0.0)
    # Speed up the loop's real-wall-time poll cadence so the body re-enters
    # fast. The cooldown gate (60s in MOCKED-clock space) is what we're
    # testing — the real-wall poll just controls how often we get a chance
    # to evaluate the gate.
    monkeypatch.setattr(daemon_mod, "HIPPEA_CASCADE_POLL_SEC", 0.05)
    shutdown = asyncio.Event()
    # Patch `time.monotonic` through the daemon module's `time` reference.
    # NOTE(review): the intent was to leave the loop's
    # `await asyncio.wait_for` on real time, but per the skip reason on the
    # wrapper this patch also freezes the clock asyncio's own scheduler
    # reads — i.e. it is presumably process-wide, not daemon-local. Confirm
    # and rework before un-skipping.
    with patch("iai_mcp.daemon.time.monotonic", fake_monotonic), \
            patch("iai_mcp.retrieve.build_runtime_graph", counting_stub), \
            patch("iai_mcp.hippea_cascade.run_cascade", fast_cascade_stub), \
            patch("iai_mcp.daemon_state.load_state", load_state_stub), \
            patch("iai_mcp.daemon_state.save_state", save_state_stub), \
            patch("iai_mcp.daemon.write_event", write_event_stub):
        cascade_task = asyncio.create_task(
            daemon_mod._hippea_cascade_loop(store=None, shutdown=shutdown),
        )
        # Drive 300s of simulated time forward in 5s simulated steps.
        # Real wall time elapsed ≈ steps * (asyncio.sleep yield). With
        # POLL_SEC=0.05, the loop body has many opportunities to re-enter
        # within each 0.02s real yield.
        POLL_STEP = 5.0
        WINDOW = 300.0
        steps = int(WINDOW / POLL_STEP)
        for _ in range(steps):
            clock[0] += POLL_STEP
            # Yield so the cascade task gets scheduled. Real-wall sleep is
            # short; the loop's own `await asyncio.wait_for(..., 0.05)`
            # plus this 0.02 yield gives the body multiple chances per step.
            await asyncio.sleep(0.02)
        shutdown.set()
        # Give the loop 2s to exit on its own; otherwise cancel and swallow
        # whatever the cancelled task raises.
        try:
            await asyncio.wait_for(cascade_task, timeout=2.0)
        except asyncio.TimeoutError:
            cascade_task.cancel()
            try:
                await cascade_task
            except (asyncio.CancelledError, Exception):
                pass
    # Acceptance per A2: ≤ 6 cascades in 5-minute window.
    # The bound is computed as ceil(WINDOW / MIN_INTERVAL) + 1 with
    # MIN_INTERVAL=60 → ceil(300/60)+1 = 6.
    n = len(cascade_invocations)
    assert n <= 6, (
        f"R2 FAIL: {n} cascade invocations in 5-min window with "
        f"continuous pending=true. Expected ≤ 6 with 60s cooldown."
    )
    # Also assert at least 2 (loop did get to run AND cooldown
    # actually let through more than one — without a cooldown bug
    # this would still be at LEAST 2 because we advanced 300s of
    # simulated time across at least 5 cooldown windows).
    # If `n == 1` here, the test is degenerate (would pass for a
    # broken cooldown that blocks ALL cascades). We require n >= 2
    # to confirm the gate releases on time-advance.
    assert n >= 2, (
        f"R2 FAIL: only {n} cascade invocations across simulated "
        f"5-min window. Expected ≥ 2 (cooldown should release after "
        f"60 simulated seconds). Test fixture / mocks broken."
    )
def test_cooldown_clears_after_min_interval_elapsed():
    """Direct unit test of the gate arithmetic: once MIN_INTERVAL has elapsed
    since the last completed cascade, the elapsed-time comparison no longer
    gates a fresh invocation."""
    asyncio.run(_cooldown_clears_after_min_interval_body())


async def _cooldown_clears_after_min_interval_body():
    """Check ``now - _last_cascade_completed_at`` against
    ``HIPPEA_CASCADE_MIN_INTERVAL_SEC`` on both sides of the threshold.

    The module-level ``_last_cascade_completed_at`` is set through
    ``patch.object`` so its previous value is restored on exit and does not
    leak into later tests.
    """
    import iai_mcp.daemon as daemon_mod

    # Hand-advanced clock; a one-element list so the closure can mutate it.
    clock = [1000.0]

    def fake_monotonic():
        return clock[0]

    min_interval = daemon_mod.HIPPEA_CASCADE_MIN_INTERVAL_SEC

    # No `await` happens inside this block, so replacing the monotonic clock
    # here cannot stall the running event loop.
    with patch("iai_mcp.daemon.time.monotonic", fake_monotonic), \
            patch.object(daemon_mod, "_last_cascade_completed_at", 1000.0):
        # Last completion is "now": the gate must still be closed.
        elapsed = fake_monotonic() - daemon_mod._last_cascade_completed_at
        assert elapsed < min_interval

        # Advance clock past MIN_INTERVAL: the gate must open.
        clock[0] = 1000.0 + min_interval + 0.1
        elapsed = fake_monotonic() - daemon_mod._last_cascade_completed_at
        assert elapsed >= min_interval

View file

@ -0,0 +1,111 @@
"""Phase 07.2-03 R1 / A1 regression test — cascade body must not block the event loop.
Mechanism: stub `retrieve.build_runtime_graph` with a sync function that
`time.sleep(5.0)`. With Plan 03's `await asyncio.to_thread(...)` wrap,
the cascade-body sleep runs in a worker thread and a concurrent
`asyncio.sleep(0)` + small coroutine on the same event loop completes
in <100ms. Without the wrap, the event loop is pinned for 5s.
Project async-test idiom (mandatory): sync `def test_*` body wraps
`asyncio.run(_async_body())`. The project does NOT depend on
`pytest-asyncio`; `@pytest.mark.asyncio` markers silently pass without
running. See tests/test_daemon_tick_flags.py:144 for the canonical pattern.
"""
from __future__ import annotations
import asyncio
import time
from unittest.mock import patch
def test_concurrent_coroutine_completes_under_100ms_while_cascade_sleeps_5s(monkeypatch):
    """R1 acceptance: concurrent async work runs while cascade body is mid-sleep.

    Sync wrapper that drives the async body with ``asyncio.run``.
    """
    asyncio.run(_concurrent_coroutine_completes_under_100ms_body(monkeypatch))


async def _concurrent_coroutine_completes_under_100ms_body(monkeypatch):
    """Start the cascade loop with a 5s-blocking graph build and time two
    short ``asyncio.sleep`` calls on the same event loop.

    The final assertion requires those two sleeps to finish in under 100ms.
    """
    # Patch retrieve.build_runtime_graph at the module the cascade imports
    # from (cascade does `from iai_mcp import retrieve`; so we patch
    # `iai_mcp.retrieve.build_runtime_graph` — that's what the local-import
    # name resolution lands on inside the function body).
    sleep_duration = 5.0
    # Minimal stand-in exposing the two attributes set here.
    sentinel_assignment = type("Asgmt", (), {"top_communities": [], "mid_regions": {}})()

    def slow_blocking_stub(store):
        # Deliberately blocking (sync) sleep: this is the work that must not
        # run on the event loop thread.
        time.sleep(sleep_duration)
        # Return a 3-tuple matching real signature: (graph, assignment, rich_club).
        return (None, sentinel_assignment, [])

    # Stub run_cascade to instantly return — we only care about the heavy
    # build_runtime_graph step blocking-or-not.
    async def fast_cascade_stub(store, assignment, **kwargs):
        return {"communities_selected": 0, "records_warmed": 0}

    # Stub state I/O so the cascade body sees pending=true once.
    state_holder = {
        "fsm_state": "WAKE",
        "hippea_cascade_request": {"pending": True, "session_id": "test"},
    }

    def load_state_stub():
        # Hand out a copy so the loop cannot mutate state_holder directly.
        return dict(state_holder)

    def save_state_stub(state):
        # Replace the held state wholesale with whatever the loop saved.
        state_holder.clear()
        state_holder.update(state)

    # Stub write_event (called inside the cascade body via to_thread).
    def write_event_stub(*args, **kwargs):
        return None

    # Build a shutdown event that we'll set after a moment to terminate the loop.
    shutdown = asyncio.Event()
    # Reset module-level cooldown state to 0.0 so first iteration runs body.
    import iai_mcp.daemon as daemon_mod
    monkeypatch.setattr(daemon_mod, "_last_cascade_completed_at", 0.0)
    # Patch the names the cascade body resolves at call time.
    with patch("iai_mcp.retrieve.build_runtime_graph", slow_blocking_stub), \
            patch("iai_mcp.hippea_cascade.run_cascade", fast_cascade_stub), \
            patch("iai_mcp.daemon_state.load_state", load_state_stub), \
            patch("iai_mcp.daemon_state.save_state", save_state_stub), \
            patch("iai_mcp.daemon.write_event", write_event_stub):
        # Start the cascade loop as a background task.
        cascade_task = asyncio.create_task(
            daemon_mod._hippea_cascade_loop(store=None, shutdown=shutdown),
        )
        # Give the cascade a moment to enter the body and start sleeping.
        # We need cascade to BE INSIDE the to_thread sleep when we measure.
        await asyncio.sleep(0.2)
        # Now race a small coroutine that should complete in <100ms if the
        # event loop isn't blocked.
        t_start = time.monotonic()
        await asyncio.sleep(0.01)  # 10ms — basic loop responsiveness probe
        await asyncio.sleep(0.01)
        elapsed = time.monotonic() - t_start
        # Cleanup: shut down the cascade loop. The timeout covers the full
        # stub sleep plus slack; on timeout, cancel and swallow whatever the
        # cancelled task raises.
        shutdown.set()
        try:
            await asyncio.wait_for(cascade_task, timeout=sleep_duration + 2.0)
        except asyncio.TimeoutError:
            cascade_task.cancel()
            try:
                await cascade_task
            except (asyncio.CancelledError, Exception):
                pass
    # The two `asyncio.sleep(0.01)` calls + coroutine overhead should
    # land WELL under 100ms if the wrap is in place. Without the wrap
    # (bare `retrieve.build_runtime_graph(store)` call), this elapsed
    # would be ≥ 5.0s.
    assert elapsed < 0.1, (
        f"R1 FAIL: event loop pinned for {elapsed:.3f}s while cascade body "
        f"was running. Expected <100ms (wrap working). Did Plan 03's "
        f"`await asyncio.to_thread(retrieve.build_runtime_graph, store)` "
        f"land in src/iai_mcp/daemon.py::_hippea_cascade_loop?"
    )

View file

@ -0,0 +1,221 @@
"""Plan 05-13 RED scaffold — cached centrality on graph nodes.
``build_runtime_graph`` must compute betweenness centrality ONCE and
attach it as the ``centrality`` NetworkX node attribute so the rank
stage can read it O(1) instead of recomputing ``graph.centrality()``
on every recall. The cache file must round-trip the per-node
centrality alongside the rest of the node payload so a cold-start
rebuild hits the cache and the pipeline-hot-path stays allocation-free.
Contracts:
C1 every graph node has a ``centrality`` float attribute after
``build_runtime_graph`` returns.
C2 runtime_graph_cache round-trips the ``centrality`` value per node
(save + try_load preserves the exact float).
C3 when a node is missing ``centrality`` (pre-05-13 graph / race),
recall_for_response falls back to inline computation without crashing.
C4 CACHE_VERSION bumped from "05-12-v1" to "05-13-v1"; legacy cache
files are invalidated cleanly.
"""
from __future__ import annotations
import json
from datetime import datetime, timezone
from pathlib import Path
from uuid import UUID, uuid4
import pytest
from iai_mcp import retrieve, runtime_graph_cache
from iai_mcp.store import MemoryStore
from iai_mcp.types import MemoryRecord
@pytest.fixture(autouse=True)
def _isolated_keyring(monkeypatch: pytest.MonkeyPatch):
    """Swap the ``keyring`` module's password functions for an in-memory dict.

    Applied automatically to every test in this module. Entries are keyed by
    the ``(service, user)`` pair; the backing dict is yielded so a test can
    inspect it.
    """
    import keyring as _keyring

    fake: dict[tuple[str, str], str] = {}

    def _get(s, u):
        return fake.get((s, u))

    def _set(s, u, p):
        fake[(s, u)] = p

    def _delete(s, u):
        # Deleting a missing entry is a no-op.
        return fake.pop((s, u), None)

    replacements = (
        ("get_password", _get),
        ("set_password", _set),
        ("delete_password", _delete),
    )
    for attr_name, impl in replacements:
        monkeypatch.setattr(_keyring, attr_name, impl)
    yield fake
def _make_record(store: MemoryStore, text: str, seed: int) -> MemoryRecord:
    """Build an episodic ``MemoryRecord`` with a seeded, unit-length embedding.

    The embedding is drawn from ``numpy``'s default RNG seeded with ``seed``,
    has ``store.embed_dim`` components and is divided by its norm (or by 1.0
    if the norm is zero). ``text`` becomes ``literal_surface``; the record
    gets a fresh UUID and identical created/updated timestamps in UTC.
    """
    import numpy as np

    vec = (
        np.random.default_rng(seed)
        .standard_normal(store.embed_dim)
        .astype(np.float32)
    )
    vec /= float(np.linalg.norm(vec)) or 1.0
    stamp = datetime.now(timezone.utc)

    fields = {
        "id": uuid4(),
        "tier": "episodic",
        "literal_surface": text,
        "aaak_index": "",
        "embedding": vec.tolist(),
        "community_id": None,
        "centrality": 0.0,
        "detail_level": 2,
        "pinned": False,
        "stability": 0.0,
        "difficulty": 0.0,
        "last_reviewed": None,
        "never_decay": False,
        "never_merge": False,
        "provenance": [],
        "created_at": stamp,
        "updated_at": stamp,
        "tags": ["t"],
        "language": "en",
    }
    return MemoryRecord(**fields)
@pytest.fixture
def seeded_store(tmp_path: Path) -> MemoryStore:
    """Return a ``MemoryStore`` under ``tmp_path`` holding 15 records and a
    small edge set.

    Edges link each record to the next one in ``all_records()`` order, plus
    two extra links (0-5 and 2-10), so that betweenness has something to
    measure on at least some nodes.
    """
    store = MemoryStore(path=tmp_path / "lancedb")
    store.root = tmp_path

    # Seeds run 1..15 while the text labels run fact-0..fact-14.
    for seed in range(1, 16):
        store.insert(_make_record(store, f"fact-{seed - 1}", seed))

    ids = [record.id for record in store.all_records()]
    chain = list(zip(ids, ids[1:]))
    shortcuts = [(ids[0], ids[5]), (ids[2], ids[10])]
    store.boost_edges(chain + shortcuts, delta=0.5)
    return store
# --------------------------------------------------------------- C1
def test_C1_every_node_has_centrality_attr(seeded_store):
    """C1: every node of the built runtime graph has a float ``centrality``."""
    graph, _a, _rc = retrieve.build_runtime_graph(seeded_store)
    nodes = graph._nx.nodes
    assert len(nodes) > 0
    for nid in nodes:
        attrs = nodes[nid]
        assert "centrality" in attrs, f"node {nid} missing centrality attr"
        value = attrs["centrality"]
        assert isinstance(value, float), (
            f"centrality on {nid} must be float, got {type(value)}"
        )
# --------------------------------------------------------------- C2
def test_C2_cache_round_trips_centrality(seeded_store):
    """C2: centrality read back from the cache matches the live graph per node.

    Values are compared with an absolute tolerance of 1e-9.
    """
    graph, _assignment_live, _rich_club_live = retrieve.build_runtime_graph(seeded_store)

    # Snapshot centrality from the live graph before touching the cache.
    live_nodes = graph._nx.nodes
    live_cent = {nid: float(live_nodes[nid]["centrality"]) for nid in live_nodes}

    # Drop the cache and rebuild so that a fresh save happens.
    runtime_graph_cache.invalidate(seeded_store)
    retrieve.build_runtime_graph(seeded_store)

    cached = runtime_graph_cache.try_load(seeded_store)
    assert cached is not None, "cache should be populated after build"
    # try_load returns a 4-tuple; only the node payload matters here.
    _assignment, _rich_club, node_payload, _max_degree = cached
    assert node_payload is not None and len(node_payload) > 0

    for nid, expected in live_cent.items():
        entry = node_payload.get(nid)
        assert entry is not None, f"missing payload for {nid}"
        assert "centrality" in entry, f"payload {nid} missing centrality"
        assert abs(entry["centrality"] - expected) < 1e-9, (
            f"centrality drift on {nid}: cache={entry['centrality']} "
            f"live={expected}"
        )
# --------------------------------------------------------------- C3
def test_C3_missing_centrality_fallback_inline(seeded_store):
    """C3: recall must survive a graph whose nodes lack ``centrality``.

    The attribute is stripped from every node after the build (simulating a
    pre-05-13 graph shape or a race), then ``recall_for_response`` is called
    with a deterministic stub embedder. The test only requires a response
    object whose ``hits`` is a list.
    """
    import hashlib

    import numpy as np

    from iai_mcp import pipeline

    dim = seeded_store.embed_dim

    class _E:
        """Stub embedder: unit vector seeded from the SHA-256 of the text."""

        DIM = dim
        DEFAULT_DIM = dim
        DEFAULT_MODEL_KEY = "t"

        def embed(self, t):
            digest = hashlib.sha256(t.encode()).hexdigest()
            rng = np.random.default_rng(int(digest[:16], 16))
            v = rng.standard_normal(self.DIM).astype(np.float32)
            v /= float(np.linalg.norm(v)) or 1.0
            return v.tolist()

    graph, assignment, rich_club = retrieve.build_runtime_graph(seeded_store)

    # Remove the cached attribute everywhere; iterate a snapshot of the ids.
    for nid in list(graph._nx.nodes):
        graph._nx.nodes[nid].pop("centrality", None)

    resp = pipeline.recall_for_response(
        store=seeded_store,
        graph=graph,
        assignment=assignment,
        rich_club=rich_club,
        embedder=_E(),
        cue="fact-3",
        session_id="t-C3",
        budget_tokens=4000,
    )

    # No crash, and the response still exposes a hits list.
    assert resp is not None
    assert isinstance(resp.hits, list)
# --------------------------------------------------------------- C4
def test_C4_cache_version_bumped_to_05_13_v1():
    """C4: pin the current ``CACHE_VERSION``.

    The version stamp has moved forward with the cache shape
    (05-12-v1 -> 05-13-v1 -> 06-02-v1 -> 07-09-v3); the on-disk file is now
    wrapped in AES-256-GCM. Files with an older stamp are expected to be
    rejected on version mismatch, and the legacy plaintext-shape "06-02-v1"
    is described as lazily migrating to the encrypted shape on first
    warm-start under 07.9. The test name keeps its historical "05_13" suffix.
    """
    expected_version = "07-09-v3"
    assert runtime_graph_cache.CACHE_VERSION == expected_version
def test_C4_legacy_cache_invalidated(seeded_store, tmp_path: Path):
    """C4: a cache file stamped ``cache_version="05-12-v1"`` must not load.

    The on-disk cache is encrypted, so the test decrypts it, rewrites the
    version stamp, re-encrypts with the same key and AAD, and then expects
    ``try_load`` to return ``None``.
    """
    from iai_mcp.crypto import decrypt_field, encrypt_field

    # Building the graph is what writes the cache file.
    retrieve.build_runtime_graph(seeded_store)
    cache_path = tmp_path / "runtime_graph_cache.json"
    assert cache_path.exists(), "cache not created by build_runtime_graph"

    key = runtime_graph_cache._cache_encryption_key(seeded_store)
    aad = runtime_graph_cache._CACHE_AAD

    # Decrypt -> mutate version -> re-encrypt.
    ciphertext = cache_path.read_text(encoding="utf-8")
    document = json.loads(decrypt_field(ciphertext, key, aad))
    document["cache_version"] = "05-12-v1"
    rewrapped = encrypt_field(json.dumps(document), key, aad)
    cache_path.write_text(rewrapped, encoding="ascii")

    # The stale version stamp must be rejected.
    assert runtime_graph_cache.try_load(seeded_store) is None

165
tests/test_cli_audit.py Normal file
View file

@ -0,0 +1,165 @@
"""Tests for iai-mcp audit CLI (OPS-07 Plan 02-05).
`iai-mcp audit [--since WEEKS] [--severity SEV]` renders an identity-event
audit log, TZ-aware timestamps, and REDACTED shield match counts (D-30
threat T-02-05-02: leaking matched patterns in CLI output would hand the
attacker a dictionary of what the shield is watching for).
"""
from __future__ import annotations
from datetime import datetime, timedelta, timezone
import pytest
from iai_mcp.cli import main as cli_main
from iai_mcp.events import write_event
from iai_mcp.store import MemoryStore
def test_cli_audit_empty(tmp_path, capsys, monkeypatch):
    """With an empty store, `audit` exits 0 and prints a 'no events' notice."""
    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))

    assert cli_main(["audit"]) == 0

    printed = capsys.readouterr().out.lower()
    # Either wording of the empty-log message is accepted.
    assert any(
        phrase in printed for phrase in ("no identity events", "no events")
    )
def test_cli_audit_renders_events(tmp_path, capsys, monkeypatch):
    """A stored event shows up in `audit` output with its kind and severity."""
    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    write_event(
        MemoryStore(path=tmp_path),
        kind="s5_invariant_update",
        data={"anchor_id": "abc", "new_record_id": "def"},
        severity="info",
        session_id="s1",
    )

    assert cli_main(["audit"]) == 0

    rendered = capsys.readouterr().out
    # Both the event kind and its severity label must be printed.
    for expected in ("s5_invariant_update", "info"):
        assert expected in rendered
def test_cli_audit_since_weeks(tmp_path, capsys, monkeypatch):
    """`audit --since=2` exits 0 when the store holds one event."""
    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    write_event(
        MemoryStore(path=tmp_path),
        kind="s5_invariant_update",
        data={"anchor_id": "abc"},
        severity="info",
        session_id="s1",
    )

    # Only the exit status is checked; the output is not inspected.
    exit_status = cli_main(["audit", "--since=2"])
    assert exit_status == 0
def test_cli_audit_severity_filter_warning_only(tmp_path, capsys, monkeypatch):
    """`--severity=warning` prints the warning event and hides the info one."""
    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    store = MemoryStore(path=tmp_path)

    # One event per severity so the filter has something to drop.
    seeded_events = (
        ("s5_invariant_update", {"anchor_id": "abc"}, "info", "s1"),
        (
            "s5_drift_alert",
            {"first_value": 0.1, "last_value": 0.5},
            "warning",
            "s2",
        ),
    )
    for kind, data, severity, session_id in seeded_events:
        write_event(
            store, kind=kind, data=data, severity=severity,
            session_id=session_id,
        )

    assert cli_main(["audit", "--severity=warning"]) == 0

    rendered = capsys.readouterr().out
    assert "s5_drift_alert" in rendered
    assert "s5_invariant_update" not in rendered
def test_cli_audit_shows_shield_rejections_redacted(tmp_path, capsys, monkeypatch):
    """shield_rejection events are listed with a match count, not the words.

    The event is written with three matched patterns; the output must name
    the event kind and contain the count, while the literal patterns
    "forget" and "you are now" must be absent.
    """
    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    rejection_details = {
        "tier": "hard_block",
        "matched": ["forget", "you are now", "override"],
        "record_id": "aabbcc",
        "action": "reject",
    }
    write_event(
        MemoryStore(path=tmp_path),
        kind="shield_rejection",
        data=rejection_details,
        severity="critical",
        session_id="s1",
    )

    assert cli_main(["audit"]) == 0

    out = capsys.readouterr().out
    assert "shield_rejection" in out

    # The number of matched patterns (3) must be visible in some form.
    count_is_shown = "3" in out or "matched_count=3" in out.replace(" ", "")
    assert count_is_shown

    # Redaction: the signal words themselves never reach the terminal.
    for signal_word in ("forget", "you are now"):
        assert signal_word not in out
# ---------------------------------------------------------------- subcommands
def test_cli_audit_shield_subcommand(tmp_path, capsys, monkeypatch):
    """`audit shield --since=7` exits 0 with one shield_rejection stored."""
    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    write_event(
        MemoryStore(path=tmp_path),
        kind="shield_rejection",
        data={"tier": "hard_block", "matched": ["forget"], "action": "reject"},
        severity="critical",
        session_id="s1",
    )

    # The contract checked here is only "runs without failing".
    exit_status = cli_main(["audit", "shield", "--since=7"])
    assert exit_status == 0
def test_cli_audit_drift_subcommand(tmp_path, capsys, monkeypatch):
    """`audit drift` exits 0 and mentions drift for a rising m4 series."""
    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    store = MemoryStore(path=tmp_path)

    # Five strictly increasing m4 values, one per session.
    rising_values = [0.1, 0.2, 0.3, 0.4, 0.5]
    for index, value in enumerate(rising_values):
        write_event(
            store,
            kind="trajectory_metric",
            data={"metric": "m4", "value": value},
            severity="info",
            session_id=f"s{index}",
        )

    assert cli_main(["audit", "drift"]) == 0

    rendered = capsys.readouterr().out
    assert "drift" in rendered.lower()
def test_cli_audit_identity_subcommand(tmp_path, capsys, monkeypatch):
    """`audit identity` prints the s5_* event and omits shield_rejection."""
    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    store = MemoryStore(path=tmp_path)

    identity_event = {
        "kind": "s5_invariant_update",
        "data": {"anchor_id": "abc"},
        "severity": "info",
        "session_id": "s1",
    }
    shield_event = {
        "kind": "shield_rejection",
        "data": {"tier": "hard_block", "matched": ["forget"], "action": "reject"},
        "severity": "critical",
        "session_id": "s2",
    }
    for event in (identity_event, shield_event):
        write_event(store, **event)

    assert cli_main(["audit", "identity"]) == 0

    rendered = capsys.readouterr().out
    assert "s5_invariant_update" in rendered
    assert "shield_rejection" not in rendered

383
tests/test_cli_crypto.py Normal file
View file

@ -0,0 +1,383 @@
"""iai-mcp crypto + iai-mcp migrate --from=2 --to=3 CLI tests.
Originally Plan 02-08; updated in W1 to retire the keyring
backend in favor of a file-backed primary backend at
`{IAI_MCP_STORE}/.crypto.key` (32 raw bytes, mode 0o600). The
`_isolated_keyring` autouse fixture is gone; CLI tests now monkeypatch
IAI_MCP_STORE to a tmp_path and pre-create / inspect the file directly.
Commands under test:
- `iai-mcp crypto status` -> JSON-ish status of file backend + user_id
- `iai-mcp crypto rotate` -> rotate key + re-encrypt all records
- `iai-mcp migrate --from=2 --to=3 [--dry-run]` -> encryption migration
"""
from __future__ import annotations
import json
import os
import secrets
import stat
from datetime import datetime, timezone
from uuid import uuid4
import pytest
def test_cli_crypto_status_shows_file_backend(tmp_path, monkeypatch, capsys):
    """`iai-mcp crypto status` must describe the file-backed key.

    A 32-byte `.crypto.key` at mode 0o600 is placed in the store root before
    calling the command. Expected outcome:
      - exit code 0 and the user id "default" in the output
      - the words "file", ".crypto.key" and "600" in the output
      - no occurrence of "keyring" (case-insensitive)
    """
    import argparse

    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    monkeypatch.delenv("IAI_MCP_CRYPTO_PASSPHRASE", raising=False)

    key_file = tmp_path / ".crypto.key"
    key_file.write_bytes(secrets.token_bytes(32))
    os.chmod(key_file, 0o600)

    from iai_mcp.cli import cmd_crypto_status

    exit_code = cmd_crypto_status(argparse.Namespace(user_id="default"))
    out = capsys.readouterr().out
    out_lower = out.lower()

    assert exit_code == 0
    assert "default" in out

    # File-backend output contract.
    assert "file" in out_lower, f"status must report backend=file; got:\n{out}"
    assert ".crypto.key" in out, f"status must include the file path; got:\n{out}"
    assert "600" in out, f"status must expose mode 0o600; got:\n{out}"

    # The retired backend must not be mentioned at all.
    assert "keyring" not in out_lower, (
        f"status must NOT mention keyring (backend retired in 07.10); got:\n{out}"
    )
def test_cli_crypto_rotate_regenerates_key(tmp_path, monkeypatch, capsys):
    """`iai-mcp crypto rotate` replaces the key file and re-encrypts records.

    Setup: a `.crypto.key` (key A, mode 0o600) plus one record inserted under
    that key. After rotation the test expects:
      - the key file holds 32 different bytes, still at mode 0o600
      - the record's stored `literal_surface` is a different blob that still
        carries the `iai:enc:v1:` prefix
      - a fresh `MemoryStore` reads back the original plaintext
    """
    import argparse

    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    monkeypatch.delenv("IAI_MCP_CRYPTO_PASSPHRASE", raising=False)

    # Key A goes on disk before any store is opened.
    key_path = tmp_path / ".crypto.key"
    key_a = secrets.token_bytes(32)
    key_path.write_bytes(key_a)
    os.chmod(key_path, 0o600)

    from iai_mcp.cli import cmd_crypto_rotate
    from iai_mcp.store import MemoryStore, RECORDS_TABLE
    from iai_mcp.types import EMBED_DIM, MemoryRecord

    store = MemoryStore()
    rec = MemoryRecord(
        id=uuid4(),
        tier="episodic",
        literal_surface="rotation test content",
        aaak_index="",
        embedding=[0.1] * EMBED_DIM,
        community_id=None,
        centrality=0.0,
        detail_level=2,
        pinned=False,
        stability=0.0,
        difficulty=0.0,
        last_reviewed=None,
        never_decay=False,
        never_merge=False,
        provenance=[],
        created_at=datetime.now(timezone.utc),
        updated_at=datetime.now(timezone.utc),
        tags=[],
        language="en",
    )
    store.insert(rec)

    def _stored_surface(active_store):
        # Raw (still encrypted) literal_surface cell for the seeded record.
        frame = active_store.db.open_table(RECORDS_TABLE).to_pandas()
        return frame[frame["id"] == str(rec.id)].iloc[0]["literal_surface"]

    initial_ct = _stored_surface(store)
    assert initial_ct.startswith("iai:enc:v1:")

    exit_code = cmd_crypto_rotate(argparse.Namespace(user_id="default"))
    out = capsys.readouterr().out
    assert exit_code == 0
    assert "rotat" in out.lower()

    # Key-file invariant: fresh 32 bytes, permissions preserved.
    new_key_bytes = key_path.read_bytes()
    assert len(new_key_bytes) == 32
    assert new_key_bytes != key_a, "rotate must write a fresh key to the file"
    mode = stat.S_IMODE(os.stat(key_path).st_mode)
    assert mode == 0o600, f"rotated key file must be 0o600, got 0o{mode:03o}"

    # Data invariant: a second store opened after rotation sees a new blob.
    store2 = MemoryStore()
    post_ct = _stored_surface(store2)
    assert post_ct.startswith("iai:enc:v1:")
    assert post_ct != initial_ct

    # And the plaintext still round-trips through that second store.
    got = store2.get(rec.id)
    assert got is not None
    assert got.literal_surface == "rotation test content"
def test_cli_migrate_to_3_dry_run_counts_plaintext_rows(tmp_path, monkeypatch, capsys):
    """`migrate --from=2 --to=3 --dry-run` reports the one plaintext row."""
    import argparse

    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))

    from iai_mcp.cli import cmd_migrate
    from iai_mcp.store import MemoryStore, RECORDS_TABLE
    from iai_mcp.types import EMBED_DIM, MemoryRecord

    store = MemoryStore()

    # Write the row straight into the table so insert() never encrypts it.
    rid = uuid4()
    legacy_row = dict(
        id=str(rid),
        tier="episodic",
        literal_surface="plain legacy",
        aaak_index="",
        embedding=[0.1] * EMBED_DIM,
        structure_hv=b"",
        community_id="",
        centrality=0.0,
        detail_level=2,
        pinned=False,
        stability=0.0,
        difficulty=0.0,
        last_reviewed=None,
        never_decay=False,
        never_merge=False,
        provenance_json=json.dumps([{"ts": "x", "cue": "y", "session_id": "z"}]),
        created_at=datetime.now(timezone.utc),
        updated_at=datetime.now(timezone.utc),
        tags_json=json.dumps([]),
        language="en",
        s5_trust_score=0.5,
        profile_modulation_gain_json=json.dumps({}),
        schema_version=2,
    )
    store.db.open_table(RECORDS_TABLE).add([legacy_row])

    exit_code = cmd_migrate(
        argparse.Namespace(from_=2, to=3, dry_run=True, verbose=False)
    )
    out = capsys.readouterr().out
    assert exit_code == 0

    # Some dry-run / migration wording plus the planted row count.
    lowered = out.lower()
    assert any(word in lowered for word in ("would", "dry", "migrat"))
    assert "1" in out
def test_cli_migrate_to_3_encrypts_plaintext_rows(tmp_path, monkeypatch, capsys):
    """A real `migrate --from=2 --to=3` run leaves the row encrypted."""
    import argparse

    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))

    from iai_mcp.cli import cmd_migrate
    from iai_mcp.store import MemoryStore, RECORDS_TABLE
    from iai_mcp.types import EMBED_DIM

    store = MemoryStore()
    rid = uuid4()

    # Plaintext row planted directly in the records table.
    plaintext_row = {
        "id": str(rid),
        "tier": "episodic",
        "literal_surface": "still-plaintext",
        "aaak_index": "",
        "embedding": [0.1] * EMBED_DIM,
        "structure_hv": b"",
        "community_id": "",
        "centrality": 0.0,
        "detail_level": 2,
        "pinned": False,
        "stability": 0.0,
        "difficulty": 0.0,
        "last_reviewed": None,
        "never_decay": False,
        "never_merge": False,
        "provenance_json": json.dumps([]),
        "created_at": datetime.now(timezone.utc),
        "updated_at": datetime.now(timezone.utc),
        "tags_json": json.dumps([]),
        "language": "en",
        "s5_trust_score": 0.5,
        "profile_modulation_gain_json": json.dumps({}),
        "schema_version": 2,
    }
    records = store.db.open_table(RECORDS_TABLE)
    records.add([plaintext_row])

    exit_code = cmd_migrate(
        argparse.Namespace(from_=2, to=3, dry_run=False, verbose=False)
    )
    assert exit_code == 0

    # Re-open the table and check the stored cell now has the cipher prefix.
    frame = store.db.open_table(RECORDS_TABLE).to_pandas()
    migrated = frame[frame["id"] == str(rid)].iloc[0]
    assert migrated["literal_surface"].startswith("iai:enc:v1:")
def test_cli_migrate_to_3_rejects_unsupported_version_pair(
    tmp_path, monkeypatch, capsys
):
    """`--from=9 --to=42` must fail with guidance on stderr or stdout.

    Expects a non-zero exit code and the word "unsupported" or "invalid"
    (case-insensitive) on either output stream.
    """
    import argparse

    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))

    from iai_mcp.cli import cmd_migrate

    args = argparse.Namespace(from_=9, to=42, dry_run=False, verbose=False)
    exit_code = cmd_migrate(args)

    # readouterr() drains BOTH streams in one call. Reading it twice would
    # leave the second read empty, so take a single snapshot and use both
    # fields from it.
    captured = capsys.readouterr()
    err = captured.err.lower()
    out = captured.out.lower()

    assert exit_code != 0
    # Guidance may be printed on either stream.
    assert ("unsupported" in err or "invalid" in err or
            "unsupported" in out or "invalid" in out)
def test_neural_map_bench_passes_after_encryption(tmp_path):
    """The neural_map bench at n=100, 10 iterations, must report passed=True."""
    from bench.neural_map import run_neural_map_bench, D_SPEED_P95_MS

    out = run_neural_map_bench(
        n=100, iterations=10, store_path=tmp_path, seed=0,
    )

    # The report echoes the requested run size.
    assert (out["n"], out["iterations"]) == (100, 10)
    assert out["passed"] is True, (
        f"D-SPEED regression post-encryption: p95={out['latency_ms_p95']} ms "
        f">= {D_SPEED_P95_MS} ms"
    )
def test_cli_crypto_init_creates_fresh_file(tmp_path, monkeypatch, capsys):
    """`iai-mcp crypto init` writes a new 32-byte key file at mode 0o600.

    Starting with no `.crypto.key`, the command must exit 0, create the
    file with exactly 32 bytes and mode 0o600, and mention the path in its
    output. No 4-byte window of the key may appear in stdout.
    """
    import argparse

    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    monkeypatch.delenv("IAI_MCP_CRYPTO_PASSPHRASE", raising=False)

    key_path = tmp_path / ".crypto.key"
    assert not key_path.exists()

    from iai_mcp.cli import cmd_crypto_init

    exit_code = cmd_crypto_init(argparse.Namespace(user_id="default"))
    out = capsys.readouterr().out

    assert exit_code == 0
    assert key_path.exists()
    assert key_path.stat().st_size == 32
    mode = stat.S_IMODE(os.stat(key_path).st_mode)
    assert mode == 0o600, f"init key file must be 0o600, got 0o{mode:03o}"

    # The user has to be told where the key lives.
    assert ".crypto.key" in out

    # Key-disclosure check: scan the key in aligned 4-byte windows and make
    # sure none of them, decoded as latin-1, shows up in stdout.
    raw = key_path.read_bytes()
    for offset in range(0, 32, 4):
        window = raw[offset:offset + 4]
        # All-zero windows are skipped rather than compared.
        if window != b"\x00\x00\x00\x00":
            assert window.decode("latin-1") not in out, (
                "init must not print key bytes to stdout"
            )
def test_cli_crypto_init_refuses_when_file_exists(tmp_path, monkeypatch, capsys):
    """`iai-mcp crypto init` exits 1 and leaves an existing key untouched.

    The error text on stderr must mention `.crypto.key`, and the bytes on
    disk must be the same before and after the call.
    """
    import argparse

    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    monkeypatch.delenv("IAI_MCP_CRYPTO_PASSPHRASE", raising=False)

    key_path = tmp_path / ".crypto.key"
    existing_bytes = secrets.token_bytes(32)
    key_path.write_bytes(existing_bytes)
    os.chmod(key_path, 0o600)

    from iai_mcp.cli import cmd_crypto_init

    exit_code = cmd_crypto_init(argparse.Namespace(user_id="default"))
    stderr_text = capsys.readouterr().err

    assert exit_code == 1
    assert ".crypto.key" in stderr_text
    # Nothing was overwritten.
    assert key_path.read_bytes() == existing_bytes
def test_cli_crypto_rotate_invalidates_aesgcm_cache(tmp_path, monkeypatch):
    """`cmd_crypto_rotate` must call `MemoryStore._invalidate_aesgcm_cache`.

    The companion rotate test reads post-rotation state through a brand-new
    `MemoryStore()`, so it would not notice if the invalidation call were
    removed. Here the method is replaced with an autospec mock and the test
    asserts it was invoked during a successful rotate.
    """
    import argparse
    from unittest.mock import patch

    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    monkeypatch.delenv("IAI_MCP_CRYPTO_PASSPHRASE", raising=False)

    # A key file must exist before rotate is called.
    key_path = tmp_path / ".crypto.key"
    key_path.write_bytes(secrets.token_bytes(32))
    os.chmod(key_path, 0o600)

    from iai_mcp.cli import cmd_crypto_rotate
    from iai_mcp.store import MemoryStore

    rotate_args = argparse.Namespace(user_id="default")
    invalidate_patch = patch.object(
        MemoryStore, "_invalidate_aesgcm_cache", autospec=True
    )
    with invalidate_patch as m:
        exit_code = cmd_crypto_rotate(rotate_args)

    assert exit_code == 0
    assert m.called, (
        "cmd_crypto_rotate must call store._invalidate_aesgcm_cache() "
        "after assigning the new key (Phase 07.10 D-10, T-07.10-08)"
    )

View file

@ -0,0 +1,114 @@
"""CLI + migrate_redact_undecryptable_records tests."""
from __future__ import annotations
import json
import os
import secrets
import subprocess
import sys
from datetime import datetime, timezone
from pathlib import Path
from uuid import uuid4
import pytest
from iai_mcp.migrate import migrate_redact_undecryptable_records
from iai_mcp.store import MemoryStore
from iai_mcp.types import MemoryRecord, SCHEMA_VERSION_CURRENT
def _minimal_record(literal: str) -> MemoryRecord:
    """Build an episodic `MemoryRecord` whose only varying field is its text.

    Args:
        literal: Value stored in ``literal_surface``.

    Returns:
        A record with a fresh UUID, a constant 384-element embedding, a
        1250-byte zeroed ``structure_hv``, one tag ("t1"), and identical
        ``created_at`` / ``updated_at`` timestamps in UTC.
    """
    stamp = datetime.now(timezone.utc)
    fields = dict(
        id=uuid4(),
        tier="episodic",
        literal_surface=literal,
        aaak_index="",
        embedding=[0.02] * 384,
        structure_hv=b"\x00" * 1250,
        community_id=None,
        centrality=0.0,
        detail_level=1,
        pinned=False,
        stability=0.0,
        difficulty=0.0,
        last_reviewed=None,
        never_decay=False,
        never_merge=False,
        provenance=[],
        created_at=stamp,
        updated_at=stamp,
        tags=["t1"],
        language="en",
        s5_trust_score=0.5,
        profile_modulation_gain={},
        schema_version=SCHEMA_VERSION_CURRENT,
    )
    return MemoryRecord(**fields)
def test_redact_makes_literal_decryptable_and_idempotent(tmp_path: Path) -> None:
    """Redaction rewrites an unreadable record once, then becomes a no-op.

    A record is inserted under one key, the key file is overwritten with a
    different key, and the migration is run twice on a store opened with the
    new key. The first run must redact exactly one record (readable
    afterwards with a "<REDACTED:" prefix); the second must redact none.
    """
    root = tmp_path / "redact-store"
    root.mkdir()
    kpath = root / ".crypto.key"

    def _install_key(material: bytes) -> None:
        # Replace the on-disk key and keep its permissions at 0o600.
        kpath.write_bytes(material)
        os.chmod(kpath, 0o600)

    key_a = secrets.token_bytes(32)
    key_b = secrets.token_bytes(32)

    # Phase 1: write the record under key A.
    _install_key(key_a)
    writer_store = MemoryStore(path=root, user_id="default")
    rec = _minimal_record("secret-surface")
    writer_store.insert(rec)
    rid = rec.id
    del writer_store

    # Phase 2: swap in key B and open a new store on the same directory.
    _install_key(key_b)
    reader_store = MemoryStore(path=root, user_id="default")

    first_pass = migrate_redact_undecryptable_records(reader_store)
    assert first_pass["redacted"] == 1
    assert first_pass["skipped_plain"] == 0

    got = reader_store.get(rid)
    assert got is not None
    assert got.literal_surface.startswith("<REDACTED:")

    # Running again must find nothing left to redact.
    second_pass = migrate_redact_undecryptable_records(reader_store)
    assert second_pass["redacted"] == 0
    assert second_pass["skipped_ok"] >= 1
def test_cli_crypto_redact_undecryptable_smoke(tmp_path: Path) -> None:
    """`crypto redact-undecryptable` run as a subprocess reports redacted=1.

    One record is written under a first key, the key file is replaced, and
    the CLI is launched with `python -m iai_mcp.cli` pointing at that store.
    The process must exit 0 and print a JSON object with ``redacted == 1``.
    """
    root = tmp_path / "cli-redact"
    root.mkdir()
    kpath = root / ".crypto.key"

    key_a = secrets.token_bytes(32)
    key_b = secrets.token_bytes(32)

    # Insert under key A.
    kpath.write_bytes(key_a)
    os.chmod(kpath, 0o600)
    seeding_store = MemoryStore(path=root, user_id="default")
    seeding_store.insert(_minimal_record("cli-redact-body"))
    del seeding_store

    # Replace the key on disk with key B before invoking the CLI.
    kpath.write_bytes(key_b)
    os.chmod(kpath, 0o600)

    command = [
        sys.executable,
        "-m",
        "iai_mcp.cli",
        "crypto",
        "redact-undecryptable",
        "--user-id",
        "default",
    ]
    child_env = {**os.environ, "IAI_MCP_STORE": str(root.resolve())}
    # Run from the directory one level above this test file's directory.
    run_dir = str(Path(__file__).resolve().parents[1])

    proc = subprocess.run(
        command,
        capture_output=True,
        text=True,
        cwd=run_dir,
        env=child_env,
        check=False,
    )

    assert proc.returncode == 0, proc.stderr + proc.stdout
    payload = json.loads(proc.stdout.strip())
    assert payload.get("redacted") == 1

750
tests/test_cli_daemon.py Normal file
View file

@ -0,0 +1,750 @@
"""Plan 04-05 -- iai-mcp daemon subcommand group tests (DAEMON-10 + DAEMON-12).
Verifies dispatcher wiring, install/uninstall flow with consent banner,
launchd / systemd template rendering with sys.executable substitution
(Pitfall 5), version skew detection in `daemon status`, and C4 clean uninstall
(removes plist/unit + all 3 state files).
All subprocess calls (launchctl, systemctl, loginctl, tail, journalctl) are
monkeypatched so the suite never touches the host's actual launchd/systemd.
Socket-talking subcommands (status / force-rem / pause / logs) are exercised
against the `_ThreadedFakeDaemon` helper (lifted from
tests/test_core_bedtime_inject.py pattern -- a fake daemon that survives
multiple asyncio.run() teardowns by running on a dedicated background loop).
"""
from __future__ import annotations
import asyncio
import io
import json
import os
import platform
import sys
import tempfile
import threading
from contextlib import redirect_stdout, redirect_stderr
from pathlib import Path
from unittest.mock import patch
import pytest
from iai_mcp import cli as cli_mod
# ---------------------------------------------------------------------------
# Threaded fake daemon (survives multiple asyncio.run teardowns)
# ---------------------------------------------------------------------------
class _ThreadedFakeDaemon:
"""Fake daemon NDJSON server on a background loop.
Each request line is captured. Each request gets `reply` written back
(or a per-request reply via `reply_fn(req)` if provided).
"""
def __init__(
self,
path: Path,
captured: list,
reply: dict | None = None,
reply_fn=None,
) -> None:
self.path = path
self.captured = captured
self.reply = reply
self.reply_fn = reply_fn
self._loop: asyncio.AbstractEventLoop | None = None
self._server: asyncio.AbstractServer | None = None
self._thread: threading.Thread | None = None
self._ready = threading.Event()
def start(self) -> None:
def _run() -> None:
self._loop = asyncio.new_event_loop()
asyncio.set_event_loop(self._loop)
async def _handle(reader, writer):
try:
line = await reader.readline()
if line:
req = json.loads(line.decode("utf-8"))
self.captured.append(req)
if self.reply_fn is not None:
resp = self.reply_fn(req)
else:
resp = self.reply or {}
writer.write((json.dumps(resp) + "\n").encode("utf-8"))
await writer.drain()
finally:
try:
writer.close()
await writer.wait_closed()
except Exception:
pass
async def _serve():
self.path.parent.mkdir(parents=True, exist_ok=True)
self._server = await asyncio.start_unix_server(
_handle, path=str(self.path),
)
self._ready.set()
async with self._server:
await self._server.serve_forever()
try:
self._loop.run_until_complete(_serve())
except asyncio.CancelledError:
pass
finally:
self._loop.close()
self._thread = threading.Thread(target=_run, daemon=True)
self._thread.start()
assert self._ready.wait(timeout=5.0), "fake daemon failed to start"
def stop(self) -> None:
loop = self._loop
if loop is None:
return
async def _shutdown():
if self._server is not None:
self._server.close()
await self._server.wait_closed()
try:
asyncio.run_coroutine_threadsafe(_shutdown(), loop).result(timeout=5.0)
except Exception:
pass
loop.call_soon_threadsafe(loop.stop)
if self._thread is not None:
self._thread.join(timeout=5.0)
# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------
@pytest.fixture
def short_socket(tmp_path: Path):
    """Yield a unix-socket path short enough to bind (macOS ~104-byte limit).

    ``tmp_path / "d.sock"`` is used when its string form is at most 100
    characters. Otherwise a directory is created with ``tempfile.mkdtemp``
    and the socket path is placed inside it; that directory is removed on
    fixture teardown so repeated runs do not accumulate temp directories.

    Yields:
        Path: the socket path (the socket file itself is not created here).
    """
    import shutil  # local import: only needed for the fallback cleanup

    candidate = tmp_path / "d.sock"
    if len(str(candidate)) <= 100:
        yield candidate
        return

    fallback_dir = Path(tempfile.mkdtemp(prefix="iai-clitest-"))
    try:
        yield fallback_dir / "d.sock"
    finally:
        # tmp_path is cleaned by pytest; the mkdtemp directory is ours.
        shutil.rmtree(fallback_dir, ignore_errors=True)
@pytest.fixture
def fake_state_dir(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path:
    """Point `Path.home()` and the cli module's path constants at tmp_path.

    A `home` directory is created under ``tmp_path``; `Path.home` is patched
    to return it, and ``cli_mod``'s LOCK_PATH, SOCKET_PATH, STATE_PATH,
    LAUNCHD_TARGET and SYSTEMD_TARGET are replaced with paths below it.

    Returns:
        The fake home directory.
    """
    fake_home = tmp_path / "home"
    fake_home.mkdir(parents=True, exist_ok=True)
    monkeypatch.setattr(Path, "home", classmethod(lambda cls: fake_home))

    # The module constants were computed at import time, so each one is
    # overridden explicitly.
    state_root = fake_home / ".iai-mcp"
    redirected = {
        "LOCK_PATH": state_root / ".lock",
        "SOCKET_PATH": state_root / ".daemon.sock",
        "STATE_PATH": state_root / ".daemon-state.json",
        "LAUNCHD_TARGET": (
            fake_home / "Library" / "LaunchAgents" / "com.iai-mcp.daemon.plist"
        ),
        "SYSTEMD_TARGET": (
            fake_home / ".config" / "systemd" / "user" / "iai-mcp-daemon.service"
        ),
    }
    for attr_name, target in redirected.items():
        monkeypatch.setattr(cli_mod, attr_name, target)
    return fake_home
# ---------------------------------------------------------------------------
# Test 1: dry-run does NOT write any file
# ---------------------------------------------------------------------------
def test_install_dry_run_writes_no_file(
    fake_state_dir: Path,
    capsys: pytest.CaptureFixture,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """`daemon install --dry-run --yes` prints a preview and writes nothing."""
    monkeypatch.setattr(platform, "system", lambda: "Darwin")

    exit_status = cli_mod.main(["daemon", "install", "--dry-run", "--yes"])

    assert exit_status == 0
    assert not cli_mod.LAUNCHD_TARGET.exists()

    preview = capsys.readouterr().out
    # The preview names the target and carries the interpreter path.
    for expected in ("Would install to", sys.executable):
        assert expected in preview
# ---------------------------------------------------------------------------
# Test 2: install on macOS writes plist with sys.executable + invokes launchctl
# ---------------------------------------------------------------------------
def test_install_macos_writes_plist_with_sys_executable(
    fake_state_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """On Darwin, install writes the plist and calls launchctl.

    The written plist must contain ``sys.executable`` and no literal
    "{USERNAME}" placeholder; the recorded subprocess calls must include
    one mentioning "bootstrap" and one mentioning "kickstart".
    """
    monkeypatch.setattr(platform, "system", lambda: "Darwin")
    calls: list[list[str]] = []

    class _Completed:
        # Stand-in for a successful subprocess result.
        returncode = 0
        stdout = ""
        stderr = ""

    def _fake_run(argv, **kwargs):
        calls.append(list(argv))
        return _Completed()

    monkeypatch.setattr(cli_mod.subprocess, "run", _fake_run)

    assert cli_mod.main(["daemon", "install", "--yes"]) == 0
    assert cli_mod.LAUNCHD_TARGET.exists()

    plist_text = cli_mod.LAUNCHD_TARGET.read_text()
    assert sys.executable in plist_text
    assert "{USERNAME}" not in plist_text

    for verb in ("bootstrap", "kickstart"):
        assert any(verb in " ".join(c) for c in calls), calls
# ---------------------------------------------------------------------------
# Test 3: install on Linux writes systemd unit + invokes systemctl + loginctl
# ---------------------------------------------------------------------------
def test_install_linux_writes_unit_and_invokes_systemctl(
    fake_state_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """On Linux, install writes the systemd unit and drives systemctl/loginctl.

    ``subprocess.run`` is replaced by a recorder that always succeeds. The
    written unit must contain ``sys.executable``; the recorded commands must
    include at least two ``loginctl`` invocations plus
    ``systemctl --user daemon-reload`` and
    ``systemctl --user enable --now iai-mcp-daemon.service``.
    """
    monkeypatch.setattr(platform, "system", lambda: "Linux")
    monkeypatch.setenv("USER", "testuser")
    calls: list[list[str]] = []

    def _fake_run(argv, **kwargs):
        calls.append(list(argv))

        class _R:
            returncode = 0
            # Every `loginctl show-user` query reports Linger=no; the fake
            # keeps no state, so it never flips to Linger=yes. All other
            # commands produce no output.
            stdout = (
                "Linger=no" if argv[:2] == ["loginctl", "show-user"]
                else ""
            )
            stderr = ""

        return _R()

    monkeypatch.setattr(cli_mod.subprocess, "run", _fake_run)

    rc = cli_mod.main(["daemon", "install", "--yes"])
    assert rc == 0
    assert cli_mod.SYSTEMD_TARGET.exists()
    contents = cli_mod.SYSTEMD_TARGET.read_text()
    assert sys.executable in contents

    # loginctl invoked at least twice.
    loginctl_calls = [c for c in calls if c and c[0] == "loginctl"]
    assert len(loginctl_calls) >= 2, loginctl_calls

    # systemctl --user daemon-reload AND enable --now invoked.
    cmd_strs = [" ".join(c) for c in calls]
    assert any("systemctl --user daemon-reload" in s for s in cmd_strs), cmd_strs
    assert any(
        "systemctl --user enable --now iai-mcp-daemon.service" in s
        for s in cmd_strs
    ), cmd_strs
# ---------------------------------------------------------------------------
# Test 4: consent banner blocks on stdin; non-`y` responses abort
# ---------------------------------------------------------------------------
def test_install_without_yes_prompts_consent_banner_aborts(
    fake_state_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
    capsys: pytest.CaptureFixture,
) -> None:
    """Without `--yes`, any answer other than exact "y" aborts the install.

    Each rejected answer must produce exit code 1 and leave the launchd
    target absent. The text accumulated on stderr must mention the ~2 GB
    figure, "1%", and the uninstall command.
    """
    monkeypatch.setattr(platform, "system", lambda: "Darwin")

    def _successful_run(*args, **kwargs):
        # No real subprocess is ever started.
        return type("R", (), {"returncode": 0, "stdout": "", "stderr": ""})()

    monkeypatch.setattr(cli_mod.subprocess, "run", _successful_run)

    def _answer_with(text):
        # input() replacement that ignores the prompt and returns `text`.
        return lambda _prompt="": text

    # Only a lowercase "y" is meant to pass the gate; none of these is one.
    rejected_answers = ["", "n", "N", "yes", "no", "true", "1", "0", "yeah", "nope"]
    for response in rejected_answers:
        monkeypatch.setattr("builtins.input", _answer_with(response))
        rc = cli_mod.main(["daemon", "install"])
        assert rc == 1, f"non-strict-y response {response!r} should abort"
        # Install did not proceed, so nothing was written.
        assert not cli_mod.LAUNCHD_TARGET.exists()

    banner = capsys.readouterr().err
    # Key phrases of the consent banner (memory figure with or without "~").
    assert "~2 GB" in banner or "2 GB" in banner
    assert "1%" in banner
    assert "iai-mcp daemon uninstall" in banner
def test_install_with_lowercase_y_proceeds(
    fake_state_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Answering "y" at the consent prompt lets the install write the plist."""
    monkeypatch.setattr(platform, "system", lambda: "Darwin")
    monkeypatch.setattr("builtins.input", lambda _prompt="": "y")

    def _successful_run(*args, **kwargs):
        return type("R", (), {"returncode": 0, "stdout": "", "stderr": ""})()

    monkeypatch.setattr(cli_mod.subprocess, "run", _successful_run)

    exit_status = cli_mod.main(["daemon", "install"])

    assert exit_status == 0
    assert cli_mod.LAUNCHD_TARGET.exists()
def test_install_consent_records_audit_trail(
    fake_state_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Interactive consent leaves a JSON receipt in the state directory.

    After answering "y", at least one ``.consent-*.json`` file must exist
    under ``<home>/.iai-mcp``; the first one found must contain
    ``"consent": true`` and a ``"ts"`` key.
    """
    monkeypatch.setattr(platform, "system", lambda: "Darwin")
    monkeypatch.setattr("builtins.input", lambda _prompt="": "y")

    def _successful_run(*args, **kwargs):
        return type("R", (), {"returncode": 0, "stdout": "", "stderr": ""})()

    monkeypatch.setattr(cli_mod.subprocess, "run", _successful_run)

    assert cli_mod.main(["daemon", "install"]) == 0

    receipts_dir = fake_state_dir / ".iai-mcp"
    consent_files = list(receipts_dir.glob(".consent-*.json"))
    assert consent_files, "expected at least one .consent-<ts>.json audit receipt"

    receipt = json.loads(consent_files[0].read_text())
    assert receipt.get("consent") is True
    assert "ts" in receipt
# ---------------------------------------------------------------------------
# Test 5: macOS uninstall removes plist + all 3 state files
# ---------------------------------------------------------------------------
def test_uninstall_macos_removes_plist_and_all_state_files(
    fake_state_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """On Darwin, ``daemon uninstall --yes`` deletes the plist and state files.

    The plist, lock, socket and state paths are pre-seeded as plain files;
    after uninstall none of the four may remain.
    """

    def _successful_run(*_args, **_kwargs):
        return type("R", (), {"returncode": 0, "stdout": "", "stderr": ""})()

    monkeypatch.setattr(platform, "system", lambda: "Darwin")
    monkeypatch.setattr(cli_mod.subprocess, "run", _successful_run)

    # Pre-seed the plist + 3 state files.
    cli_mod.LAUNCHD_TARGET.parent.mkdir(parents=True, exist_ok=True)
    cli_mod.LAUNCHD_TARGET.write_text("<plist></plist>")
    (fake_state_dir / ".iai-mcp").mkdir(parents=True, exist_ok=True)
    seeds = (
        (cli_mod.LOCK_PATH, ""),
        (cli_mod.SOCKET_PATH, ""),
        (cli_mod.STATE_PATH, "{}"),
    )
    for seed_path, seed_text in seeds:
        seed_path.write_text(seed_text)

    exit_code = cli_mod.main(["daemon", "uninstall", "--yes"])
    assert exit_code == 0

    # C4 invariant: all 4 artefacts gone.
    leftovers = (
        cli_mod.LAUNCHD_TARGET,
        cli_mod.LOCK_PATH,
        cli_mod.SOCKET_PATH,
        cli_mod.STATE_PATH,
    )
    for artefact in leftovers:
        assert not artefact.exists()
# ---------------------------------------------------------------------------
# Test 6: Linux uninstall removes unit + all 3 state files
# ---------------------------------------------------------------------------
def test_uninstall_linux_removes_unit_and_all_state_files(
    fake_state_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """On Linux, ``daemon uninstall --yes`` deletes the unit and state files.

    Besides checking that the systemd unit and the three state files are
    gone, the test records every argv handed to ``subprocess.run`` and
    requires a ``systemctl --user disable --now`` call for the service.
    """
    issued: list[list[str]] = []

    def _recording_run(argv, **_kwargs):
        issued.append(list(argv))
        return type("R", (), {"returncode": 0, "stdout": "", "stderr": ""})()

    monkeypatch.setattr(platform, "system", lambda: "Linux")
    monkeypatch.setattr(cli_mod.subprocess, "run", _recording_run)

    # Pre-seed the unit file + 3 state files.
    cli_mod.SYSTEMD_TARGET.parent.mkdir(parents=True, exist_ok=True)
    cli_mod.SYSTEMD_TARGET.write_text("[Service]")
    (fake_state_dir / ".iai-mcp").mkdir(parents=True, exist_ok=True)
    seeds = (
        (cli_mod.LOCK_PATH, ""),
        (cli_mod.SOCKET_PATH, ""),
        (cli_mod.STATE_PATH, "{}"),
    )
    for seed_path, seed_text in seeds:
        seed_path.write_text(seed_text)

    exit_code = cli_mod.main(["daemon", "uninstall", "--yes"])
    assert exit_code == 0

    leftovers = (
        cli_mod.SYSTEMD_TARGET,
        cli_mod.LOCK_PATH,
        cli_mod.SOCKET_PATH,
        cli_mod.STATE_PATH,
    )
    for artefact in leftovers:
        assert not artefact.exists()

    cmd_strs = [" ".join(argv) for argv in issued]
    assert any("systemctl --user disable --now iai-mcp-daemon.service" in s for s in cmd_strs), cmd_strs
# ---------------------------------------------------------------------------
# Test 7: status round-trip + daemon-down message
# ---------------------------------------------------------------------------
def test_status_socket_round_trip(
    short_socket: Path,
    fake_state_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
    capsys: pytest.CaptureFixture,
) -> None:
    """``daemon status`` sends a status request and prints the daemon reply.

    A threaded fake daemon listens on ``short_socket`` and answers with a
    canned WAKE payload; the CLI must exit 0, echo the state and uptime to
    stdout, and have sent exactly one ``{"type": "status"}`` message.
    """
    monkeypatch.setattr(cli_mod, "SOCKET_PATH", short_socket)

    received: list[dict] = []
    canned_reply = {
        "ok": True,
        "state": "WAKE",
        "uptime_sec": 42.5,
        "version": "0.1.0",
    }
    fake_daemon = _ThreadedFakeDaemon(short_socket, received, reply=canned_reply)
    fake_daemon.start()
    try:
        exit_code = cli_mod.main(["daemon", "status"])
        assert exit_code == 0
    finally:
        fake_daemon.stop()

    stdout = capsys.readouterr().out
    assert "WAKE" in stdout
    assert "42" in stdout
    # The request really went over the socket.
    assert received == [{"type": "status"}]
def test_status_daemon_down(
    short_socket: Path,
    monkeypatch: pytest.MonkeyPatch,
    capsys: pytest.CaptureFixture,
) -> None:
    """With no socket file present, ``daemon status`` exits 1 and says so."""
    monkeypatch.setattr(cli_mod, "SOCKET_PATH", short_socket)
    # Precondition: nothing is listening because the socket was never created.
    assert not short_socket.exists()

    exit_code = cli_mod.main(["daemon", "status"])
    assert exit_code == 1

    stdout = capsys.readouterr().out
    assert "daemon not running" in stdout
# ---------------------------------------------------------------------------
# Test 8: status version skew warns when daemon != installed
# ---------------------------------------------------------------------------
def test_status_warns_on_version_skew(
    short_socket: Path,
    monkeypatch: pytest.MonkeyPatch,
    capsys: pytest.CaptureFixture,
) -> None:
    """A daemon reporting an old version makes ``daemon status`` warn on stderr.

    The fake daemon replies with version ``0.0.1-OLD``; status still exits 0
    but stderr must mention the version, the reported value and a restart.
    """
    monkeypatch.setattr(cli_mod, "SOCKET_PATH", short_socket)

    received: list[dict] = []
    stale_reply = {
        "ok": True,
        "state": "WAKE",
        "version": "0.0.1-OLD",
    }
    fake_daemon = _ThreadedFakeDaemon(short_socket, received, reply=stale_reply)
    fake_daemon.start()
    try:
        exit_code = cli_mod.main(["daemon", "status"])
        assert exit_code == 0
    finally:
        fake_daemon.stop()

    stderr = capsys.readouterr().err
    assert "version" in stderr.lower()
    assert "0.0.1-OLD" in stderr
    assert "restart" in stderr.lower()
# ---------------------------------------------------------------------------
# Test 9: configure subcommands persist to state file
# ---------------------------------------------------------------------------
def test_configure_set_budget_persists(
    fake_state_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """``configure set-budget 0.02`` writes the override into the state file."""
    from iai_mcp import daemon_state

    # save_state must land in the fake home, so daemon_state.STATE_PATH is
    # pointed at the same location cli_mod.STATE_PATH already uses.
    state_file = cli_mod.STATE_PATH
    monkeypatch.setattr(daemon_state, "STATE_PATH", state_file)
    state_file.parent.mkdir(parents=True, exist_ok=True)

    exit_code = cli_mod.main(["daemon", "configure", "set-budget", "0.02"])
    assert exit_code == 0

    persisted = json.loads(cli_mod.STATE_PATH.read_text())
    assert persisted["daily_quota_pct_override"] == pytest.approx(0.02)
def test_configure_set_cycle_count_persists(
    fake_state_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """``configure set-cycle-count 5`` stores ``cycle_count_override == 5``."""
    from iai_mcp import daemon_state

    state_file = cli_mod.STATE_PATH
    monkeypatch.setattr(daemon_state, "STATE_PATH", state_file)
    state_file.parent.mkdir(parents=True, exist_ok=True)

    exit_code = cli_mod.main(["daemon", "configure", "set-cycle-count", "5"])
    assert exit_code == 0

    persisted = json.loads(cli_mod.STATE_PATH.read_text())
    assert persisted["cycle_count_override"] == 5
def test_configure_disable_host_persists(
    fake_state_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """``configure disable-claude`` stores ``claude_enabled: false``."""
    from iai_mcp import daemon_state

    state_file = cli_mod.STATE_PATH
    monkeypatch.setattr(daemon_state, "STATE_PATH", state_file)
    state_file.parent.mkdir(parents=True, exist_ok=True)

    exit_code = cli_mod.main(["daemon", "configure", "disable-claude"])
    assert exit_code == 0

    persisted = json.loads(cli_mod.STATE_PATH.read_text())
    assert persisted["claude_enabled"] is False
# ---------------------------------------------------------------------------
# Test 10: force-rem socket message
# ---------------------------------------------------------------------------
def test_force_rem_sends_correct_message(
    short_socket: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """``daemon force-rem`` sends exactly one ``{"type": "force_rem"}`` message."""
    monkeypatch.setattr(cli_mod, "SOCKET_PATH", short_socket)

    received: list[dict] = []
    fake_daemon = _ThreadedFakeDaemon(
        short_socket,
        received,
        reply={"ok": True, "cycles_completed": 1},
    )
    fake_daemon.start()
    try:
        exit_code = cli_mod.main(["daemon", "force-rem"])
        assert exit_code == 0
    finally:
        fake_daemon.stop()

    assert received == [{"type": "force_rem"}]
# ---------------------------------------------------------------------------
# Test 11: pause N
# ---------------------------------------------------------------------------
def test_pause_sends_seconds_arg(
    short_socket: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """``daemon pause 300`` sends the duration as an integer ``seconds`` field."""
    monkeypatch.setattr(cli_mod, "SOCKET_PATH", short_socket)

    received: list[dict] = []
    fake_daemon = _ThreadedFakeDaemon(short_socket, received, reply={"ok": True})
    fake_daemon.start()
    try:
        exit_code = cli_mod.main(["daemon", "pause", "300"])
        assert exit_code == 0
    finally:
        fake_daemon.stop()

    assert received == [{"type": "pause", "seconds": 300}]
def test_resume_sends_resume_message(
    short_socket: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """``daemon resume`` sends exactly one ``{"type": "resume"}`` message."""
    monkeypatch.setattr(cli_mod, "SOCKET_PATH", short_socket)

    received: list[dict] = []
    fake_daemon = _ThreadedFakeDaemon(short_socket, received, reply={"ok": True})
    fake_daemon.start()
    try:
        exit_code = cli_mod.main(["daemon", "resume"])
        assert exit_code == 0
    finally:
        fake_daemon.stop()

    assert received == [{"type": "resume"}]
# ---------------------------------------------------------------------------
# Test 12: start / stop dispatch correct argv on each platform
# ---------------------------------------------------------------------------
def test_start_macos_uses_launchctl_kickstart(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """On Darwin, ``daemon start`` shells out to ``launchctl kickstart``."""
    issued: list[list[str]] = []

    def _recording_run(argv, **_kwargs):
        issued.append(list(argv))
        return type("R", (), {"returncode": 0})()

    monkeypatch.setattr(platform, "system", lambda: "Darwin")
    monkeypatch.setattr(cli_mod.subprocess, "run", _recording_run)

    exit_code = cli_mod.main(["daemon", "start"])
    assert exit_code == 0

    cmd_strs = [" ".join(argv) for argv in issued]
    assert any("launchctl kickstart" in s for s in cmd_strs), cmd_strs
def test_stop_macos_uses_launchctl_kill_sigterm(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """On Darwin, ``daemon stop`` shells out to ``launchctl kill SIGTERM``."""
    issued: list[list[str]] = []

    def _recording_run(argv, **_kwargs):
        issued.append(list(argv))
        return type("R", (), {"returncode": 0})()

    monkeypatch.setattr(platform, "system", lambda: "Darwin")
    monkeypatch.setattr(cli_mod.subprocess, "run", _recording_run)

    exit_code = cli_mod.main(["daemon", "stop"])
    assert exit_code == 0

    cmd_strs = [" ".join(argv) for argv in issued]
    assert any("launchctl kill SIGTERM" in s for s in cmd_strs), cmd_strs
def test_start_linux_uses_systemctl_start(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """On Linux, ``daemon start`` runs ``systemctl --user start`` for the unit."""
    calls: list[list[str]] = []

    def _recording_run(argv, **_kwargs):
        calls.append(list(argv))
        return type("R", (), {"returncode": 0})()

    monkeypatch.setattr(platform, "system", lambda: "Linux")
    monkeypatch.setattr(cli_mod.subprocess, "run", _recording_run)

    exit_code = cli_mod.main(["daemon", "start"])
    assert exit_code == 0

    expected_prefix = ["systemctl", "--user", "start", "iai-mcp-daemon.service"]
    assert any(c[:4] == expected_prefix for c in calls), calls
def test_stop_linux_uses_systemctl_stop(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """On Linux, ``daemon stop`` runs ``systemctl --user stop`` for the unit."""
    calls: list[list[str]] = []

    def _recording_run(argv, **_kwargs):
        calls.append(list(argv))
        return type("R", (), {"returncode": 0})()

    monkeypatch.setattr(platform, "system", lambda: "Linux")
    monkeypatch.setattr(cli_mod.subprocess, "run", _recording_run)

    exit_code = cli_mod.main(["daemon", "stop"])
    assert exit_code == 0

    expected_prefix = ["systemctl", "--user", "stop", "iai-mcp-daemon.service"]
    assert any(c[:4] == expected_prefix for c in calls), calls
# ---------------------------------------------------------------------------
# Test 13: logs dispatches tail (macOS) or journalctl (Linux)
# ---------------------------------------------------------------------------
def test_logs_macos_invokes_tail(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """On Darwin, ``daemon logs -n 50`` runs a command whose argv[0] is ``tail``."""
    calls: list[list[str]] = []

    def _recording_run(argv, **_kwargs):
        calls.append(list(argv))
        return type("R", (), {"returncode": 0})()

    monkeypatch.setattr(platform, "system", lambda: "Darwin")
    monkeypatch.setattr(cli_mod.subprocess, "run", _recording_run)

    exit_code = cli_mod.main(["daemon", "logs", "-n", "50"])
    assert exit_code == 0
    assert any(c and c[0] == "tail" for c in calls), calls
def test_logs_linux_invokes_journalctl(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """On Linux, ``daemon logs -n 100`` runs ``journalctl --user -u <unit> -n``."""
    calls: list[list[str]] = []

    def _recording_run(argv, **_kwargs):
        calls.append(list(argv))
        return type("R", (), {"returncode": 0})()

    monkeypatch.setattr(platform, "system", lambda: "Linux")
    monkeypatch.setattr(cli_mod.subprocess, "run", _recording_run)

    exit_code = cli_mod.main(["daemon", "logs", "-n", "100"])
    assert exit_code == 0

    expected_prefix = ["journalctl", "--user", "-u", "iai-mcp-daemon.service", "-n"]
    assert any(c[:5] == expected_prefix for c in calls), calls
# ---------------------------------------------------------------------------
# Idempotency: install + install does not error
# ---------------------------------------------------------------------------
def test_install_twice_is_idempotent(
    fake_state_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Running ``daemon install --yes`` twice in a row succeeds both times."""

    def _successful_run(*_args, **_kwargs):
        return type("R", (), {"returncode": 0, "stdout": "", "stderr": ""})()

    monkeypatch.setattr(platform, "system", lambda: "Darwin")
    monkeypatch.setattr(cli_mod.subprocess, "run", _successful_run)

    first_rc = cli_mod.main(["daemon", "install", "--yes"])
    assert first_rc == 0
    second_rc = cli_mod.main(["daemon", "install", "--yes"])
    assert second_rc == 0
    assert cli_mod.LAUNCHD_TARGET.exists()
def test_uninstall_twice_is_idempotent(
    fake_state_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Running ``daemon uninstall --yes`` twice in a row succeeds both times."""

    def _successful_run(*_args, **_kwargs):
        return type("R", (), {"returncode": 0, "stdout": "", "stderr": ""})()

    monkeypatch.setattr(platform, "system", lambda: "Darwin")
    monkeypatch.setattr(cli_mod.subprocess, "run", _successful_run)

    first_rc = cli_mod.main(["daemon", "uninstall", "--yes"])
    assert first_rc == 0
    second_rc = cli_mod.main(["daemon", "uninstall", "--yes"])
    assert second_rc == 0
# ---------------------------------------------------------------------------
# Help output sanity
# ---------------------------------------------------------------------------
def test_daemon_help_lists_all_subcommands(
    capsys: pytest.CaptureFixture,
) -> None:
    """``daemon --help`` exits 0 and names every daemon subcommand on stdout."""
    expected_subcommands = (
        "install",
        "uninstall",
        "start",
        "stop",
        "status",
        "logs",
        "force-rem",
        "pause",
        "resume",
        "configure",
    )

    with pytest.raises(SystemExit) as exc_info:
        cli_mod.main(["daemon", "--help"])
    assert exc_info.value.code == 0

    help_text = capsys.readouterr().out
    for sub in expected_subcommands:
        assert sub in help_text, f"missing {sub} in daemon --help output"

View file

@ -0,0 +1,214 @@
"""Plan 07.14-02 tests: regression-lock for `iai-mcp daemon install`
sys.executable substitution into launchd plist + systemd user unit.
Locks the contract that `_render_launchd_plist` and `_render_systemd_unit`
substitute `sys.executable` in place of the template `/usr/local/bin/python3`
and `/usr/bin/python3` placeholders. Without this contract, the daemon
runs under whatever `python3` happens to be first on PATH at launchd /
systemd invocation, which on macOS is typically the SIP-protected
`/usr/local/bin/python3` -- different from the venv Python where iai-mcp
and its dependencies live.
VERIFY result (planner + executor 2026-05-01): production code already
does the substitution. `src/iai_mcp/cli.py::_render_launchd_plist`
calls `text.replace("/usr/local/bin/python3", sys.executable)`, and
`_render_systemd_unit` calls
`text.replace("/usr/bin/python3", sys.executable)`. The plist template
at `deploy/launchd/com.iai-mcp.daemon.plist` carries
`<string>/usr/local/bin/python3</string>` inside `ProgramArguments`, and
`deploy/systemd/iai-mcp-daemon.service` carries
`ExecStart=/usr/bin/python3 -m iai_mcp.daemon`. Production-code change
for this plan is ZERO LINES; this file is a regression lock so a future
refactor that hardcodes the path will fail these tests.
Test 3 (`test_install_warns_when_sys_executable_lacks_psutil`) verified
during Sub-step 1: `cmd_daemon_install` (cli.py 268-362) does NOT carry a
`subprocess.run([sys.executable, "-c", "import psutil"])` probe today.
Per 07.14-CONTEXT.md ("only if gap-driven patch is needed: ... defer
adding such a row to a future phase. Do NOT add it speculatively in
07.14"), the WARN-on-missing-psutil contract is xfail-marked: the
contract is documented for a future plan to enforce, but adding the
probe speculatively is out of scope.
"""
from __future__ import annotations
import argparse
import subprocess
import sys
import pytest
def _make_install_args(**kwargs) -> argparse.Namespace:
    """Return a Namespace for `cmd_daemon_install`, defaulting to a dry run.

    ``dry_run`` and ``yes`` both default to ``True``; any keyword argument
    overrides a default or adds an extra attribute.
    """
    merged = {"dry_run": True, "yes": True, **kwargs}
    return argparse.Namespace(**merged)
def test_install_uses_sys_executable_macos(monkeypatch):
    """The rendered launchd plist carries `sys.executable`, not the placeholder.

    `iai_mcp.cli.sys.executable` is patched through the cli module's own
    `sys` reference (rather than patching `sys.executable` via a global
    import) before `_render_launchd_plist` is imported and called.
    """
    interpreter = "/path/to/venv/bin/python3"
    monkeypatch.setattr("iai_mcp.cli.sys.executable", interpreter)
    from iai_mcp.cli import _render_launchd_plist

    plist_text = _render_launchd_plist()

    substituted = f"<string>{interpreter}</string>"
    assert substituted in plist_text, (
        f"plist did not substitute sys.executable; rendered text:\n{plist_text[:500]}"
    )
    placeholder = "<string>/usr/local/bin/python3</string>"
    assert placeholder not in plist_text, (
        "plist still contains the unsubstituted /usr/local/bin/python3 placeholder"
    )
def test_install_uses_sys_executable_linux(monkeypatch):
    """The rendered systemd unit carries `sys.executable`, not `/usr/bin/python3`.

    Passes when the patched interpreter path and ``iai_mcp.daemon`` both
    appear in the rendered unit, and the original
    ``/usr/bin/python3 -m iai_mcp.daemon`` ExecStart text is gone.
    """
    interpreter = "/path/to/venv/bin/python3"
    monkeypatch.setattr("iai_mcp.cli.sys.executable", interpreter)
    from iai_mcp.cli import _render_systemd_unit

    unit_text = _render_systemd_unit()

    exact_exec_line = f"{interpreter} -m iai_mcp.daemon" in unit_text
    loose_match = interpreter in unit_text and "iai_mcp.daemon" in unit_text
    assert exact_exec_line or loose_match, (
        f"systemd unit did not substitute sys.executable; rendered:\n{unit_text[:500]}"
    )
    assert "/usr/bin/python3 -m iai_mcp.daemon" not in unit_text, (
        "systemd unit still contains the unsubstituted /usr/bin/python3 placeholder"
    )
# ============================================================================
# Test 3 -- xfail per 07.14-CONTEXT.md deferral
# ============================================================================
# Sub-step 1 verification (executor 2026-05-01): cmd_daemon_install
# (src/iai_mcp/cli.py lines 268-362) does NOT contain a
# `subprocess.run([sys.executable, "-c", "import psutil"])` probe today.
#
# Per 07.14-CONTEXT.md "only if gap-driven patch is needed: ...
# defer adding such a row to a future phase. Do NOT add it speculatively
# in 07.14".
#
# This xfail documents the contract for a future plan that adds the
# probe. If/when the probe lands, the xfail will flip to xpass and the
# developer un-marks it. `strict=False` so an xpass does not fail the
# suite during the transition.
# ============================================================================
# Plan 10.6-01 Task 1.7: plist invariants -----------------------------
def test_plist_keepalive_is_crashed_only(monkeypatch):
    """Plist KeepAlive uses the Crashed key only -- never SuccessfulExit.

    Lifecycle model: a graceful exit 0 on HIBERNATION must NOT trigger a
    respawn, so the daemon stays dead until the wrapper kickstart fires.
    Crashed=true respawns only on a non-zero exit (the LifecycleLockConflict
    path); SuccessfulExit=false would create a respawn loop because exit 0
    is now the steady state.
    """
    interpreter = "/path/to/venv/bin/python3"
    monkeypatch.setattr("iai_mcp.cli.sys.executable", interpreter)
    from iai_mcp.cli import _render_launchd_plist

    plist_text = _render_launchd_plist()

    # The Crashed key must be present.
    assert "<key>Crashed</key>" in plist_text
    # The legacy SuccessfulExit key must be gone.
    assert "<key>SuccessfulExit</key>" not in plist_text, (
        "Phase 10.6 removed SuccessfulExit=false from the plist. Its presence "
        "would create a respawn loop because exit 0 is now the steady state."
    )
def test_plist_lifecycle_env_vars_present(monkeypatch):
    """The plist defines the LIFECYCLE_* and sleep-quarantine env var keys.

    The cadence knobs become production-tunable via the plist
    EnvironmentVariables block. Defaults match proposal v2 §3.
    """
    interpreter = "/path/to/venv/bin/python3"
    monkeypatch.setattr("iai_mcp.cli.sys.executable", interpreter)
    from iai_mcp.cli import _render_launchd_plist

    plist_text = _render_launchd_plist()

    required_keys = (
        "<key>LIFECYCLE_DROWSY_AFTER_SEC</key>",
        "<key>LIFECYCLE_SLEEP_HEARTBEAT_IDLE_SEC</key>",
        "<key>LIFECYCLE_HIBERNATE_AFTER_SEC</key>",
        "<key>IAI_MCP_SLEEP_QUARANTINE_TTL_HOURS</key>",
    )
    for key_tag in required_keys:
        assert key_tag in plist_text
def test_plist_legacy_env_vars_removed(monkeypatch):
    """Env vars from the RSS-watchdog + idle_watcher era no longer appear."""
    interpreter = "/path/to/venv/bin/python3"
    monkeypatch.setattr("iai_mcp.cli.sys.executable", interpreter)
    from iai_mcp.cli import _render_launchd_plist

    plist_text = _render_launchd_plist()

    assert "<key>IAI_MCP_RSS_RESTART_THRESHOLD_MB</key>" not in plist_text, (
        "RSS-watchdog removed in Task 1.4; env var must be gone "
        "from the plist."
    )
    for retired_key in (
        "<key>IAI_DAEMON_IDLE_SHUTDOWN_SECS</key>",
        "<key>IAI_MCP_SKIP_STARTUP_OPTIMIZE</key>",
    ):
        assert retired_key not in plist_text
@pytest.mark.xfail(
    reason=(
        "psutil-availability probe NOT in cmd_daemon_install today. "
        "Adding speculatively is deferred per 07.14-CONTEXT.md "
        '("only if gap-driven patch is needed: ... defer adding such a '
        'row to a future phase"). This xfail documents the contract for '
        "a future plan."
    ),
    strict=False,
)
def test_install_warns_when_sys_executable_lacks_psutil(
    monkeypatch, capsys, tmp_path,
):
    """Install should WARN (not FAIL) when the target Python lacks `psutil`.

    Expected contract: exit code 0, with stderr mentioning psutil, the
    `iai-mcp daemon install` command and a "re-run" hint.

    NOTE: deferred per CONTEXT.md -- xfail until a future plan adds the
    psutil-availability probe to `cmd_daemon_install`.
    """
    monkeypatch.setenv("HOME", str(tmp_path))
    monkeypatch.setenv("HF_HOME", str(tmp_path / "hf"))

    # Keep a handle on the genuine implementation for every non-probe call.
    genuine_run = subprocess.run

    def _fake_run(cmd, **kwargs):
        # Match: subprocess.run([sys.executable, "-c", "import psutil"], ...)
        is_psutil_probe = (
            isinstance(cmd, list)
            and len(cmd) >= 3
            and cmd[1] == "-c"
            and cmd[2] == "import psutil"
        )
        if is_psutil_probe:
            # Simulate `import psutil` failing under the target Python.
            raise subprocess.CalledProcessError(returncode=1, cmd=cmd)
        return genuine_run(cmd, **kwargs)

    monkeypatch.setattr("subprocess.run", _fake_run)
    from iai_mcp.cli import cmd_daemon_install

    install_args = _make_install_args(dry_run=True, yes=True)
    rc = cmd_daemon_install(install_args)
    stderr_lower = capsys.readouterr().err.lower()

    # WARN != FAIL: install proceeds (rc == 0) but stderr carries the hint.
    assert rc == 0, f"install must NOT fail on missing psutil; got rc={rc}"
    assert "psutil" in stderr_lower
    assert "iai-mcp daemon install" in stderr_lower
    assert "re-run" in stderr_lower

111
tests/test_cli_health.py Normal file
View file

@ -0,0 +1,111 @@
"""Tests for the iai-mcp CLI -- health + migrate commands."""
from __future__ import annotations
from datetime import datetime, timedelta, timezone
from uuid import uuid4
import pytest
# ----------------------------------------------------------- iai-mcp health
def test_cli_health_no_events(tmp_path, monkeypatch, capsys):
    """With an empty store directory, `cmd_health` exits 0 and reports no events."""
    import argparse

    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    from iai_mcp.cli import cmd_health

    rc = cmd_health(argparse.Namespace())
    stdout = capsys.readouterr().out

    assert rc == 0
    assert "no events" in stdout.lower()
def test_cli_health_reports_last_event(tmp_path, monkeypatch, capsys):
    """After one seeded llm_health event, output names the kind and severity."""
    import argparse

    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    from iai_mcp.cli import cmd_health
    from iai_mcp.events import write_event
    from iai_mcp.store import MemoryStore

    # Seed a single info-level llm_health event into the store.
    seeded_store = MemoryStore()
    write_event(
        seeded_store,
        kind="llm_health",
        data={"status": "ok"},
        severity="info",
    )

    rc = cmd_health(argparse.Namespace())
    stdout = capsys.readouterr().out

    assert rc == 0
    assert "llm_health" in stdout
    # Severity reported.
    assert "info" in stdout
# ---------------------------------------------------------- iai-mcp migrate
def test_cli_migrate_dry_run(tmp_path, monkeypatch, capsys):
    """Seeded v1 records -> dry-run prints 'would migrate' and mutates nothing.

    Three legacy-schema records are inserted, `cmd_migrate` is run with
    ``dry_run=True``, and afterwards at least those three records must still
    report ``schema_version == 1``.
    """
    import argparse

    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    from iai_mcp.cli import cmd_migrate
    from iai_mcp.store import MemoryStore
    from iai_mcp.types import MemoryRecord, SCHEMA_VERSION_LEGACY, EMBED_DIM

    store = MemoryStore()
    for i in range(3):
        r = MemoryRecord(
            id=uuid4(),
            tier="episodic",
            literal_surface=f"Legacy v1 record number {i} with words to detect.",
            aaak_index="",
            embedding=[0.1] * EMBED_DIM,
            community_id=None,
            centrality=0.0,
            detail_level=2,
            pinned=False,
            stability=0.0,
            difficulty=0.0,
            last_reviewed=None,
            never_decay=False,
            never_merge=False,
            provenance=[],
            created_at=datetime.now(timezone.utc),
            updated_at=datetime.now(timezone.utc),
            tags=[],
            language="en",
            schema_version=SCHEMA_VERSION_LEGACY,
        )
        # simulate un-tagged legacy by clearing language after construction
        r.language = ""
        store.insert(r)

    args = argparse.Namespace(from_=1, to=2, dry_run=True, verbose=False)
    exit_code = cmd_migrate(args)
    out = capsys.readouterr().out

    assert exit_code == 0
    assert "would migrate" in out.lower()

    # Dry run must not mutate the store: the records we inserted are still v1.
    # (The previous no-op filtering loop over all_records() was removed; its
    # body was only `continue`, so it never affected the outcome.)
    v1_count = sum(1 for r in store.all_records() if r.schema_version == 1)
    # At least the 3 we inserted must still be v1.
    assert v1_count >= 3
def test_cli_entrypoint_exists():
    """`iai_mcp.cli.main`, the `iai-mcp` entrypoint target, is importable and callable."""
    from iai_mcp import cli

    entrypoint = cli.main
    assert callable(entrypoint)

View file

@ -0,0 +1,422 @@
"""Phase 10.1 Plan 10.1-01 Task 1.5 -- `iai-mcp lifecycle status` CLI tests.
Covers status output for each of the 4 states, default WAKE when the
file is absent, and the formatted lines for sleep_cycle_progress and
quarantine.
"""
from __future__ import annotations
import argparse
from datetime import datetime, timezone
import pytest
from iai_mcp.lifecycle_state import (
LifecycleState,
LifecycleStateRecord,
save_state,
)
# ---------------------------------------------------------------------------
# Helper -- patch LIFECYCLE_STATE_PATH to a tmp file for each test
# ---------------------------------------------------------------------------
def _run_status(tmp_path, monkeypatch, capsys, record: LifecycleStateRecord | None):
    """Run `cmd_lifecycle_status` against a temp state file.

    Points ``iai_mcp.lifecycle_state.LIFECYCLE_STATE_PATH`` at
    ``tmp_path / "lifecycle_state.json"``, writes ``record`` there when one
    is given, calls the subcommand directly and returns ``(rc, stdout)``.
    """
    state_file = tmp_path / "lifecycle_state.json"
    monkeypatch.setattr("iai_mcp.lifecycle_state.LIFECYCLE_STATE_PATH", state_file)

    if record is not None:
        save_state(record, state_file)

    # Imported late so the patch above is in place before the command reads
    # LIFECYCLE_STATE_PATH.
    from iai_mcp.cli import cmd_lifecycle_status

    exit_code = cmd_lifecycle_status(argparse.Namespace())
    captured_stdout = capsys.readouterr().out
    return exit_code, captured_stdout
# ---------------------------------------------------------------------------
# Status output for each of the 4 states
# ---------------------------------------------------------------------------
@pytest.mark.parametrize("state", list(LifecycleState))
def test_status_prints_state_label(tmp_path, monkeypatch, capsys, state):
    """For every LifecycleState member, status prints ``state: <value>``."""
    seeded: LifecycleStateRecord = {
        "current_state": state.value,
        "since_ts": "2026-05-02T15:00:00+00:00",
        "last_activity_ts": "2026-05-02T15:11:30+00:00",
        "wrapper_event_seq": 42,
        "sleep_cycle_progress": None,
        "quarantine": None,
        "shadow_run": True,
    }

    exit_code, stdout = _run_status(tmp_path, monkeypatch, capsys, seeded)

    assert exit_code == 0
    assert f"state: {state.value}" in stdout
# ---------------------------------------------------------------------------
# Absent file -> default WAKE
# ---------------------------------------------------------------------------
def test_status_returns_default_wake_when_file_absent(tmp_path, monkeypatch, capsys):
    """With no state file seeded, status exits 0 and reports ``state: WAKE``."""
    exit_code, stdout = _run_status(tmp_path, monkeypatch, capsys, record=None)

    assert exit_code == 0
    assert "state: WAKE" in stdout
# ---------------------------------------------------------------------------
# Wrapper-event seq + last_activity rendered
# ---------------------------------------------------------------------------
def test_status_renders_seq_and_last_activity(tmp_path, monkeypatch, capsys):
    """Status output includes the wrapper event sequence and last-activity timestamp."""
    seeded: LifecycleStateRecord = {
        "current_state": "WAKE",
        "since_ts": "2026-05-02T15:00:00+00:00",
        "last_activity_ts": "2026-05-02T15:11:30+00:00",
        "wrapper_event_seq": 137,
        "sleep_cycle_progress": None,
        "quarantine": None,
        "shadow_run": True,
    }

    exit_code, stdout = _run_status(tmp_path, monkeypatch, capsys, seeded)

    assert exit_code == 0
    assert "wrapper_event_seq: 137" in stdout
    assert "last_activity: 2026-05-02T15:11:30+00:00" in stdout
# ---------------------------------------------------------------------------
# sleep_cycle_progress rendering
# ---------------------------------------------------------------------------
def test_status_progress_none_says_none(tmp_path, monkeypatch, capsys):
    """A null ``sleep_cycle_progress`` is rendered as the word ``none``."""
    seeded: LifecycleStateRecord = {
        "current_state": "WAKE",
        "since_ts": "2026-05-02T15:00:00+00:00",
        "last_activity_ts": "2026-05-02T15:00:00+00:00",
        "wrapper_event_seq": 0,
        "sleep_cycle_progress": None,
        "quarantine": None,
        "shadow_run": True,
    }

    exit_code, stdout = _run_status(tmp_path, monkeypatch, capsys, seeded)

    assert exit_code == 0
    assert "sleep_cycle_progress: none" in stdout
def test_status_progress_active_renders_step_attempt(tmp_path, monkeypatch, capsys):
    """An active sleep cycle is rendered with step, attempt and last_error fields."""
    in_flight_cycle = {
        "last_completed_step": 3,
        "attempt": 1,
        "last_error": None,
        "started_at": "2026-05-02T03:00:00+00:00",
    }
    seeded: LifecycleStateRecord = {
        "current_state": "SLEEP",
        "since_ts": "2026-05-02T03:00:00+00:00",
        "last_activity_ts": "2026-05-02T03:00:00+00:00",
        "wrapper_event_seq": 7,
        "sleep_cycle_progress": in_flight_cycle,
        "quarantine": None,
        "shadow_run": True,
    }

    exit_code, stdout = _run_status(tmp_path, monkeypatch, capsys, seeded)

    assert exit_code == 0
    for fragment in ("step=3", "attempt=1", "last_error=none"):
        assert fragment in stdout
# ---------------------------------------------------------------------------
# Quarantine rendering
# ---------------------------------------------------------------------------
def test_status_quarantine_none_says_none(tmp_path, monkeypatch, capsys):
    """A null ``quarantine`` is rendered as the word ``none``."""
    seeded: LifecycleStateRecord = {
        "current_state": "WAKE",
        "since_ts": "2026-05-02T15:00:00+00:00",
        "last_activity_ts": "2026-05-02T15:00:00+00:00",
        "wrapper_event_seq": 0,
        "sleep_cycle_progress": None,
        "quarantine": None,
        "shadow_run": True,
    }

    exit_code, stdout = _run_status(tmp_path, monkeypatch, capsys, seeded)

    assert exit_code == 0
    assert "quarantine: none" in stdout
def test_status_quarantine_active_renders_until_and_reason(tmp_path, monkeypatch, capsys):
    """An active quarantine is rendered with its until, reason and since fields."""
    active_quarantine = {
        "until_ts": "2026-05-03T03:00:00+00:00",
        "reason": "sleep step 4 failed 3x",
        "since_ts": "2026-05-02T03:00:00+00:00",
    }
    seeded: LifecycleStateRecord = {
        "current_state": "SLEEP",
        "since_ts": "2026-05-02T03:00:00+00:00",
        "last_activity_ts": "2026-05-02T03:00:00+00:00",
        "wrapper_event_seq": 1,
        "sleep_cycle_progress": None,
        "quarantine": active_quarantine,
        "shadow_run": True,
    }

    exit_code, stdout = _run_status(tmp_path, monkeypatch, capsys, seeded)

    assert exit_code == 0
    expected_fragments = (
        "until=2026-05-03T03:00:00+00:00",
        "reason=sleep step 4 failed 3x",
        "since=2026-05-02T03:00:00+00:00",
    )
    for fragment in expected_fragments:
        assert fragment in stdout
# ---------------------------------------------------------------------------
# shadow_run flag rendering
# ---------------------------------------------------------------------------
def test_status_shadow_run_true_mentions_legacy_watchdog(tmp_path, monkeypatch, capsys):
    """With ``shadow_run`` true, the status line says so and cites Phase 10.6."""
    seeded: LifecycleStateRecord = {
        "current_state": "WAKE",
        "since_ts": "2026-05-02T15:00:00+00:00",
        "last_activity_ts": "2026-05-02T15:00:00+00:00",
        "wrapper_event_seq": 0,
        "sleep_cycle_progress": None,
        "quarantine": None,
        "shadow_run": True,
    }

    exit_code, stdout = _run_status(tmp_path, monkeypatch, capsys, seeded)

    assert exit_code == 0
    assert "shadow_run: true" in stdout
    # The spec line mentions the phase that flips the flag.
    assert "Phase 10.6" in stdout
def test_status_shadow_run_false(tmp_path, monkeypatch, capsys):
    """With ``shadow_run`` false, status prints ``shadow_run: false``."""
    seeded: LifecycleStateRecord = {
        "current_state": "WAKE",
        "since_ts": "2026-05-02T15:00:00+00:00",
        "last_activity_ts": "2026-05-02T15:00:00+00:00",
        "wrapper_event_seq": 0,
        "sleep_cycle_progress": None,
        "quarantine": None,
        "shadow_run": False,
    }

    exit_code, stdout = _run_status(tmp_path, monkeypatch, capsys, seeded)

    assert exit_code == 0
    assert "shadow_run: false" in stdout
# ---------------------------------------------------------------------------
# Helper formatter sanity
# ---------------------------------------------------------------------------
def test_format_relative_minutes():
    """A 12-minute gap between the timestamp and ``now`` renders as "12 minutes".

    The unused ``tmp_path`` / ``monkeypatch`` fixtures were dropped: this test
    touches neither the filesystem nor any patched state, matching the other
    `_format_relative` tests in this module.
    """
    from iai_mcp.cli import _format_relative

    now = datetime(2026, 5, 2, 15, 12, 0, tzinfo=timezone.utc)
    out = _format_relative("2026-05-02T15:00:00+00:00", now=now)
    assert out == "12 minutes"
def test_format_relative_hours():
    """A two-hour gap between the timestamp and ``now`` renders as "2 hours"."""
    from iai_mcp.cli import _format_relative

    reference = datetime(2026, 5, 2, 15, 12, 0, tzinfo=timezone.utc)
    rendered = _format_relative("2026-05-02T13:12:00+00:00", now=reference)
    assert rendered == "2 hours"
def test_format_relative_days():
    """A three-day gap between the timestamp and ``now`` renders as "3 days"."""
    from iai_mcp.cli import _format_relative

    reference = datetime(2026, 5, 5, 0, 0, 0, tzinfo=timezone.utc)
    rendered = _format_relative("2026-05-02T00:00:00+00:00", now=reference)
    assert rendered == "3 days"
def test_format_relative_singular_minute():
    """A one-minute gap uses the singular form "1 minute"."""
    from iai_mcp.cli import _format_relative

    reference = datetime(2026, 5, 2, 15, 1, 0, tzinfo=timezone.utc)
    rendered = _format_relative("2026-05-02T15:00:00+00:00", now=reference)
    assert rendered == "1 minute"
def test_format_relative_handles_garbage():
    """An unparseable timestamp string yields the literal "unknown"."""
    from iai_mcp.cli import _format_relative

    rendered = _format_relative("not-a-timestamp")
    assert rendered == "unknown"
# ---------------------------------------------------------------------------
# End-to-end: invoke via main([...])
# ---------------------------------------------------------------------------
def test_cli_main_lifecycle_status_via_main(tmp_path, monkeypatch, capsys):
    """Run ``lifecycle status`` through ``main([...])`` against a saved DROWSY record."""
    state_file = tmp_path / "lifecycle_state.json"
    # Redirect the module-level state path to the temp file before the CLI
    # module is imported below.
    monkeypatch.setattr("iai_mcp.lifecycle_state.LIFECYCLE_STATE_PATH", state_file)
    drowsy: LifecycleStateRecord = {
        "current_state": "DROWSY",
        "since_ts": "2026-05-02T15:00:00+00:00",
        "last_activity_ts": "2026-05-02T15:11:30+00:00",
        "wrapper_event_seq": 42,
        "sleep_cycle_progress": None,
        "quarantine": None,
        "shadow_run": True,
    }
    save_state(drowsy, state_file)

    from iai_mcp.cli import main

    exit_code = main(["lifecycle", "status"])
    rendered = capsys.readouterr().out
    assert exit_code == 0
    assert "state: DROWSY" in rendered
# ---------------------------------------------------------------------------
# Plan 10.6-01 Task 1.2 -- lifecycle force-unlock subcommand
# ---------------------------------------------------------------------------
def test_force_unlock_with_yes_flag(tmp_path, monkeypatch, capsys):
    """With ``--yes`` no prompt is needed and an existing lockfile is removed."""
    import json as _json
    from iai_mcp.cli import cmd_lifecycle_force_unlock

    holder = {
        "pid": 4242,
        "hostname": "stale-host.local",
        "started_at": "2026-04-29T08:00:00+00:00",
        "schema_version": 1,
    }
    lock_path = tmp_path / ".locked"
    lock_path.write_text(_json.dumps(holder))

    exit_code = cmd_lifecycle_force_unlock(
        argparse.Namespace(yes=True, lock_path=lock_path)
    )
    stdout = capsys.readouterr().out

    assert exit_code == 0
    # The holder's pid and hostname are echoed before the removal notice.
    for fragment in ("pid=4242", "stale-host.local", "Lockfile removed."):
        assert fragment in stdout
    assert not lock_path.exists()
def test_force_unlock_without_yes_prompts_no_aborts(
    tmp_path, monkeypatch, capsys,
):
    """Without ``--yes`` the prompt is consulted; "n" gives rc=1 and keeps the file."""
    import json as _json
    from iai_mcp.cli import cmd_lifecycle_force_unlock

    holder = {
        "pid": 4242,
        "hostname": "stale-host.local",
        "started_at": "2026-04-29T08:00:00+00:00",
        "schema_version": 1,
    }
    lock_path = tmp_path / ".locked"
    lock_path.write_text(_json.dumps(holder))

    def _decline(_prompt=""):
        # Stand-in for ``input``: always answers "n".
        return "n"

    monkeypatch.setattr("builtins.input", _decline)
    exit_code = cmd_lifecycle_force_unlock(
        argparse.Namespace(yes=False, lock_path=lock_path)
    )
    streams = capsys.readouterr()

    assert exit_code == 1
    assert "cancelled" in streams.err.lower()
    assert lock_path.exists()
def test_force_unlock_without_yes_prompts_y_succeeds(
    tmp_path, monkeypatch, capsys,
):
    """Answering "y" at the prompt removes the lockfile and returns rc=0."""
    import json as _json
    from iai_mcp.cli import cmd_lifecycle_force_unlock

    holder = {
        "pid": 4242,
        "hostname": "stale-host.local",
        "started_at": "2026-04-29T08:00:00+00:00",
        "schema_version": 1,
    }
    lock_path = tmp_path / ".locked"
    lock_path.write_text(_json.dumps(holder))

    def _confirm(_prompt=""):
        # Stand-in for ``input``: always answers "y".
        return "y"

    monkeypatch.setattr("builtins.input", _confirm)
    exit_code = cmd_lifecycle_force_unlock(
        argparse.Namespace(yes=False, lock_path=lock_path)
    )
    stdout = capsys.readouterr().out

    assert exit_code == 0
    assert "Lockfile removed." in stdout
    assert not lock_path.exists()
def test_force_unlock_when_no_lockfile(tmp_path, capsys):
    """A missing lockfile is not an error: rc=0 plus a "nothing to unlock" note."""
    from iai_mcp.cli import cmd_lifecycle_force_unlock

    # The path is deliberately never created.
    missing_lock = tmp_path / ".locked"
    exit_code = cmd_lifecycle_force_unlock(
        argparse.Namespace(yes=True, lock_path=missing_lock)
    )
    stdout = capsys.readouterr().out

    assert exit_code == 0
    assert "nothing to unlock" in stdout.lower()
def test_cli_main_lifecycle_force_unlock_via_main(
    tmp_path, monkeypatch, capsys,
):
    """Run ``lifecycle force-unlock --yes`` end to end through ``main``.

    The production path uses ``DEFAULT_LOCK_PATH``; it is monkey-patched to
    a temp file so the test does not touch ``~/.iai-mcp/.locked``.
    """
    import json as _json

    holder = {
        "pid": 9999,
        "hostname": "foreign-host.local",
        "started_at": "2026-04-30T10:00:00+00:00",
        "schema_version": 1,
    }
    lock_path = tmp_path / ".locked"
    lock_path.write_text(_json.dumps(holder))
    monkeypatch.setattr("iai_mcp.lifecycle_lock.DEFAULT_LOCK_PATH", lock_path)

    from iai_mcp.cli import main

    exit_code = main(["lifecycle", "force-unlock", "--yes"])
    stdout = capsys.readouterr().out

    assert exit_code == 0
    assert "Lockfile removed." in stdout
    assert not lock_path.exists()

View file

@ -0,0 +1,345 @@
"""Plan 07.14-01 tests: `iai-mcp maintenance compact-records`.
Eight cases:
1. test_dry_run_prints_metrics_no_optimize_call
2. test_apply_with_yes_runs_optimize
3. test_preflight_refuses_when_daemon_alive
4. test_preflight_skips_when_daemon_state_missing
5. test_record_id_set_invariant_aborts_on_divergence
6. test_audit_file_written_on_apply
7. test_dry_run_no_audit_file
8. test_yes_required_with_apply_in_non_tty
All tests use mocked `MemoryStore` + mocked `optimize_lance_storage` +
mocked `psutil`: zero real LanceDB I/O, zero real embedder load,
combined wall-clock target < 5s.
"""
from __future__ import annotations
import argparse
import json
import os
import sys
from datetime import timedelta
from pathlib import Path
from unittest.mock import patch, MagicMock
import pytest
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _make_args(**kwargs) -> argparse.Namespace:
"""Build an argparse.Namespace with default flag values, overridable."""
defaults = dict(
dry_run=False,
apply=False,
yes=False,
store_path=None,
)
defaults.update(kwargs)
return argparse.Namespace(**defaults)
def _patch_psutil_alive(
    monkeypatch: pytest.MonkeyPatch, *, pid: int, cmdline: list[str],
) -> None:
    """Install a fake ``psutil`` whose ``Process(...).cmdline()`` returns ``cmdline``.

    The fake is placed in ``sys.modules["psutil"]`` so the function-scope
    ``import psutil`` inside ``_maintenance_compact_preflight_daemon_alive``
    resolves to it (same pattern as tests/test_doctor_checklist.py).

    ``pid`` is accepted but not used: the fake returns the same process
    object for whatever pid it is asked about.
    """
    psutil_stub = MagicMock(
        **{"Process.return_value.cmdline.return_value": cmdline}
    )
    monkeypatch.setitem(sys.modules, "psutil", psutil_stub)
def _make_optimize_report(
*, versions_before: int = 3, versions_after: int = 1,
rows_before: int = 0, rows_after: int = 0,
) -> dict:
"""Construct an optimize_lance_storage-shaped report (3 tables)."""
base = {
"rows_before": rows_before,
"rows_after": rows_after,
"versions_before": versions_before,
"versions_after": versions_after,
"size_bytes_before": 0,
"size_bytes_after": 0,
"elapsed_sec": 0.0,
}
return {
"records": dict(base),
"edges": dict(base, versions_before=0, versions_after=0),
"events": dict(base, versions_before=0, versions_after=0),
}
def _make_fake_store(record_ids: list[str]) -> MagicMock:
"""Construct a MagicMock MemoryStore exposing tbl.count_rows() +
tbl.to_pandas(columns=['id']) for the given record-id list.
"""
fake_store = MagicMock()
fake_tbl = MagicMock()
fake_tbl.count_rows.return_value = len(record_ids)
fake_df = MagicMock()
fake_df.__getitem__.return_value.tolist.return_value = list(record_ids)
fake_tbl.to_pandas.return_value = fake_df
fake_store.db.open_table.return_value = fake_tbl
return fake_store
# ---------------------------------------------------------------------------
# Fixture: HOME-isolated IAI root with records.lance skeleton
# ---------------------------------------------------------------------------
@pytest.fixture
def iai_root(tmp_path, monkeypatch):
    """Sandbox HOME to ``tmp_path`` and yield a pre-built ``~/.iai-mcp`` dir.

    Setup creates ``~/.iai-mcp/lancedb/records.lance/_versions/`` holding
    three 100-byte fake manifests so the size/version walk has data to
    measure, then reloads ``iai_mcp.cli`` so its module-scope
    ``Path.home()`` captures (STATE_PATH/LOCK_PATH/SOCKET_PATH) point into
    the sandbox.

    Teardown undoes the monkeypatched environment *before* reloading the
    CLI module again. ``monkeypatch`` itself is torn down after this
    fixture, so without the explicit ``undo()`` the second reload would
    still see the sandbox HOME and leave the module pointing at
    ``tmp_path`` for later tests.
    """
    import importlib

    monkeypatch.setenv("HOME", str(tmp_path))
    monkeypatch.setenv("HF_HOME", str(tmp_path / "hf"))
    monkeypatch.setenv(
        "PYTHON_KEYRING_BACKEND", "keyring.backends.fail.Keyring"
    )
    monkeypatch.setenv("IAI_MCP_CRYPTO_PASSPHRASE", "test-passphrase")
    try:
        import keyring.core
        # Drop keyring's cached backend (presumably so the env var above is
        # honoured on next use -- verify against keyring internals).
        keyring.core._keyring_backend = None
    except ImportError:
        # keyring is optional here; nothing to reset when it is absent.
        pass

    iai_dir = tmp_path / ".iai-mcp"
    iai_dir.mkdir()
    records_lance = iai_dir / "lancedb" / "records.lance"
    records_lance.mkdir(parents=True)
    versions_dir = records_lance / "_versions"
    versions_dir.mkdir()
    for i in range(3):
        (versions_dir / f"{i:020d}.manifest").write_bytes(b"x" * 100)

    # Reload cli to pick up new HOME — STATE_PATH/LOCK_PATH/SOCKET_PATH are
    # module-scope Path.home() captures.
    from iai_mcp import cli as _cli
    importlib.reload(_cli)
    yield iai_dir
    # Restore the real environment first so this reload re-captures the
    # original HOME rather than the sandbox one.
    monkeypatch.undo()
    importlib.reload(_cli)
# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------
def test_dry_run_prints_metrics_no_optimize_call(iai_root, capsys):
    """``--dry-run`` prints metrics-only JSON and never invokes the optimizer."""
    from iai_mcp.cli import cmd_maintenance_compact_records

    with patch("iai_mcp.maintenance.optimize_lance_storage") as optimize_mock:
        exit_code = cmd_maintenance_compact_records(_make_args(dry_run=True))
        assert exit_code == 0
        report = json.loads(capsys.readouterr().out)
        assert report["mode"] == "dry-run"
        pre_metrics = report["metrics"]["pre"]
        for key in ("versions_count", "size_mb", "records_count"):
            assert key in pre_metrics
        # No optimize pass ran, so there is no "post" snapshot.
        assert report["metrics"]["post"] is None
        optimize_mock.assert_not_called()
def test_apply_with_yes_runs_optimize(iai_root, monkeypatch, capsys):
    """``--apply --yes`` calls the (mocked) optimizer once with ``retention`` of 0 days."""
    from iai_mcp import cli as _cli

    store_stub = _make_fake_store(["id1", "id2", "id3", "id4", "id5"])
    monkeypatch.setattr(
        "iai_mcp.store.MemoryStore", lambda path=None, **kw: store_stub,
    )
    report = _make_optimize_report(
        versions_before=3, versions_after=1,
        rows_before=5, rows_after=5,
    )
    optimize_mock = MagicMock(return_value=report)
    monkeypatch.setattr(
        "iai_mcp.maintenance.optimize_lance_storage", optimize_mock,
    )

    exit_code = _cli.cmd_maintenance_compact_records(
        _make_args(apply=True, yes=True),
    )

    assert exit_code == 0
    assert optimize_mock.call_count == 1
    _positional, keyword_args = optimize_mock.call_args
    assert keyword_args["retention"] == timedelta(days=0)
def test_preflight_refuses_when_daemon_alive(iai_root, monkeypatch, capsys):
    """A live ``iai_mcp.daemon`` pid in the state file blocks ``--apply --yes``.

    Expected: rc=1, 'daemon running' on stderr, optimizer never called.
    """
    # Use this test process's own pid so the recorded daemon pid is
    # guaranteed to exist (os.kill(pid, 0) would succeed).
    own_pid = os.getpid()
    (iai_root / ".daemon-state.json").write_text(
        json.dumps({"daemon_pid": own_pid})
    )
    _patch_psutil_alive(
        monkeypatch, pid=own_pid,
        cmdline=["python", "-m", "iai_mcp.daemon"],
    )
    from iai_mcp.cli import cmd_maintenance_compact_records

    with patch("iai_mcp.maintenance.optimize_lance_storage") as optimize_mock:
        exit_code = cmd_maintenance_compact_records(
            _make_args(apply=True, yes=True),
        )
        assert exit_code == 1
        stderr = capsys.readouterr().err
        assert "daemon running" in stderr
        optimize_mock.assert_not_called()
def test_preflight_skips_when_daemon_state_missing(
    iai_root, monkeypatch, capsys,
):
    """Without a ``.daemon-state.json`` the preflight passes and optimize runs once."""
    daemon_state = iai_root / ".daemon-state.json"
    assert not daemon_state.exists()

    store_stub = _make_fake_store([])
    monkeypatch.setattr(
        "iai_mcp.store.MemoryStore", lambda path=None, **kw: store_stub,
    )
    optimize_mock = MagicMock(
        return_value=_make_optimize_report(versions_before=3, versions_after=1)
    )
    monkeypatch.setattr(
        "iai_mcp.maintenance.optimize_lance_storage", optimize_mock,
    )
    from iai_mcp.cli import cmd_maintenance_compact_records

    exit_code = cmd_maintenance_compact_records(
        _make_args(apply=True, yes=True),
    )
    assert exit_code == 0
    assert optimize_mock.call_count == 1
def test_record_id_set_invariant_aborts_on_divergence(
    iai_root, monkeypatch, capsys,
):
    """A post-optimize id-set smaller than the pre id-set aborts with a FAILED audit.

    The pre snapshot has three ids and the post snapshot two, so exactly one
    id is reported missing.
    """
    store_stub = _make_fake_store(["id1", "id2", "id3"])
    monkeypatch.setattr(
        "iai_mcp.store.MemoryStore", lambda path=None, **kw: store_stub,
    )
    optimize_mock = MagicMock(return_value=_make_optimize_report(
        versions_before=3, versions_after=1,
        rows_before=3, rows_after=2,
    ))
    monkeypatch.setattr(
        "iai_mcp.maintenance.optimize_lance_storage", optimize_mock,
    )

    # _maintenance_compact_metrics is stubbed to hand back a "pre" snapshot
    # on its first call and the divergent "post" snapshot on every later one.
    pre_snapshot = {
        "versions_count": 3, "size_mb": 0.0,
        "records_count": 3, "record_id_set": {"id1", "id2", "id3"},
    }
    post_snapshot = {
        "versions_count": 1, "size_mb": 0.0,
        "records_count": 2, "record_id_set": {"id1", "id2"},
    }
    calls_seen = []

    def _stub_metrics(*args, **kwargs):
        calls_seen.append(None)
        return pre_snapshot if len(calls_seen) == 1 else post_snapshot

    monkeypatch.setattr(
        "iai_mcp.cli._maintenance_compact_metrics", _stub_metrics,
    )
    from iai_mcp.cli import cmd_maintenance_compact_records

    exit_code = cmd_maintenance_compact_records(
        _make_args(apply=True, yes=True),
    )
    assert exit_code == 1
    stderr = capsys.readouterr().err
    assert "ABORT" in stderr
    assert "divergence" in stderr

    # Exactly one FAILED audit file must have been written.
    failed_audits = list(iai_root.glob(".maintenance-compact-FAILED-*.json"))
    assert len(failed_audits) == 1
    audit = json.loads(failed_audits[0].read_text())
    assert audit["status"] == "aborted"
    assert audit["reason"] == "record_id_set divergence post-optimize"
    assert audit["missing_ids_count"] == 1
def test_audit_file_written_on_apply(iai_root, monkeypatch, capsys):
    """The ``--apply --yes`` happy path writes one audit JSON with status=ok."""
    store_stub = _make_fake_store(["id1", "id2"])
    monkeypatch.setattr(
        "iai_mcp.store.MemoryStore", lambda path=None, **kw: store_stub,
    )
    optimize_mock = MagicMock(return_value=_make_optimize_report(
        versions_before=3, versions_after=1,
        rows_before=2, rows_after=2,
    ))
    monkeypatch.setattr(
        "iai_mcp.maintenance.optimize_lance_storage", optimize_mock,
    )
    from iai_mcp.cli import cmd_maintenance_compact_records

    exit_code = cmd_maintenance_compact_records(
        _make_args(apply=True, yes=True),
    )
    assert exit_code == 0

    # The glob also matches FAILED audits, so filter those out by name.
    audits = [
        candidate
        for candidate in iai_root.glob(".maintenance-compact-*.json")
        if "FAILED" not in candidate.name
    ]
    assert len(audits) == 1, (
        f"expected exactly 1 audit file, got {audits}"
    )
    audit = json.loads(audits[0].read_text())
    assert audit["status"] == "ok"
    for field in ("metrics_pre", "metrics_post", "elapsed_sec"):
        assert field in audit
def test_dry_run_no_audit_file(iai_root, capsys):
    """``--dry-run`` leaves no ``.maintenance-compact-*.json`` file behind."""
    from iai_mcp.cli import cmd_maintenance_compact_records

    exit_code = cmd_maintenance_compact_records(_make_args(dry_run=True))
    assert exit_code == 0
    leftover = list(iai_root.glob(".maintenance-compact-*.json"))
    assert leftover == []
def test_yes_required_with_apply_in_non_tty(iai_root, monkeypatch, capsys):
    """``--apply`` without ``--yes`` on a non-tty stdin exits 2 with a hint."""

    def _not_a_tty():
        return False

    monkeypatch.setattr("sys.stdin.isatty", _not_a_tty)
    from iai_mcp.cli import cmd_maintenance_compact_records

    exit_code = cmd_maintenance_compact_records(
        _make_args(apply=True, yes=False),
    )
    assert exit_code == 2
    stderr = capsys.readouterr().err
    assert "requires --yes" in stderr

View file

@ -0,0 +1,344 @@
"""Phase 10.3 Plan 10.3-01 Task 1.5 -- CLI maintenance sleep-cycle tests.
Ten cases:
1. test_happy_path_runs_pipeline_and_prints_progress
2. test_quarantined_without_force_returns_nonzero_with_message
3. test_force_runs_pipeline_when_quarantined
4. test_reset_quarantine_clears_then_runs
5. test_reset_quarantine_when_not_quarantined_no_op
6. test_failure_returns_nonzero_with_error_in_stderr
7. test_failure_after_3rd_strike_prints_quarantine_hint
8. test_subparser_exposes_sleep_cycle_with_flags
9. test_subparser_defaults_force_false_reset_false
10. test_store_open_failure_returns_2
All tests use stub `MemoryStore` + monkeypatched SleepPipeline methods:
no real LanceDB I/O.
"""
from __future__ import annotations
import argparse
from datetime import datetime, timedelta, timezone
from pathlib import Path
from unittest.mock import MagicMock
import pytest
from iai_mcp.lifecycle_state import (
default_state,
load_state,
save_state,
)
from iai_mcp.sleep_pipeline import SleepStep
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _make_args(**kwargs) -> argparse.Namespace:
"""Construct argparse.Namespace with sleep-cycle defaults."""
defaults = dict(
force=False,
reset_quarantine=False,
store_path=None,
)
defaults.update(kwargs)
return argparse.Namespace(**defaults)
@pytest.fixture
def iai_root(tmp_path, monkeypatch):
    """Sandbox HOME so ``LIFECYCLE_STATE_PATH`` resolves inside ``tmp_path``.

    Setup creates ``~/.iai-mcp`` under the sandbox and reloads
    ``iai_mcp.lifecycle_state`` and ``iai_mcp.cli`` so their module-scope
    path captures pick up the new HOME. Yields the ``.iai-mcp`` directory.

    Teardown undoes the monkeypatched environment *before* reloading both
    modules again. ``monkeypatch`` itself is torn down after this fixture,
    so without the explicit ``undo()`` the teardown reloads would still see
    the sandbox HOME and leave the modules pointing at ``tmp_path``.
    """
    import importlib

    monkeypatch.setenv("HOME", str(tmp_path))
    monkeypatch.setenv("HF_HOME", str(tmp_path / "hf"))
    monkeypatch.setenv(
        "PYTHON_KEYRING_BACKEND", "keyring.backends.fail.Keyring"
    )
    monkeypatch.setenv("IAI_MCP_CRYPTO_PASSPHRASE", "test-passphrase")
    iai_dir = tmp_path / ".iai-mcp"
    iai_dir.mkdir()

    # Reload modules so they pick up the new HOME — LIFECYCLE_STATE_PATH
    # and STATE_PATH are module-scope captures.
    from iai_mcp import lifecycle_state as _ls
    from iai_mcp import cli as _cli
    importlib.reload(_ls)
    importlib.reload(_cli)
    yield iai_dir
    # Restore the real environment first so these reloads re-capture the
    # original HOME rather than the sandbox one.
    monkeypatch.undo()
    importlib.reload(_ls)
    importlib.reload(_cli)
def _patch_store_open(monkeypatch: pytest.MonkeyPatch) -> MagicMock:
    """Swap ``iai_mcp.store.MemoryStore`` for a factory returning one MagicMock.

    Lets the CLI "open" a store without touching real LanceDB or the
    embedder. The shared mock is returned so callers can inspect it.
    """
    store_stub = MagicMock()

    def _open_store(path=None, **kw):
        return store_stub

    monkeypatch.setattr("iai_mcp.store.MemoryStore", _open_store)
    return store_stub
def _patch_pipeline_steps_to_noop(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Replace each ``SleepPipeline._step_*`` method with a stub returning ``(True, {})``.

    The pipeline's own sequencing still runs, but no step does real LanceDB
    work.
    """
    from iai_mcp.sleep_pipeline import SleepPipeline

    def _make_noop(s):
        # ``s`` is bound per step but not consulted by the stub itself.
        def _impl(self, _interrupt_check):
            return True, {}
        return _impl

    step_methods = (
        (SleepStep.SCHEMA_MINE, "_step_schema_mine"),
        (SleepStep.KNOB_TUNE, "_step_knob_tune"),
        (SleepStep.DREAM_DECAY, "_step_dream_decay"),
        (SleepStep.OPTIMIZE_LANCE, "_step_optimize_lance"),
        (SleepStep.COMPACT_RECORDS, "_step_compact_records"),
    )
    for step, method_name in step_methods:
        monkeypatch.setattr(SleepPipeline, method_name, _make_noop(step))
# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------
def test_happy_path_runs_pipeline_and_prints_progress(
    iai_root, monkeypatch, capsys,
):
    """No flags and no quarantine: exit 0 and one progress line per step."""
    _patch_store_open(monkeypatch)
    _patch_pipeline_steps_to_noop(monkeypatch)
    from iai_mcp.cli import cmd_maintenance_sleep_cycle

    exit_code = cmd_maintenance_sleep_cycle(_make_args())
    assert exit_code == 0
    stdout = capsys.readouterr().out
    expected_fragments = (
        "Sleep cycle started.",
        "[1/5] schema_mine",
        "[2/5] knob_tune",
        "[3/5] dream_decay",
        "[4/5] optimize_lance",
        "[5/5] compact_records",
        "Sleep cycle complete",
    )
    for fragment in expected_fragments:
        assert fragment in stdout
def test_quarantined_without_force_returns_nonzero_with_message(
    iai_root, monkeypatch, capsys,
):
    """An active quarantine without ``--force`` exits 1 and explains on stderr."""
    _patch_store_open(monkeypatch)
    # Imported here so the path comes from the module that ``iai_root``
    # reloaded; the quarantine is seeded into that file.
    from iai_mcp.lifecycle_state import LIFECYCLE_STATE_PATH

    seeded_at = datetime.now(timezone.utc)
    expires_at = seeded_at + timedelta(hours=12)
    state = default_state()
    state["quarantine"] = {
        "until_ts": expires_at.isoformat(),
        "reason": "test stuck",
        "since_ts": seeded_at.isoformat(),
    }
    save_state(state, LIFECYCLE_STATE_PATH)
    _patch_pipeline_steps_to_noop(monkeypatch)
    from iai_mcp.cli import cmd_maintenance_sleep_cycle

    exit_code = cmd_maintenance_sleep_cycle(_make_args())
    assert exit_code == 1
    streams = capsys.readouterr()
    assert "quarantined" in streams.err.lower()
    for fragment in ("test stuck", "--force", "--reset-quarantine"):
        assert fragment in streams.err
def test_force_runs_pipeline_when_quarantined(
    iai_root, monkeypatch, capsys,
):
    """``--force`` runs the full pipeline despite an active quarantine."""
    _patch_store_open(monkeypatch)
    from iai_mcp.lifecycle_state import LIFECYCLE_STATE_PATH

    seeded_at = datetime.now(timezone.utc)
    expires_at = seeded_at + timedelta(hours=12)
    state = default_state()
    state["quarantine"] = {
        "until_ts": expires_at.isoformat(),
        "reason": "test stuck",
        "since_ts": seeded_at.isoformat(),
    }
    save_state(state, LIFECYCLE_STATE_PATH)
    _patch_pipeline_steps_to_noop(monkeypatch)
    from iai_mcp.cli import cmd_maintenance_sleep_cycle

    exit_code = cmd_maintenance_sleep_cycle(_make_args(force=True))
    assert exit_code == 0
    stdout = capsys.readouterr().out
    assert "[5/5] compact_records" in stdout
    assert "Sleep cycle complete" in stdout
    # A forced run must leave the stored quarantine record untouched.
    persisted = load_state(LIFECYCLE_STATE_PATH)
    assert persisted["quarantine"] is not None
def test_reset_quarantine_clears_then_runs(
    iai_root, monkeypatch, capsys,
):
    """``--reset-quarantine`` clears the quarantine, then the cycle runs normally."""
    _patch_store_open(monkeypatch)
    from iai_mcp.lifecycle_state import LIFECYCLE_STATE_PATH

    seeded_at = datetime.now(timezone.utc)
    expires_at = seeded_at + timedelta(hours=12)
    state = default_state()
    state["quarantine"] = {
        "until_ts": expires_at.isoformat(),
        "reason": "stuck",
        "since_ts": seeded_at.isoformat(),
    }
    save_state(state, LIFECYCLE_STATE_PATH)
    _patch_pipeline_steps_to_noop(monkeypatch)
    from iai_mcp.cli import cmd_maintenance_sleep_cycle

    exit_code = cmd_maintenance_sleep_cycle(_make_args(reset_quarantine=True))
    assert exit_code == 0
    stdout = capsys.readouterr().out
    assert "Quarantine cleared." in stdout
    assert "Sleep cycle complete" in stdout
    # The persisted record no longer carries a quarantine.
    persisted = load_state(LIFECYCLE_STATE_PATH)
    assert persisted["quarantine"] is None
def test_reset_quarantine_when_not_quarantined_no_op(
    iai_root, monkeypatch, capsys,
):
    """``--reset-quarantine`` with nothing to clear prints a no-op note and still runs."""
    _patch_store_open(monkeypatch)
    _patch_pipeline_steps_to_noop(monkeypatch)
    from iai_mcp.cli import cmd_maintenance_sleep_cycle

    exit_code = cmd_maintenance_sleep_cycle(_make_args(reset_quarantine=True))
    assert exit_code == 0
    stdout = capsys.readouterr().out
    for fragment in ("Quarantine not active", "Sleep cycle complete"):
        assert fragment in stdout
def test_failure_returns_nonzero_with_error_in_stderr(
    iai_root, monkeypatch, capsys,
):
    """A raising step gives exit 1 and a FAILED line on stderr."""
    _patch_store_open(monkeypatch)
    _patch_pipeline_steps_to_noop(monkeypatch)
    from iai_mcp.sleep_pipeline import SleepPipeline

    def _explode(self, _interrupt_check):
        raise RuntimeError("synthetic optimize failure")

    # Override only step 4; steps 1-3 stay as no-ops.
    monkeypatch.setattr(SleepPipeline, "_step_optimize_lance", _explode)
    from iai_mcp.cli import cmd_maintenance_sleep_cycle

    exit_code = cmd_maintenance_sleep_cycle(_make_args())
    assert exit_code == 1
    streams = capsys.readouterr()
    # The completed steps are reported on stdout ...
    for fragment in ("[1/5] schema_mine", "[2/5] knob_tune", "[3/5] dream_decay"):
        assert fragment in streams.out
    # ... and the failing step, with its error text, on stderr.
    assert "[4/5] optimize_lance ... FAILED" in streams.err
    assert "synthetic optimize failure" in streams.err
def test_failure_after_3rd_strike_prints_quarantine_hint(
    iai_root, monkeypatch, capsys,
):
    """The third consecutive failure of one step exits 1 with a quarantine hint."""
    _patch_store_open(monkeypatch)
    _patch_pipeline_steps_to_noop(monkeypatch)
    from iai_mcp.sleep_pipeline import SleepPipeline

    def _explode(self, _interrupt_check):
        raise RuntimeError("boom")

    monkeypatch.setattr(SleepPipeline, "_step_dream_decay", _explode)
    from iai_mcp.cli import cmd_maintenance_sleep_cycle

    # Attempts 1 and 2: results ignored, output discarded afterwards.
    for _attempt in range(2):
        cmd_maintenance_sleep_cycle(_make_args())
    capsys.readouterr()

    # Attempt 3 is the one expected to trigger the quarantine.
    exit_code = cmd_maintenance_sleep_cycle(_make_args())
    assert exit_code == 1
    stderr = capsys.readouterr().err
    for fragment in ("FAILED", "quarantined for 24h", "--reset-quarantine"):
        assert fragment in stderr
def test_subparser_exposes_sleep_cycle_with_flags():
    """``maintenance sleep-cycle --force --reset-quarantine`` parses to the expected Namespace."""
    from iai_mcp.cli import _build_parser

    argv = ["maintenance", "sleep-cycle", "--force", "--reset-quarantine"]
    parsed = _build_parser().parse_args(argv)
    assert parsed.force is True
    assert parsed.reset_quarantine is True
    # --store-path was not given, so it keeps its default.
    assert parsed.store_path is None
    assert parsed.maintenance_cmd == "sleep-cycle"
def test_subparser_defaults_force_false_reset_false():
    """With no flags given, ``force`` and ``reset_quarantine`` both parse as False."""
    from iai_mcp.cli import _build_parser

    parsed = _build_parser().parse_args(["maintenance", "sleep-cycle"])
    assert parsed.force is False
    assert parsed.reset_quarantine is False
def test_store_open_failure_returns_2(
    iai_root, monkeypatch, capsys,
):
    """If constructing ``MemoryStore`` raises, the CLI exits 2 and reports it on stderr."""

    def _failing_store(path=None, **kw):
        raise RuntimeError("disk full")

    monkeypatch.setattr("iai_mcp.store.MemoryStore", _failing_store)
    from iai_mcp.cli import cmd_maintenance_sleep_cycle

    exit_code = cmd_maintenance_sleep_cycle(_make_args())
    assert exit_code == 2
    stderr = capsys.readouterr().err
    for fragment in ("could not open MemoryStore", "disk full"):
        assert fragment in stderr

View file

@ -0,0 +1,63 @@
"""Plan 03-02 CONN-07 RED: iai-mcp topology CLI.
The `topology` subcommand prints one key:value line per metric:
C: <float>
L: <float>
sigma: <float | "insufficient_data">
communities: <int>
rich_club_ratio: <float>
N: <int>
regime: <str>
"""
from __future__ import annotations
import re
import pytest
from iai_mcp.cli import main as cli_main
def test_topology_subcommand_registered():
    """``iai-mcp topology --help`` exits cleanly, proving the subparser exists."""
    with pytest.raises(SystemExit) as exit_info:
        cli_main(["topology", "--help"])
    # argparse's --help handler calls sys.exit(0) on success.
    assert exit_info.value.code == 0
def test_topology_prints_required_keys(tmp_path, capsys, monkeypatch):
    """Each of the seven metric keys starts a ``key: value`` line in the output."""
    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    code = cli_main(["topology"])
    assert code == 0
    out = capsys.readouterr().out
    required_keys = (
        "C", "L", "sigma", "communities", "rich_club_ratio", "N", "regime",
    )
    for key in required_keys:
        # Key must open a line and be followed by a colon plus whitespace.
        assert re.search(rf"^{key}:\s", out, re.MULTILINE), (
            f"missing '{key}: ' line in {out!r}"
        )
def test_topology_empty_store_prints_insufficient_data(tmp_path, capsys, monkeypatch):
    """On a fresh store the output carries the 'insufficient_data' marker."""
    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    exit_code = cli_main(["topology"])
    assert exit_code == 0
    out = capsys.readouterr().out
    # Either sigma or the regime may carry the marker on an empty store;
    # the test only requires that it appears somewhere in the output.
    marker_present = "insufficient_data" in out
    assert marker_present, (
        f"empty store must surface insufficient_data; got {out!r}"
    )

View file

@ -0,0 +1,77 @@
"""Tests for iai-mcp trajectory CLI.
The `trajectory` subcommand aggregates M1..M6 events via
trajectory.aggregate_trajectory and prints one summary line per metric.
Supports --since WEEKS to scope history.
"""
from __future__ import annotations
import json
from datetime import datetime, timedelta, timezone
import pytest
from iai_mcp.cli import main as cli_main
from iai_mcp.events import write_event
from iai_mcp.store import MemoryStore
def test_trajectory_empty_output(tmp_path, capsys, monkeypatch):
    """With no trajectory events recorded, the CLI exits 0 and says there is no data."""
    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    exit_code = cli_main(["trajectory"])
    assert exit_code == 0
    lowered = capsys.readouterr().out.lower()
    assert "no trajectory data" in lowered or "no data" in lowered
def test_trajectory_renders_m1_to_m6(tmp_path, capsys, monkeypatch):
    """After one event per metric is seeded, the output mentions M1 through M6."""
    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    store = MemoryStore(path=tmp_path)
    metric_names = ["m1", "m2", "m3", "m4", "m5", "m6"]
    # One event per metric, valued 1.0 .. 6.0 in order.
    for ordinal, metric in enumerate(metric_names, start=1):
        write_event(
            store,
            kind="trajectory_metric",
            data={"metric": metric, "value": float(ordinal)},
            severity="info",
            session_id="s1",
        )
    exit_code = cli_main(["trajectory"])
    assert exit_code == 0
    out = capsys.readouterr().out
    # Metrics are expected in upper case in the rendered output.
    for label in ("M1", "M2", "M3", "M4", "M5", "M6"):
        assert label in out
def test_trajectory_since_weeks_flag(tmp_path, capsys, monkeypatch):
    """``--since=N`` is recognised and the command returns 0.

    Only the CLI contract is checked here; the filtering itself is covered
    by tests at the ``trajectory.aggregate_trajectory`` level.
    """
    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    store = MemoryStore(path=tmp_path)
    write_event(
        store,
        kind="trajectory_metric",
        data={"metric": "m1", "value": 1.0},
        severity="info",
        session_id="s1",
    )
    exit_code = cli_main(["trajectory", "--since=2"])
    assert exit_code == 0
def test_trajectory_prints_aggregate_stats(tmp_path, capsys, monkeypatch):
    """With three M1 samples recorded, the output shows some aggregate indicator."""
    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    store = MemoryStore(path=tmp_path)
    for sample in (1.0, 2.0, 3.0):
        write_event(
            store,
            kind="trajectory_metric",
            data={"metric": "m1", "value": sample},
            severity="info",
            session_id="s1",
        )
    exit_code = cli_main(["trajectory"])
    assert exit_code == 0
    out = capsys.readouterr().out
    lowered = out.lower()
    # Deliberately loose: any of "mean", "avg" or an "=" sign counts.
    assert "mean" in lowered or "avg" in lowered or "=" in out

155
tests/test_community.py Normal file
View file

@ -0,0 +1,155 @@
"""Tests for iai_mcp.community (D-05 bootstrap, stable UUIDs, CONN-01/04)."""
from __future__ import annotations
import random
from uuid import uuid4
from iai_mcp.community import (
CommunityAssignment,
MAX_TOP_COMMUNITIES,
MID_N_LEIDEN,
MODULARITY_FLOOR,
REFRESH_DELTA,
SMALL_N_FLAT,
UUID_ROTATE_COSINE,
detect_communities,
needs_refresh,
)
from iai_mcp.graph import MemoryGraph
def _random_emb(seed: int) -> list[float]:
rng = random.Random(seed)
return [rng.random() for _ in range(384)]
def test_small_n_flat_single_community() -> None:
    """Below ``SMALL_N_FLAT`` nodes the result is a single flat community."""
    graph = MemoryGraph()
    for seed in range(50):
        graph.add_node(uuid4(), community_id=None, embedding=_random_emb(seed))
    assignment = detect_communities(graph, prior=None)
    assert assignment.backend == "flat"
    distinct_communities = set(assignment.node_to_community.values())
    assert len(distinct_communities) == 1
    assert assignment.modularity == 0.0
def test_two_cliques_produce_multiple_communities() -> None:
    """Two dense 150-node cliques (N=300) are split by Leiden with Q above the floor."""
    clique_size = 150
    graph = MemoryGraph()
    clique_a = [uuid4() for _ in range(clique_size)]
    clique_b = [uuid4() for _ in range(clique_size)]
    # Nodes: all of clique A first (seeds 0..149), then clique B (10000..).
    for members, seed_base in ((clique_a, 0), (clique_b, 10_000)):
        for offset, node in enumerate(members):
            graph.add_node(
                node, community_id=None, embedding=_random_emb(seed_base + offset),
            )
    # Edges: fully connect each clique, inserting A and B edges alternately.
    for lo in range(clique_size):
        for hi in range(lo + 1, clique_size):
            graph.add_edge(clique_a[lo], clique_a[hi])
            graph.add_edge(clique_b[lo], clique_b[hi])
    assignment = detect_communities(graph, prior=None)
    assert assignment.backend.startswith("leiden")
    assert assignment.modularity >= MODULARITY_FLOOR
    assert len(set(assignment.node_to_community.values())) >= 2
def test_stable_uuids_on_identical_rerun() -> None:
    """Re-running on the same graph with the first result as prior keeps every community id."""
    clique_size = 150
    graph = MemoryGraph()
    clique_a = [uuid4() for _ in range(clique_size)]
    clique_b = [uuid4() for _ in range(clique_size)]
    for members, seed_base in ((clique_a, 0), (clique_b, 10_000)):
        for offset, node in enumerate(members):
            graph.add_node(
                node, community_id=None, embedding=_random_emb(seed_base + offset),
            )
    for lo in range(clique_size):
        for hi in range(lo + 1, clique_size):
            graph.add_edge(clique_a[lo], clique_a[hi])
            graph.add_edge(clique_b[lo], clique_b[hi])

    baseline = detect_communities(graph, prior=None)
    rerun = detect_communities(graph, prior=baseline)
    # Every node must land in the same community id as in the first run.
    for node, original_community in baseline.node_to_community.items():
        assert rerun.node_to_community[node] == original_community
def test_top_communities_capped_at_seven() -> None:
    """CONN-01: MAX_TOP_COMMUNITIES = 7 enforced on level 1 output."""
    # Imported once up front rather than on every inner-loop iteration.
    from uuid import UUID

    g = MemoryGraph()
    for i in range(SMALL_N_FLAT + 10):
        g.add_node(uuid4(), community_id=None, embedding=_random_emb(i))
    # Node ids read back from the underlying graph are converted with
    # UUID(...) before being handed to add_edge.
    nodes = list(g._nx.nodes())
    # Link each node to its successor, forming one path over all nodes.
    # This is exactly the edge set (and order) the previous chunked
    # 20-at-a-time loop produced.
    for left, right in zip(nodes, nodes[1:]):
        g.add_edge(UUID(left), UUID(right))
    a = detect_communities(g, prior=None)
    assert len(a.top_communities) <= MAX_TOP_COMMUNITIES
def test_mid_regions_exposes_community_members() -> None:
    """CONN-01 level 2: ``mid_regions`` member lists together cover all 50 nodes."""
    graph = MemoryGraph()
    node_ids = [uuid4() for _ in range(50)]
    for seed, node in enumerate(node_ids):
        graph.add_node(node, community_id=None, embedding=_random_emb(seed))
    assignment = detect_communities(graph, prior=None)
    member_counts = [len(members) for members in assignment.mid_regions.values()]
    assert sum(member_counts) == 50
def test_needs_refresh_threshold() -> None:
    """CONN-04: a modularity change above 0.05 triggers a refresh; otherwise not."""
    baseline = CommunityAssignment(modularity=0.30)
    # (new modularity, expected verdict)
    cases = (
        (0.36, True),   # delta = 0.06 > 0.05
        (0.31, False),  # delta = 0.01 < 0.05
        (0.24, True),   # delta = 0.06 > 0.05 on the negative side
        (0.35, False),  # boundary: delta == 0.05 is not strictly greater
    )
    for new_q, expected in cases:
        assert needs_refresh(baseline, new_q) is expected
def test_empty_graph_returns_empty_assignment() -> None:
    """A graph with no nodes yields the flat backend and empty mappings."""
    assignment = detect_communities(MemoryGraph(), prior=None)
    assert assignment.backend == "flat"
    assert assignment.node_to_community == {}
    assert assignment.community_centroids == {}
def test_constants_exposed() -> None:
    """The named tuning constants are importable and carry their expected values."""
    expectations = (
        (SMALL_N_FLAT, 200),
        (MID_N_LEIDEN, 500),
        (MODULARITY_FLOOR, 0.2),
        (REFRESH_DELTA, 0.05),
        (UUID_ROTATE_COSINE, 0.7),
        (MAX_TOP_COMMUNITIES, 7),
    )
    for actual, wanted in expectations:
        assert actual == wanted
def test_mid_n_non_modular_falls_back_to_flat() -> None:
    """A 250-node clique (between SMALL_N_FLAT and MID_N_LEIDEN) ends up on the flat backend."""
    size = 250
    graph = MemoryGraph()
    members = [uuid4() for _ in range(size)]
    for seed, member in enumerate(members):
        graph.add_node(member, community_id=None, embedding=_random_emb(seed))
    # Connect every pair: a single clique has no community structure to find.
    for lo in range(size):
        for hi in range(lo + 1, size):
            graph.add_edge(members[lo], members[hi])
    assignment = detect_communities(graph, prior=None)
    # Expected outcome for a structureless graph is the flat fallback.
    assert assignment.backend == "flat"
def test_mid_regions_count_matches_community_count() -> None:
    """mid_regions has as many entries as there are distinct community ids."""
    size = 150
    graph = MemoryGraph()
    left = [uuid4() for _ in range(size)]
    right = [uuid4() for _ in range(size)]
    # Seeds run 0..299 across both cliques in order.
    for seed, node in enumerate(left + right):
        graph.add_node(node, community_id=None, embedding=_random_emb(seed))
    for lo in range(size):
        for hi in range(lo + 1, size):
            graph.add_edge(left[lo], left[hi])
            graph.add_edge(right[lo], right[hi])
    assignment = detect_communities(graph, prior=None)
    distinct_communities = set(assignment.node_to_community.values())
    assert len(assignment.mid_regions) == len(distinct_communities)

View file

@ -0,0 +1,163 @@
"""Tests for TOK-04 LLMLingua-2 compression (Plan 02-04 Task 2, D-25).
Scope (constitutional):
- ALLOWED: L2 community descriptors, session summaries, cls_summary records.
- FORBIDDEN: literal_surface of normal records, pinned, invariant_anchor,
user-tagged 'raw' records.
- Passthrough when llmlingua package not installed (local-only stays green).
"""
from __future__ import annotations
from datetime import datetime, timezone
from uuid import uuid4
import pytest
from iai_mcp.events import query_events
from iai_mcp.store import MemoryStore
from iai_mcp.types import EMBED_DIM, MemoryRecord
def _rec(
    *,
    text: str = "lorem ipsum dolor sit amet consectetur adipiscing elit",
    tags: list[str] | None = None,
    pinned: bool = False,
    detail_level: int = 2,
    s5_trust_score: float = 0.5,
    language: str = "en",
) -> MemoryRecord:
    """Build an episodic MemoryRecord for tests.

    Only the keyword arguments vary between calls; every other field is
    fixed. ``tags`` is copied, and ``None`` becomes an empty list.
    """
    stamp = datetime.now(timezone.utc)
    # Unit vector along the first axis, EMBED_DIM entries long.
    unit_embedding = [1.0, *([0.0] * (EMBED_DIM - 1))]
    record_tags = list(tags) if tags else []
    return MemoryRecord(
        id=uuid4(),
        tier="episodic",
        literal_surface=text,
        aaak_index="",
        embedding=unit_embedding,
        community_id=None,
        centrality=0.0,
        detail_level=detail_level,
        pinned=pinned,
        stability=0.0,
        difficulty=0.0,
        last_reviewed=None,
        never_decay=False,
        never_merge=False,
        provenance=[],
        created_at=stamp,
        updated_at=stamp,
        tags=record_tags,
        language=language,
        s5_trust_score=s5_trust_score,
    )
# --------------------------------------------------------------- is_compressible
def test_is_compressible_rejects_pinned():
    """A pinned record is refused and the reason mentions 'pinned'."""
    from iai_mcp.compress import is_compressible

    allowed, why = is_compressible(_rec(pinned=True))
    assert allowed is False
    assert "pinned" in why.lower()
def test_is_compressible_rejects_raw_tagged():
    """A record carrying a 'raw:' tag is refused and the reason mentions 'raw'."""
    from iai_mcp.compress import is_compressible

    record = _rec(tags=["raw:ru", "project:iai-mcp"])
    allowed, why = is_compressible(record)
    assert allowed is False
    assert "raw" in why.lower()
def test_is_compressible_rejects_invariant_anchor():
    """A record with s5_trust_score=0.95 is refused; the reason names 'invariant' or 'trust'."""
    from iai_mcp.compress import is_compressible

    record = _rec(s5_trust_score=0.95)
    allowed, why = is_compressible(record)
    assert allowed is False
    lowered = why.lower()
    assert any(word in lowered for word in ("invariant", "trust"))
def test_is_compressible_allows_cls_summary():
    """A record tagged 'cls_summary' is accepted for compression."""
    from iai_mcp.compress import is_compressible

    record = _rec(tags=["semantic", "cls_summary"])
    allowed, _why = is_compressible(record)
    assert allowed is True
def test_is_compressible_allows_schema():
    """A record tagged 'schema' is accepted for compression."""
    from iai_mcp.compress import is_compressible

    record = _rec(tags=["schema", "auto"])
    allowed, _why = is_compressible(record)
    assert allowed is True
def test_is_compressible_rejects_normal_record_by_default():
    """D-25: an ordinary record is refused unless a tag explicitly allows compression.

    The reason string must mention 'literal_surface' or 'constitutional'.
    """
    from iai_mcp.compress import is_compressible

    record = _rec(tags=["project:iai-mcp"])
    allowed, why = is_compressible(record)
    assert allowed is False
    lowered = why.lower()
    assert any(word in lowered for word in ("literal_surface", "constitutional"))
# --------------------------------------------------------------- compress_llmlingua2
def test_compress_llmlingua2_passes_through_when_pkg_absent(tmp_path, monkeypatch):
    """With the llmlingua loader stubbed to return None, the input text comes back unchanged."""
    from iai_mcp import compress as compress_mod

    # Simulate a missing package: the loader yields nothing.
    monkeypatch.setattr(compress_mod, "_load_llmlingua2", lambda: None)
    original = "this is a long text that would normally be compressed"
    result = compress_mod.compress_llmlingua2(
        original,
        target_ratio=0.5,
        store=MemoryStore(path=tmp_path),
    )
    assert result == original  # passthrough
def test_compress_llmlingua2_logs_fallback_event(tmp_path, monkeypatch):
    """The passthrough path records at least one llm_health event for compress_llmlingua2."""
    from iai_mcp import compress as compress_mod

    monkeypatch.setattr(compress_mod, "_load_llmlingua2", lambda: None)
    store = MemoryStore(path=tmp_path)
    compress_mod.compress_llmlingua2("text", target_ratio=0.5, store=store)
    # Keep only the health events emitted by the component under test.
    matching = []
    for event in query_events(store, kind="llm_health"):
        if event["data"].get("component") == "compress_llmlingua2":
            matching.append(event)
    assert len(matching) >= 1
# --------------------------------------------------------------- wrappers
def test_compress_l2_descriptor_uses_l2_target_ratio():
    """compress_l2_descriptor returns a str, and COMPRESSION_TARGET_L2 is 0.5."""
    from iai_mcp.compress import COMPRESSION_TARGET_L2, compress_l2_descriptor

    # Only callability and return type are checked here.
    result = compress_l2_descriptor("community summary line")
    assert isinstance(result, str)
    assert COMPRESSION_TARGET_L2 == 0.5
def test_compress_summary_uses_summary_target_ratio():
    """compress_summary returns a str, and COMPRESSION_TARGET_SUMMARY is 0.3."""
    from iai_mcp.compress import COMPRESSION_TARGET_SUMMARY, compress_summary

    result = compress_summary("cluster summary line")
    assert isinstance(result, str)
    assert COMPRESSION_TARGET_SUMMARY == 0.3
def test_compress_module_constants():
    """Both compression target ratios are exported with their expected values."""
    from iai_mcp.compress import COMPRESSION_TARGET_L2, COMPRESSION_TARGET_SUMMARY

    observed = (COMPRESSION_TARGET_L2, COMPRESSION_TARGET_SUMMARY)
    for actual, wanted in zip(observed, (0.5, 0.3)):
        assert actual == wanted

543
tests/test_concurrency.py Normal file
View file

@ -0,0 +1,543 @@
"""Tests for iai_mcp.concurrency -- Task 1.
Covers 10 behaviours from the plan:
1. acquire_shared + try_acquire_exclusive blocking semantics.
2. Exclusive-then-exclusive: second blocks.
3. flock fd-close safety (Pitfall 2): closing /etc/passwd doesn't release lock.
4. Multi-MCP: 2 and 3 shared holders keep daemon blocked.
5. SIGKILL releases lock automatically (kernel).
6. Unix socket NDJSON status round-trip.
7. Unix socket dispatcher receives exact dict for pause/force_rem/tail_logs.
8. Stale socket cleanup (Pitfall 10) lets server bind without EADDRINUSE.
9. Lock file + socket file mode 0o600.
10. holds_exclusive_nb -- cooperative-yield probe; returns False when
contended and never propagates BlockingIOError / EWOULDBLOCK.
"""
from __future__ import annotations
import asyncio
import fcntl
import json
import multiprocessing
import os
import signal
import time
from pathlib import Path
import pytest
# Use spawn so fork+LanceDB+multithread hazards (Pitfall 6) never apply.
_SPAWN = multiprocessing.get_context("spawn")
# ---------------------------------------------------------------------------
# helpers that run inside spawn children
# ---------------------------------------------------------------------------
def _child_hold_shared(lock_path_str: str, acquired_flag: str, release_flag: str) -> int:
"""Open the lock file, take LOCK_SH, touch acquired_flag, wait for release_flag, exit."""
fd = os.open(lock_path_str, os.O_RDWR | os.O_CREAT, 0o600)
try:
fcntl.flock(fd, fcntl.LOCK_SH)
Path(acquired_flag).write_text("ok")
# Wait for parent to signal release.
release = Path(release_flag)
for _ in range(300): # up to 30s
if release.exists():
break
time.sleep(0.1)
finally:
try:
fcntl.flock(fd, fcntl.LOCK_UN)
except OSError:
pass
os.close(fd)
return 0
def _child_hold_shared_sigkillable(lock_path_str: str, acquired_flag: str) -> int:
    """Take LOCK_SH, write the acquired flag, then sleep in a loop forever.

    The function never returns on its own; the parent is expected to kill
    the process. The descriptor is deliberately left open.
    """
    flags = os.O_RDWR | os.O_CREAT
    fd = os.open(lock_path_str, flags, 0o600)
    fcntl.flock(fd, fcntl.LOCK_SH)
    marker = Path(acquired_flag)
    marker.write_text("ok")
    while True:
        time.sleep(1)
# ---------------------------------------------------------------------------
# fixture: isolate LOCK_PATH / SOCKET_PATH into tmp_path
# ---------------------------------------------------------------------------
@pytest.fixture
def lock_and_socket_paths(tmp_path, monkeypatch):
    """Redirect module-level LOCK_PATH + SOCKET_PATH to per-test locations.

    AF_UNIX on macOS caps the path at 104 chars; pytest's tmp_path is often
    too long. The lock therefore lives in tmp_path, while the socket goes in
    a freshly created, uniquely named ``/tmp/iai-*`` directory so ``bind()``
    succeeds.

    Yields:
        ``(lock_path, sock_path)`` as ``Path`` objects.
    """
    import tempfile

    from iai_mcp import concurrency

    lock_path = tmp_path / ".lock"
    # mkdtemp gives a unique, owner-only directory; a name derived from the
    # pid and id() is predictable and could reuse a leftover directory.
    sock_dir = Path(tempfile.mkdtemp(prefix="iai-", dir="/tmp"))
    sock_path = sock_dir / "d.sock"
    monkeypatch.setattr(concurrency, "LOCK_PATH", lock_path)
    monkeypatch.setattr(concurrency, "SOCKET_PATH", sock_path)
    try:
        yield lock_path, sock_path
    finally:
        # Best-effort cleanup so /tmp doesn't accumulate.
        try:
            sock_path.unlink(missing_ok=True)
        except OSError:
            pass
        try:
            sock_dir.rmdir()
        except OSError:
            pass
# ---------------------------------------------------------------------------
# Test 1: shared vs exclusive
# ---------------------------------------------------------------------------
def test_shared_blocks_exclusive(tmp_path, lock_and_socket_paths):
    """While one ProcessLock holds shared, a second one is refused exclusive."""
    from iai_mcp.concurrency import ProcessLock

    lock_file = lock_and_socket_paths[0]
    shared_holder = ProcessLock(lock_file)
    shared_holder.acquire_shared()
    try:
        contender = ProcessLock(lock_file)
        try:
            # A second ProcessLock on the same file must not get exclusive.
            got_exclusive = contender.try_acquire_exclusive()
            assert got_exclusive is False
        finally:
            contender.close()
    finally:
        shared_holder.release()
        shared_holder.close()
# ---------------------------------------------------------------------------
# Test 2: exclusive-then-exclusive
# ---------------------------------------------------------------------------
def test_exclusive_then_exclusive_nonblocking(tmp_path, lock_and_socket_paths):
    """The first exclusive attempt returns True; a concurrent second one returns False."""
    from iai_mcp.concurrency import ProcessLock

    lock_file = lock_and_socket_paths[0]
    winner = ProcessLock(lock_file)
    try:
        won = winner.try_acquire_exclusive()
        assert won is True
        loser = ProcessLock(lock_file)
        try:
            lost = loser.try_acquire_exclusive()
            assert lost is False
        finally:
            loser.close()
    finally:
        winner.release()
        winner.close()
# ---------------------------------------------------------------------------
# Test 3: flock fd-close safety (Pitfall 2 guard)
# ---------------------------------------------------------------------------
def test_flock_fd_close_safe(tmp_path, lock_and_socket_paths):
    """Opening and closing an unrelated file leaves our exclusive lock in place.

    Guards against the close-any-fd lock-loss trap (Pitfall 2): after
    /etc/passwd is opened and closed, a second ProcessLock must still be
    refused exclusive access.
    """
    from iai_mcp.concurrency import ProcessLock

    lock_file = lock_and_socket_paths[0]
    owner = ProcessLock(lock_file)
    try:
        assert owner.try_acquire_exclusive() is True
        # Touch an unrelated descriptor; this must not disturb the lock.
        stray_fd = os.open("/etc/passwd", os.O_RDONLY)
        os.close(stray_fd)
        # The lock is still held, so a rival cannot take exclusive.
        rival = ProcessLock(lock_file)
        try:
            rival_result = rival.try_acquire_exclusive()
            assert rival_result is False
        finally:
            rival.close()
    finally:
        owner.release()
        owner.close()
# ---------------------------------------------------------------------------
# Test 4: multi-MCP shared holders
# ---------------------------------------------------------------------------
@pytest.mark.parametrize("n_holders", [2, 3])
def test_multi_mcp(tmp_path, lock_and_socket_paths, n_holders):
    """N child processes holding LOCK_SH block exclusive until ALL of them exit.

    Each child runs ``_child_hold_shared`` and reports acquisition through
    its own flag file; one shared release flag tells every child to drop
    the lock and exit.
    """
    from iai_mcp.concurrency import ProcessLock

    lock_path, _ = lock_and_socket_paths
    lock_path_str = str(lock_path)
    acquired_flags = [tmp_path / f".acquired_{i}" for i in range(n_holders)]
    release_flag = tmp_path / ".release"
    procs = []
    try:
        # Spawn inside the try so a failure part-way still cleans up the
        # children that did start.
        for flag in acquired_flags:
            p = _SPAWN.Process(
                target=_child_hold_shared,
                args=(lock_path_str, str(flag), str(release_flag)),
            )
            p.start()
            procs.append(p)
        # Wait for all children to acquire shared (monotonic clock so a
        # wall-clock adjustment cannot shorten or extend the wait).
        deadline = time.monotonic() + 15
        while time.monotonic() < deadline:
            if all(f.exists() for f in acquired_flags):
                break
            time.sleep(0.05)
        assert all(f.exists() for f in acquired_flags), "children failed to take LOCK_SH"
        # Daemon cannot take exclusive while any child holds shared.
        daemon = ProcessLock(lock_path)
        try:
            assert daemon.try_acquire_exclusive() is False
        finally:
            daemon.close()
    finally:
        # Always release the children, even when an assertion above failed;
        # otherwise each join below would wait out its full timeout.
        release_flag.write_text("go")
        for p in procs:
            p.join(timeout=10)
            if p.is_alive():
                p.terminate()
                p.join(timeout=2)
    # After all children exit, exclusive must succeed.
    daemon2 = ProcessLock(lock_path)
    try:
        assert daemon2.try_acquire_exclusive() is True
    finally:
        daemon2.release()
        daemon2.close()
# ---------------------------------------------------------------------------
# Test 5: SIGKILL releases lock (kernel-enforced)
# ---------------------------------------------------------------------------
def test_sigkill_releases_lock(tmp_path, lock_and_socket_paths):
    """After a shared-lock holder is SIGKILLed, exclusive becomes acquirable."""
    from iai_mcp.concurrency import ProcessLock

    lock_file, _ = lock_and_socket_paths
    marker = tmp_path / ".acquired_sigkill"
    victim = _SPAWN.Process(
        target=_child_hold_shared_sigkillable,
        args=(str(lock_file), str(marker)),
    )
    victim.start()
    try:
        # Up to 15s for the child to report that it holds LOCK_SH.
        give_up_at = time.time() + 15
        while time.time() < give_up_at and not marker.exists():
            time.sleep(0.05)
        assert marker.exists(), "child didn't acquire shared"
        # With the child holding shared, exclusive must be refused.
        probe = ProcessLock(lock_file)
        try:
            assert probe.try_acquire_exclusive() is False
        finally:
            probe.close()
        # kill -9 the holder and wait for it to go away.
        os.kill(victim.pid, signal.SIGKILL)
        victim.join(timeout=10)
        assert not victim.is_alive()
    finally:
        if victim.is_alive():
            victim.terminate()
            victim.join(timeout=2)
    # The dead child's lock should be gone; retry briefly in case the
    # release takes a moment to become visible.
    survivor = ProcessLock(lock_file)
    try:
        acquired = False
        give_up_at = time.time() + 3
        while not acquired and time.time() < give_up_at:
            if survivor.try_acquire_exclusive():
                acquired = True
            else:
                time.sleep(0.05)
        assert acquired, "exclusive still blocked after SIGKILL"
    finally:
        survivor.release()
        survivor.close()
# ---------------------------------------------------------------------------
# Test 6: socket NDJSON status round-trip
# ---------------------------------------------------------------------------
def test_socket_status_round_trip(tmp_path, lock_and_socket_paths):
    """A status request over the control socket returns ok, the FSM state and an uptime."""
    from iai_mcp.concurrency import ProcessLock, serve_control_socket

    lock_file, sock_path = lock_and_socket_paths
    lock = ProcessLock(lock_file)
    state = {"fsm_state": "WAKE", "daemon_started_at": "2026-04-18T00:00:00+00:00"}

    async def runner():
        stop_event = asyncio.Event()
        server = asyncio.create_task(
            serve_control_socket(
                store=None,
                lock=lock,
                state=state,
                shutdown=stop_event,
                socket_path=sock_path,
            )
        )
        # Poll up to 100 x 20ms for the socket file to show up.
        tries = 0
        while tries < 100 and not sock_path.exists():
            await asyncio.sleep(0.02)
            tries += 1
        assert sock_path.exists(), "socket never bound"
        rx, tx = await asyncio.open_unix_connection(path=str(sock_path))
        tx.write(b'{"type":"status"}\n')
        await tx.drain()
        raw = await rx.readline()
        tx.close()
        try:
            await tx.wait_closed()
        except Exception:
            # Close errors are irrelevant to what this test checks.
            pass
        stop_event.set()
        await asyncio.wait_for(server, timeout=5)
        return json.loads(raw)

    try:
        reply = asyncio.run(runner())
    finally:
        lock.close()
    assert reply["ok"] is True
    assert reply["state"] == "WAKE"
    # uptime_sec must be numeric.
    assert isinstance(reply["uptime_sec"], (int, float))
# ---------------------------------------------------------------------------
# Test 7: injected dispatcher receives request dicts unchanged
# ---------------------------------------------------------------------------
def test_socket_injected_dispatcher(tmp_path, lock_and_socket_paths):
    """An injected dispatcher sees each request dict exactly as sent, in order."""
    from iai_mcp.concurrency import ProcessLock, serve_control_socket

    lock_file, sock_path = lock_and_socket_paths
    lock = ProcessLock(lock_file)
    seen: list[dict] = []
    outgoing = [
        {"type": "pause", "seconds": 60},
        {"type": "force_rem"},
        {"type": "tail_logs", "n": 10},
    ]

    async def custom_dispatcher(req: dict) -> dict:
        # Record the request and echo its type back to the client.
        seen.append(req)
        return {"ok": True, "seen": req.get("type")}

    async def runner():
        stop_event = asyncio.Event()
        server = asyncio.create_task(
            serve_control_socket(
                store=None,
                lock=lock,
                state={},
                shutdown=stop_event,
                dispatcher=custom_dispatcher,
                socket_path=sock_path,
            )
        )
        tries = 0
        while tries < 100 and not sock_path.exists():
            await asyncio.sleep(0.02)
            tries += 1
        assert sock_path.exists()
        replies = []
        # One fresh connection per request.
        for message in outgoing:
            rx, tx = await asyncio.open_unix_connection(path=str(sock_path))
            payload = (json.dumps(message) + "\n").encode()
            tx.write(payload)
            await tx.drain()
            raw = await rx.readline()
            replies.append(json.loads(raw))
            tx.close()
            try:
                await tx.wait_closed()
            except Exception:
                pass
        stop_event.set()
        await asyncio.wait_for(server, timeout=5)
        return replies

    try:
        replies = asyncio.run(runner())
    finally:
        lock.close()
    assert seen == outgoing, f"dispatcher saw {seen!r}"
    for reply, message in zip(replies, outgoing):
        assert reply == {"ok": True, "seen": message["type"]}
# ---------------------------------------------------------------------------
# Test 8: stale socket cleanup (Pitfall 10)
# ---------------------------------------------------------------------------
def test_stale_socket_cleanup(tmp_path, lock_and_socket_paths):
    """A leftover file at the socket path does not stop the server from binding."""
    from iai_mcp.concurrency import ProcessLock, serve_control_socket

    lock_file, sock_path = lock_and_socket_paths
    # Plant a regular file where the socket should go, as an orphan would leave.
    sock_path.parent.mkdir(parents=True, exist_ok=True)
    sock_path.write_text("stale")
    assert sock_path.exists()
    lock = ProcessLock(lock_file)

    async def runner():
        stop_event = asyncio.Event()
        server = asyncio.create_task(
            serve_control_socket(
                store=None,
                lock=lock,
                state={},
                shutdown=stop_event,
                socket_path=sock_path,
            )
        )
        # Wait until the path exists with size 0, i.e. the 5-byte stale
        # file has been replaced.
        tries = 0
        while tries < 100 and not (sock_path.exists() and sock_path.stat().st_size == 0):
            await asyncio.sleep(0.02)
            tries += 1
        # A status round-trip proves the server is actually listening.
        rx, tx = await asyncio.open_unix_connection(path=str(sock_path))
        tx.write(b'{"type":"status"}\n')
        await tx.drain()
        raw = await rx.readline()
        tx.close()
        try:
            await tx.wait_closed()
        except Exception:
            pass
        stop_event.set()
        await asyncio.wait_for(server, timeout=5)
        return json.loads(raw)

    try:
        reply = asyncio.run(runner())
    finally:
        lock.close()
    assert reply.get("ok") is True
# ---------------------------------------------------------------------------
# Test 9: 0o600 permissions on lock file + socket
# ---------------------------------------------------------------------------
def test_file_permissions_user_only(tmp_path, lock_and_socket_paths):
    """Both the lock file and the control socket carry mode 0o600."""
    from iai_mcp.concurrency import ProcessLock, serve_control_socket

    lock_path, sock_path = lock_and_socket_paths
    lock = ProcessLock(lock_path)
    # Constructing the ProcessLock is expected to have created the file.
    assert lock_path.exists()
    mode = lock_path.stat().st_mode & 0o777
    assert mode == 0o600, f"lock mode is {oct(mode)}, expected 0o600"

    async def runner():
        stop_event = asyncio.Event()
        server = asyncio.create_task(
            serve_control_socket(
                store=None,
                lock=lock,
                state={},
                shutdown=stop_event,
                socket_path=sock_path,
            )
        )
        tries = 0
        while tries < 100 and not sock_path.exists():
            await asyncio.sleep(0.02)
            tries += 1
        # Read the permission bits while the server is still up.
        observed = sock_path.stat().st_mode & 0o777
        stop_event.set()
        await asyncio.wait_for(server, timeout=5)
        return observed

    try:
        sock_mode = asyncio.run(runner())
    finally:
        lock.close()
    assert sock_mode == 0o600, f"socket mode is {oct(sock_mode)}, expected 0o600"
# ---------------------------------------------------------------------------
# Test 10: holds_exclusive_nb cooperative-yield probe
# ---------------------------------------------------------------------------
def test_holds_exclusive_nb(tmp_path, lock_and_socket_paths):
    """holds_exclusive_nb is True while we hold exclusive and False under contention.

    The False case must come back as a plain return value, not as a
    propagated BlockingIOError/EWOULDBLOCK.
    """
    from iai_mcp.concurrency import ProcessLock

    lock_file, _ = lock_and_socket_paths
    probe_lock = ProcessLock(lock_file)
    try:
        # Phase 1: we own the exclusive lock, so the probe reports True.
        assert probe_lock.try_acquire_exclusive() is True
        assert probe_lock.holds_exclusive_nb() is True
        # Phase 2: give the lock up and let a child take shared.
        probe_lock.release()
        got_flag = tmp_path / ".shared_holder_acquired"
        go_flag = tmp_path / ".shared_holder_release"
        holder = _SPAWN.Process(
            target=_child_hold_shared,
            args=(str(lock_file), str(got_flag), str(go_flag)),
        )
        holder.start()
        try:
            give_up_at = time.time() + 15
            while time.time() < give_up_at and not got_flag.exists():
                time.sleep(0.05)
            assert got_flag.exists()
            # We no longer hold exclusive and the child holds shared:
            # the probe must answer False without raising.
            assert probe_lock.holds_exclusive_nb() is False
        finally:
            go_flag.write_text("go")
            holder.join(timeout=10)
            if holder.is_alive():
                holder.terminate()
                holder.join(timeout=2)
    finally:
        probe_lock.close()

View file

@ -0,0 +1,403 @@
"""Tests for the 7th unix-socket message type, `session_open`.
Covers:
- Valid session_open message is accepted; reply = {"ok": True, "reason": "session_open_queued"}.
- Missing session_id is tolerated (optional field per spec).
- Wrong-typed session_id is rejected at validation.
- After a valid session_open, state contains:
* first_turn_pending[session_id] = non-empty timestamp string
* hippea_cascade_request with pending=True
- The 6 prior message types still work (no regression).
Uses a real `serve_control_socket(store, lock, state, shutdown)` behind a
threaded background event-loop so asyncio.run() calls in the test body don't
tear the server down between requests.
"""
from __future__ import annotations
import asyncio
import json
import tempfile
import threading
from pathlib import Path
from typing import Any
from unittest.mock import MagicMock
import pytest
from iai_mcp import concurrency, daemon_state
from iai_mcp.concurrency import (
ProcessLock,
_dispatch_socket_request,
_validate_socket_message,
serve_control_socket,
)
# ---------------------------------------------------------------- fixtures
@pytest.fixture
def tmp_socket(tmp_path: Path):
    """Yield a short unix-socket path (macOS ~104-byte limit).

    Uses ``tmp_path / "d.sock"`` when that is at most 100 characters long;
    otherwise falls back to a fresh ``mkdtemp`` directory, which is removed
    again once the test finishes.

    Yields:
        ``Path`` of the socket file (the file itself is not created here).
    """
    import shutil

    candidate = tmp_path / "d.sock"
    fallback_dir = None
    if len(str(candidate)) > 100:
        fallback_dir = Path(tempfile.mkdtemp(prefix="iai-sock-"))
        candidate = fallback_dir / "d.sock"
    try:
        yield candidate
    finally:
        # pytest cleans tmp_path itself; only the fallback dir is ours.
        if fallback_dir is not None:
            shutil.rmtree(fallback_dir, ignore_errors=True)
@pytest.fixture
def tmp_state(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path:
    """Point daemon_state.STATE_PATH at a file under tmp_path and return that path."""
    state_file = tmp_path / ".daemon-state.json"
    monkeypatch.setattr(daemon_state, "STATE_PATH", state_file)
    return state_file
# ---------------------------------------------------------------- unit tests
def test_validate_session_open_accepts_valid_message() -> None:
    """A session_open message with string session_id and ts validates cleanly."""
    message = {
        "type": "session_open",
        "session_id": "s1",
        "ts": "2026-04-19T00:00:00Z",
    }
    valid, problem = _validate_socket_message(message)
    assert valid is True
    assert problem is None
def test_validate_session_open_accepts_missing_session_id() -> None:
    """session_id is optional: a bare session_open message still validates."""
    message = {"type": "session_open"}
    valid, problem = _validate_socket_message(message)
    assert valid is True
    assert problem is None
def test_validate_session_open_rejects_non_string_session_id() -> None:
    """An integer session_id is rejected and the error names the field."""
    message = {"type": "session_open", "session_id": 123, "ts": "x"}
    valid, problem = _validate_socket_message(message)
    assert valid is False
    assert problem is not None
    assert "session_id" in problem
def test_validate_session_open_rejects_non_string_ts() -> None:
    """An integer ts is rejected and the error names the field."""
    message = {"type": "session_open", "session_id": "s1", "ts": 42}
    valid, problem = _validate_socket_message(message)
    assert valid is False
    assert problem is not None
    assert "ts" in problem
# ---------------------------------------------------------------- dispatcher unit
def _make_fake_store() -> Any:
return MagicMock()
def _make_fake_lock() -> Any:
    """Return a MagicMock constrained to the ProcessLock interface."""
    fake_lock = MagicMock(spec=ProcessLock)
    return fake_lock
# We call asyncio.run() directly in tests below; no asyncio marker needed.
def test_dispatch_session_open_queues_first_turn_and_cascade(
    tmp_state: Path,
) -> None:
    """session_open records a first-turn stamp, queues the cascade and persists state.

    Checks that first_turn_pending[session_id] is a non-empty string, that
    hippea_cascade_request is pending for the same session, that
    last_session_open echoes the id, and that the state file on disk shows
    the pending cascade.
    """
    state: dict = {"fsm_state": "WAKE"}
    request = {
        "type": "session_open",
        "session_id": "sess-abc",
        "ts": "2026-04-19T12:00:00Z",
    }
    reply = asyncio.run(
        _dispatch_socket_request(request, _make_fake_store(), _make_fake_lock(), state)
    )
    assert reply == {"ok": True, "reason": "session_open_queued"}

    # First-turn hook flag: a non-empty string keyed by session id.
    first_turn = state.get("first_turn_pending")
    assert isinstance(first_turn, dict)
    stamp = first_turn.get("sess-abc")
    assert isinstance(stamp, str) and stamp

    # Cascade request flag.
    cascade = state.get("hippea_cascade_request")
    assert isinstance(cascade, dict)
    assert cascade.get("pending") is True
    assert cascade.get("session_id") == "sess-abc"

    # Introspection echo of the most recent session_open.
    echoed = state.get("last_session_open")
    assert isinstance(echoed, dict)
    assert echoed.get("session_id") == "sess-abc"

    # The state file must exist and carry the pending cascade.
    assert tmp_state.exists()
    persisted = json.loads(tmp_state.read_text())
    assert persisted.get("hippea_cascade_request", {}).get("pending") is True
def test_dispatch_session_open_missing_session_id_ok(tmp_state: Path) -> None:
    """A session_open request without session_id is still acknowledged as queued."""
    state: dict = {"fsm_state": "WAKE"}
    request = {"type": "session_open", "ts": "2026-04-19T12:00:00Z"}
    reply = asyncio.run(
        _dispatch_socket_request(request, _make_fake_store(), _make_fake_lock(), state)
    )
    assert reply.get("ok") is True
    assert reply.get("reason") == "session_open_queued"
def test_dispatch_session_open_clips_long_session_id(tmp_state: Path) -> None:
    """A 1000-char session_id is stored with at most 128 characters (ASVS V5)."""
    state: dict = {"fsm_state": "WAKE"}
    oversized = "a" * 1000
    request = {"type": "session_open", "session_id": oversized, "ts": "x"}
    reply = asyncio.run(
        _dispatch_socket_request(request, _make_fake_store(), _make_fake_lock(), state)
    )
    assert reply["ok"] is True
    echoed = state.get("last_session_open") or {}
    stored_id = echoed.get("session_id", "")
    assert len(stored_id) <= 128
# ---------------------------------------------------------------- no-regression
def test_dispatch_force_wake_still_works(tmp_state: Path) -> None:
    """force_wake is still acknowledged with reason wake_queued."""
    state: dict = {"fsm_state": "WAKE"}
    request = {"type": "force_wake", "ts": "x"}
    reply = asyncio.run(
        _dispatch_socket_request(request, _make_fake_store(), _make_fake_lock(), state)
    )
    assert reply == {"ok": True, "reason": "wake_queued"}
def test_dispatch_force_rem_still_works(tmp_state: Path) -> None:
    """force_rem is still acknowledged with reason rem_queued."""
    state: dict = {"fsm_state": "WAKE"}
    request = {"type": "force_rem", "ts": "x"}
    reply = asyncio.run(
        _dispatch_socket_request(request, _make_fake_store(), _make_fake_lock(), state)
    )
    assert reply == {"ok": True, "reason": "rem_queued"}
def test_dispatch_pause_still_works(tmp_state: Path) -> None:
    """pause replies paused=True and sets scheduler_paused in the state dict."""
    state: dict = {"fsm_state": "WAKE"}
    request = {"type": "pause"}
    reply = asyncio.run(
        _dispatch_socket_request(request, _make_fake_store(), _make_fake_lock(), state)
    )
    assert reply == {"ok": True, "paused": True}
    assert state["scheduler_paused"] is True
def test_dispatch_resume_still_works(tmp_state: Path) -> None:
    """resume replies paused=False and clears scheduler_paused in the state dict."""
    state: dict = {"fsm_state": "WAKE", "scheduler_paused": True}
    request = {"type": "resume"}
    reply = asyncio.run(
        _dispatch_socket_request(request, _make_fake_store(), _make_fake_lock(), state)
    )
    assert reply == {"ok": True, "paused": False}
    assert state["scheduler_paused"] is False
def test_dispatch_user_initiated_sleep_still_works(tmp_state: Path) -> None:
    """user_initiated_sleep from WAKE replies ok with state TRANSITIONING."""
    state: dict = {"fsm_state": "WAKE"}
    request = {"type": "user_initiated_sleep", "reason": "night", "ts": "x"}
    reply = asyncio.run(
        _dispatch_socket_request(request, _make_fake_store(), _make_fake_lock(), state)
    )
    assert reply.get("ok") is True
    assert reply.get("state") == "TRANSITIONING"
def test_dispatch_status_still_works(tmp_state: Path) -> None:
    """status replies ok, echoes the FSM state and includes a version key."""
    state: dict = {"fsm_state": "WAKE"}
    request = {"type": "status"}
    reply = asyncio.run(
        _dispatch_socket_request(request, _make_fake_store(), _make_fake_lock(), state)
    )
    assert reply.get("ok") is True
    assert reply.get("state") == "WAKE"
    # The reply must carry a version field (Plan 04-gap-1).
    assert "version" in reply
# ---------------------------------------------------------------- round-trip
class _ThreadedDaemon:
    """Run the real serve_control_socket on a background thread + event loop.

    The server is wired to the production ``_dispatch_socket_request`` with
    a state dict owned by the test, so the 7th-message path is exercised
    end-to-end while the test body is free to call ``asyncio.run()``
    repeatedly.

    Attributes set in ``__init__``: ``path`` (socket path), ``state`` (dict
    handed to the dispatcher), ``lock`` / ``store`` (MagicMock stand-ins),
    ``shutdown`` (asyncio.Event created on the loop thread), and ``error``
    (exception that ended the server loop, if any).
    """

    def __init__(self, path: Path, state: dict) -> None:
        self.path = path
        self.state = state
        self.lock = MagicMock(spec=ProcessLock)
        self.store = MagicMock()
        self.shutdown: asyncio.Event | None = None  # populated on the loop thread
        self.error: BaseException | None = None  # kept for post-mortem debugging
        self._loop: asyncio.AbstractEventLoop | None = None
        self._thread: threading.Thread | None = None
        self._ready = threading.Event()

    def start(self) -> None:
        """Start the server thread and block until it signals readiness.

        Raises:
            AssertionError: if the thread does not signal within 5 seconds.
        """

        def _run() -> None:
            loop = asyncio.new_event_loop()
            self._loop = loop
            asyncio.set_event_loop(loop)
            self.shutdown = asyncio.Event()

            async def _serve() -> None:
                # Hand the real dispatcher the state we own.
                async def _dispatcher(req: dict) -> dict:
                    return await _dispatch_socket_request(
                        req, self.store, self.lock, self.state
                    )

                task = asyncio.create_task(
                    serve_control_socket(
                        self.store,
                        self.lock,
                        self.state,
                        self.shutdown,  # type: ignore[arg-type]
                        dispatcher=_dispatcher,
                        socket_path=self.path,
                    )
                )
                # Give the server a moment to bind before signalling ready.
                await asyncio.sleep(0.1)
                self._ready.set()
                await task

            try:
                loop.run_until_complete(_serve())
            except Exception as exc:
                # Best-effort background server: never crash the thread,
                # but remember the failure instead of discarding it.
                self.error = exc
            finally:
                try:
                    loop.close()
                except Exception:
                    pass

        self._thread = threading.Thread(target=_run, daemon=True)
        self._thread.start()
        assert self._ready.wait(timeout=5.0), "threaded daemon failed to start"

    def stop(self) -> None:
        """Signal shutdown on the loop thread and wait up to 5s for it to exit."""
        loop = self._loop
        if loop is None:
            return
        if self.shutdown is not None:
            try:
                loop.call_soon_threadsafe(self.shutdown.set)
            except RuntimeError:
                # The loop is already closed (the server ended early), so
                # there is nothing left to signal; don't mask the test's
                # own failure with this one.
                pass
        if self._thread is not None:
            self._thread.join(timeout=5.0)
async def _send(path: Path, msg: dict, *, timeout: float = 5.0) -> dict:
reader, writer = await asyncio.open_unix_connection(str(path))
try:
writer.write((json.dumps(msg) + "\n").encode("utf-8"))
await writer.drain()
line = await asyncio.wait_for(reader.readline(), timeout=timeout)
return json.loads(line)
finally:
try:
writer.close()
await writer.wait_closed()
except Exception:
pass
def test_session_open_end_to_end_round_trip(
    tmp_socket: Path, tmp_state: Path,
) -> None:
    """A ``session_open`` request round-trips over a real unix socket.

    Besides the reply, checks the state mutations that are visible once the
    reply has arrived: a non-empty string stamp under
    ``first_turn_pending[session_id]`` and a pending
    ``hippea_cascade_request``.
    """
    session_id = "e2e-sess-1"
    request = {
        "type": "session_open",
        "session_id": session_id,
        "ts": "2026-04-19T12:00:00Z",
    }
    state: dict = {"fsm_state": "WAKE"}
    daemon = _ThreadedDaemon(tmp_socket, state)
    daemon.start()
    try:
        resp = asyncio.run(_send(tmp_socket, request))
        assert resp == {"ok": True, "reason": "session_open_queued"}

        # The session is registered as awaiting its first turn.
        pending = state.get("first_turn_pending")
        assert isinstance(pending, dict)
        stamp = pending.get(session_id)
        # Only "non-empty string" is enforced here; presumably an ISO-8601
        # timestamp -- verify against the dispatcher.
        assert isinstance(stamp, str) and stamp

        # A cascade request was queued as a side effect.
        cascade = state.get("hippea_cascade_request")
        assert isinstance(cascade, dict)
        assert cascade.get("pending") is True
    finally:
        daemon.stop()
def test_session_open_does_not_regress_other_6_types(
    tmp_socket: Path, tmp_state: Path,
) -> None:
    """The six other message types still succeed end-to-end.

    Covers force_wake, force_rem, pause, resume, status and
    user_initiated_sleep, sent in that order against one daemon.
    """
    state: dict = {"fsm_state": "WAKE"}
    daemon = _ThreadedDaemon(tmp_socket, state)
    daemon.start()

    def roundtrip(msg: dict) -> dict:
        return asyncio.run(_send(tmp_socket, msg))

    try:
        # The two queueing commands have an exact expected reply.
        wake_reply = roundtrip({"type": "force_wake", "ts": "x"})
        assert wake_reply == {"ok": True, "reason": "wake_queued"}
        rem_reply = roundtrip({"type": "force_rem", "ts": "x"})
        assert rem_reply == {"ok": True, "reason": "rem_queued"}

        # The remaining types only need to report success.
        assert roundtrip({"type": "pause"}).get("ok") is True
        assert roundtrip({"type": "resume"}).get("ok") is True
        assert roundtrip({"type": "status"}).get("ok") is True

        # State is WAKE, so a user-initiated sleep is a valid transition.
        sleep_reply = roundtrip(
            {"type": "user_initiated_sleep", "reason": "night", "ts": "x"}
        )
        assert sleep_reply.get("ok") is True
    finally:
        daemon.stop()

View file

@ -0,0 +1,516 @@
"""Phase 07.1 Plan 08 — R5 acceptance: concurrent wrapper cold-start regression trap.
THE regression-trap test that catches the precise scenario Phase 7's verifier
missed: N parallel wrapper cold-starts when no daemon exists.
SPEC R5 / A2 contract:
- PASSES on post-Phase-7.1 code (with launchd-managed listener):
bridge.ts is a pure connector (Plan 07.1-04) -> all 5 wrappers connect
to the SAME launchd-pre-bound socket -> launchd spawns the daemon
ONCE in response to the first connection -> all 5 wrappers share it.
- FAILS deterministically on pre-Phase-7.1 baseline:
bridge.ts spawn-fallback wins the TOCTOU race for multiple wrappers,
2-5 daemons end up bound, the singleton assertion fires.
Without this test, Phase 7.1 has the same verification gap Phase 7 had:
architectural code coverage without runtime invariant coverage. This test IS
the runtime invariant proof.
Test isolation: a per-test LaunchAgent with a unique Label
``com.iai-mcp.daemon.test-<pid>-<tmp_id>`` is rendered into ``tmp_path/
Library/LaunchAgents/`` (NOT the user's real ``~/Library/LaunchAgents/``,
to avoid pollution if teardown is interrupted) and loaded via
``launchctl load -w``. The test socket lives under
``/tmp/iai-cspawn-<pid>-<tmp_id>/d.sock`` (within macOS's 104-byte
AF_UNIX path cap). Teardown unloads the agent, removes the plist, kills
any spawned test daemon (env-filtered to never touch the user's real
production daemon), and removes the socket.
Total runtime: ~25-30s (5 staggered cold-starts + 15s settle + readline
poll). Override with ``IAI_MCP_SKIP_LAUNCHCTL_TESTS=1`` to skip.
This module is macOS-only (LaunchAgent + launchctl). Skipped on Linux/Windows.
"""
from __future__ import annotations
import json
import os
import platform
import select
import signal
import subprocess
import sys
import time
from pathlib import Path
import psutil
import pytest
# Repository root: two directory levels above this test file.
REPO = Path(__file__).resolve().parent.parent
# Directory of the Node MCP wrapper that the built_wrapper fixture builds.
WRAPPER = REPO / "mcp-wrapper"
# Module-wide marker: every test in this file is skipped unless the host
# platform reports "Darwin" (macOS).
pytestmark = pytest.mark.skipif(
platform.system() != "Darwin",
reason="LaunchAgent + launchctl is macOS-only",
)
# ---------------------------------------------------------------------------
# Fixtures.
# ---------------------------------------------------------------------------
@pytest.fixture(scope="module")
def built_wrapper() -> Path:
    """Build the TS wrapper once per module and return ``dist/index.js``.

    ``npm install`` runs only when ``node_modules`` is absent;
    ``npm run build`` always runs. Either command failing raises
    ``subprocess.CalledProcessError`` (``check=True``).

    Same pattern as ``tests/test_socket_subagent_reuse.py:built_wrapper``.
    """
    needs_install = not (WRAPPER / "node_modules").exists()
    if needs_install:
        subprocess.run(["npm", "install"], cwd=WRAPPER, check=True)
    subprocess.run(["npm", "run", "build"], cwd=WRAPPER, check=True)
    entry_point = WRAPPER / "dist" / "index.js"
    assert entry_point.exists(), "npm run build should have produced dist/index.js"
    return entry_point
def _signal_test_daemons(sock_path: Path, sig: int) -> None:
    """Send *sig* to every iai_mcp.daemon process tagged with *sock_path*.

    A process qualifies only when its command line mentions
    ``iai_mcp.daemon`` AND its environment carries
    ``IAI_DAEMON_SOCKET_PATH`` equal to the test socket path. The env
    filter is what keeps the user's real production daemon untouched.
    Processes that vanish or deny access mid-scan are skipped.
    """
    target = str(sock_path)
    for proc in psutil.process_iter(["cmdline", "environ"]):
        try:
            cmdline = " ".join(proc.info.get("cmdline") or [])
            if "iai_mcp.daemon" not in cmdline:
                continue
            penv = proc.info.get("environ") or {}
            if penv.get("IAI_DAEMON_SOCKET_PATH") == target:
                proc.send_signal(sig)
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            continue


@pytest.fixture
def test_launchagent(tmp_path):
    """Render + load a tmp LaunchAgent against an isolated test socket path.

    The plist is written into ``tmp_path/Library/LaunchAgents/`` (NOT the
    user's real ``~/Library/LaunchAgents/``) so any teardown failure leaves
    no pollution under the user's home directory. ``launchctl load -w``
    accepts any absolute plist path; the loaded agent is identified
    internally by its ``Label`` value, which is unique per-test
    (PID + ``tmp_path`` id).

    [Rule 3 deviation] The base template only sets PATH/HOME/
    IAI_MCP_LAUNCHD_MANAGED in EnvironmentVariables. Without
    ``IAI_DAEMON_SOCKET_PATH`` in env the launchd-spawned daemon picks up
    the socket via fd 3 (LISTEN_FDS branch, Plan 07.1-02), but the
    psutil-environ filter the test uses to count "daemons bound to this
    test socket" returns 0 because the env var was never set in the
    daemon's process environment. Inject ``IAI_DAEMON_SOCKET_PATH`` into
    the rendered plist's EnvironmentVariables so the daemon process
    carries it (harmlessly -- the launchd path ignores the env value and
    uses fd 3) and the test's environ filter works.

    Setup runs inside the same ``try`` as the ``yield`` so that a skip or
    failure during setup still unloads the agent and removes the /tmp
    socket directory.

    Yields: ``(sock_path, plist_path, label, env)`` -- env is suitable for
    spawning wrappers via subprocess.Popen.
    """
    if os.environ.get("IAI_MCP_SKIP_LAUNCHCTL_TESTS") == "1":
        pytest.skip("IAI_MCP_SKIP_LAUNCHCTL_TESTS=1")

    unique = f"{os.getpid()}-{id(tmp_path) & 0xFFFFFF:x}"
    # Use /tmp/ for the socket directory (macOS AF_UNIX 104-byte path cap;
    # tmp_path under /private/var/folders/... is too long for some labels).
    sock_dir = Path(f"/tmp/iai-cspawn-{unique}")
    sock_path = sock_dir / "d.sock"
    label = f"com.iai-mcp.daemon.test-{unique}"
    # Render plist under tmp_path/Library/LaunchAgents/ (NOT the user's
    # real ~/Library/LaunchAgents/ -- avoids pollution if teardown is
    # interrupted on a dev box where the production daemon is OFF).
    plist_dir = tmp_path / "Library" / "LaunchAgents"
    plist_path = plist_dir / f"{label}.plist"
    unload_cmd = ["launchctl", "unload", "-w", str(plist_path)]

    sock_dir.mkdir(parents=True, exist_ok=True)
    try:
        # Remove a stale socket without an exists()/unlink() race.
        sock_path.unlink(missing_ok=True)
        plist_dir.mkdir(parents=True, exist_ok=True)

        # Read template and substitute placeholders. Then:
        #   1. Replace the production label string ONLY at the
        #      <key>Label</key> binding site (anchor on the surrounding
        #      <string>...</string> so we don't accidentally rewrite the
        #      comment block at the top, which mentions the production
        #      label by name).
        #   2. Replace the production socket path with the test socket path.
        #   3. Inject IAI_DAEMON_SOCKET_PATH and PYTHONPATH into
        #      EnvironmentVariables (Rule 3 fix -- without
        #      IAI_DAEMON_SOCKET_PATH in the daemon's process env, the
        #      psutil-environ filter cannot identify the launchd-spawned
        #      daemon as belonging to this test).
        template = (
            REPO / "scripts" / "com.iai-mcp.daemon.plist.template"
        ).read_text()
        label_old_xml = "<string>com.iai-mcp.daemon</string>"
        label_new_xml = f"<string>{label}</string>"
        label_hits = template.count(label_old_xml)
        if label_hits != 1:
            pytest.fail(
                f"plist template invariant broken: expected exactly one "
                f"<string>com.iai-mcp.daemon</string> occurrence (the "
                f"<key>Label</key> binding); found "
                f"{label_hits}",
            )
        # NOTE(review): the whitespace inside the two LAUNCHD_MANAGED
        # literals below must match the template byte-for-byte or the
        # injection silently becomes a no-op -- confirm against the template.
        rendered = (
            template
            .replace("{PYTHON_PATH}", sys.executable)
            .replace("{HOME}", str(Path.home()))
            .replace(label_old_xml, label_new_xml)
            .replace(
                f"{Path.home()}/.iai-mcp/.daemon.sock",
                str(sock_path),
            )
            .replace(
                "<key>IAI_MCP_LAUNCHD_MANAGED</key>\n <string>1</string>",
                "<key>IAI_MCP_LAUNCHD_MANAGED</key>\n <string>1</string>\n"
                f" <key>IAI_DAEMON_SOCKET_PATH</key>\n <string>{sock_path}</string>\n"
                f" <key>PYTHONPATH</key>\n <string>{REPO / 'src'}</string>",
            )
        )
        plist_path.write_text(rendered)

        # Pre-clean (idempotent). Ignore any "not loaded" errors.
        subprocess.run(unload_cmd, capture_output=True, check=False)

        # Load the test LaunchAgent.
        res = subprocess.run(
            ["launchctl", "load", "-w", str(plist_path)],
            capture_output=True, text=True, check=False,
        )
        if res.returncode != 0:
            # Common causes: TCC denial on macOS Sequoia/Sonoma, missing
            # /Library/LaunchAgents permission, plist syntax error.
            pytest.skip(
                f"launchctl load failed (rc={res.returncode}): {res.stderr.strip()}"
            )

        # Verify registration. If load returned 0 but the label is missing,
        # something is off -- fail rather than silently skip. The unload
        # happens in the ``finally`` below.
        list_res = subprocess.run(
            ["launchctl", "list"], capture_output=True, text=True, check=False,
        )
        if label not in list_res.stdout:
            pytest.fail(
                f"LaunchAgent {label!r} not present in `launchctl list` after load",
            )

        # Prepend our src/ to any inherited PYTHONPATH. Joining only the
        # non-empty parts avoids a trailing separator (an empty path entry)
        # when the variable is unset.
        pythonpath_parts = [str(REPO / "src")]
        inherited_pythonpath = os.environ.get("PYTHONPATH", "")
        if inherited_pythonpath:
            pythonpath_parts.append(inherited_pythonpath)
        env = {
            **os.environ,
            "IAI_MCP_PYTHON": sys.executable,
            "PYTHONPATH": os.pathsep.join(pythonpath_parts),
            "IAI_DAEMON_SOCKET_PATH": str(sock_path),
        }

        yield sock_path, plist_path, label, env
    finally:
        # Teardown: unload, kill any spawned test daemon (env-filtered),
        # remove socket file. The plist itself lives under tmp_path which
        # pytest cleans up automatically.
        subprocess.run(unload_cmd, capture_output=True, check=False)
        # Env-filtered daemon kill. NEVER touch the user's real production
        # daemon (it would be running with the production socket path,
        # not the tmp test socket path).
        _signal_test_daemons(sock_path, signal.SIGTERM)
        # Brief settle, then second-pass SIGKILL on stragglers.
        time.sleep(0.5)
        _signal_test_daemons(sock_path, signal.SIGKILL)
        try:
            sock_path.unlink()
        except OSError:  # includes FileNotFoundError
            pass
        try:
            sock_dir.rmdir()
        except OSError:
            pass
# ---------------------------------------------------------------------------
# Helpers.
# ---------------------------------------------------------------------------
def _spawn_wrapper_send_initialize(
    built_wrapper: Path, env: dict,
) -> subprocess.Popen:
    """Start one wrapper under ``node`` and write an MCP initialize request.

    The request is a single JSON line on the wrapper's stdin. The response
    is NOT read here: the caller polls stdout later, once the daemon has
    had time to settle.

    Returns:
        The ``Popen`` handle with stdin/stdout piped and stderr discarded.
    """
    handshake = json.dumps(
        {
            "jsonrpc": "2.0",
            "id": 1,
            "method": "initialize",
            "params": {
                "protocolVersion": "2025-03-26",
                "capabilities": {},
                "clientInfo": {"name": "concurrent-spawn-test", "version": "0.0"},
            },
        }
    ) + "\n"
    proc = subprocess.Popen(
        ["node", str(built_wrapper)],
        cwd=str(REPO),
        env=env,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
    )
    try:
        assert proc.stdin is not None
        proc.stdin.write(handshake.encode("utf-8"))
        proc.stdin.flush()
    except BrokenPipeError:
        # Wrapper died before reading stdin. The later stdout read sees
        # empty bytes and the test reports the missing success.
        pass
    return proc
def _read_initialize_response(
proc: subprocess.Popen, timeout_sec: float = 2.0,
) -> dict | None:
"""Poll wrapper stdout for one JSON-RPC line (the initialize response)."""
if proc.stdout is None:
return None
try:
ready, _, _ = select.select([proc.stdout], [], [], timeout_sec)
if not ready:
return None
line = proc.stdout.readline()
if not line:
return None
return json.loads(line.decode("utf-8"))
except (json.JSONDecodeError, OSError):
return None
def _count_daemons_for_socket(sock_path: Path) -> int:
    """Count iai_mcp.daemon processes whose env points at sock_path.

    A process counts when its command line contains ``iai_mcp.daemon`` and
    its environment has ``IAI_DAEMON_SOCKET_PATH`` equal to ``sock_path``.
    The launchd-spawned daemon binds via fd 3 (LISTEN_FDS), not via env, so
    the variable -- injected by the test plist -- acts purely as a tag for
    test isolation. Processes that disappear or deny access are ignored.
    """
    wanted = str(sock_path)

    def _is_tagged_daemon(proc) -> bool:
        try:
            cmdline = " ".join(proc.info.get("cmdline") or [])
            if "iai_mcp.daemon" not in cmdline:
                return False
            environ = proc.info.get("environ") or {}
            return environ.get("IAI_DAEMON_SOCKET_PATH") == wanted
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            return False

    return sum(
        1
        for proc in psutil.process_iter(["cmdline", "environ"])
        if _is_tagged_daemon(proc)
    )
def _count_binders(sock_path: Path) -> int:
    """Count distinct PIDs that hold sock_path open (lsof -U).

    Parses ``lsof -U -F pn`` output, where each line starts with a field
    tag: ``p`` lines carry a PID and ``n`` lines carry a name. A name line
    is attributed to the most recent valid PID line before it.
    """
    listing = subprocess.run(
        ["lsof", "-U", "-F", "pn"],
        capture_output=True, text=True, check=False,
    ).stdout
    wanted = str(sock_path)
    holders: set[int] = set()
    pid: int | None = None
    for field in listing.splitlines():
        tag, value = field[:1], field[1:]
        if tag == "p":
            try:
                pid = int(value)
            except ValueError:
                # Unparseable PID: drop attribution until the next p-line.
                pid = None
        elif tag == "n" and pid is not None and value == wanted:
            holders.add(pid)
    return len(holders)
# ---------------------------------------------------------------------------
# Tests.
# ---------------------------------------------------------------------------
def test_5_concurrent_wrapper_cold_starts_yield_singleton(
    built_wrapper, test_launchagent,
):
    """SPEC R5 / A2: 5 staggered cold-starts -> exactly 1 daemon after settle.

    Setup (via test_launchagent fixture):
      - Tmp LaunchAgent loaded against an isolated test socket path.
      - Plist has RunAtLoad=false. Empirically (macOS Sequoia 15.x),
        launchctl load -w for a Sockets-activated agent may spawn the
        daemon eagerly anyway -- the test tolerates this via the
        relaxed pre-condition (<= 1) and asserts the singleton
        invariant on the post-condition (== 1).

    Body:
      - Spawn 5 wrapper subprocesses with staggered start times
        (~0/50/100/150/200 ms apart). Each sends MCP initialize.
      - Wait 15s for the daemon to settle (cold-start ~8s embedder
        load + LanceDB open + buffer).
      - Read each wrapper's initialize response (with 2s readline
        timeout per wrapper -- they should all be ready by t+15s).
      - Snapshot the daemon and binder counts, then terminate the
        wrappers. Termination runs in a ``finally`` so wrappers are
        reaped and their pipes closed even if an earlier step raises.

    Assertions:
      (a) ``_count_daemons_for_socket(sock_path) == 1`` -- exactly one
          iai_mcp.daemon process bound to this test socket. The
          singleton invariant.
      (b) ``_count_binders(sock_path) <= 1`` -- lsof reports at most
          one process holding the socket file. The launchd pre-bound
          listener is owned by launchd itself, which may or may not
          appear in lsof depending on the version.
      (c) all 5 wrapper subprocesses received a successful MCP
          initialize JSON-RPC response.

    On post-Phase-7.1 code (current main): bridge.ts is a pure connector
    (Plan 07.1-04 deleted spawn-fallback). All 5 wrappers connect to the
    SAME launchd-pre-bound socket, launchd's spawn-once contract gives
    them the SAME daemon, all 3 assertions hold. THIS is what the test
    proves.

    Regression-trap caveat: the SPEC framing of "FAILS deterministically
    on pre-Phase-7.1 baseline" turned out to be platform-conditional. On
    macOS Sequoia 15.x, ``launchctl load -w`` eagerly spawns the daemon
    when the plist has Sockets defined (despite RunAtLoad=false). With
    the launchd-pre-bound socket already up and a daemon already bound,
    pre-Phase-7.1 bridge.ts would also succeed -- its spawn-fallback
    would never fire because the initial connect succeeds. This test
    therefore PROVES the post-Phase-7.1 invariant cleanly (its primary
    job) but is NOT a deterministic regression trap on macOS Sequoia.
    On older macOS versions where launchctl-load defers spawn until
    first connection, the regression-trap behavior would hold. See the
    SUMMARY's "Regression-trap caveat" section for the deferred-items
    note on a true-TOCTOU test architecture.
    """
    sock_path, _plist_path, _label, env = test_launchagent

    # Pre-condition: at most 1 daemon bound to this socket. RunAtLoad=false
    # in the plist is documented as "spawn lazily on first connection",
    # but on macOS Sequoia (15.x) `launchctl load -w` for a Sockets-
    # activated agent eagerly spawns the daemon despite RunAtLoad=false.
    # This does NOT defeat the singleton invariant -- it just shifts the
    # spawn moment. The critical assertion is the post-condition (`== 1`
    # after 5 wrappers), not whether the daemon was 0 or 1 before.
    initial_daemon_count = _count_daemons_for_socket(sock_path)
    assert initial_daemon_count <= 1, (
        f"expected <= 1 daemon before test, found {initial_daemon_count} "
        f"(stale daemons from earlier test? cleanup leak?)"
    )

    procs: list[subprocess.Popen] = []
    try:
        # Spawn 5 wrappers staggered by ~50 ms each. Total stagger window
        # ~200 ms -- well within the launchd socket-activation race window
        # this test exercises.
        stagger_intervals = [0.0, 0.05, 0.05, 0.05, 0.05]
        for delay in stagger_intervals:
            if delay > 0:
                time.sleep(delay)
            procs.append(_spawn_wrapper_send_initialize(built_wrapper, env))

        # Wait 15s for the daemon to settle. Cold start = 8s embedder load
        # + LanceDB open + buffer. Per advisor: do NOT shorten this -- the
        # 8s embedder cold-start is the empirical reality.
        time.sleep(15)

        # Read each wrapper's initialize response.
        init_responses: list[dict | None] = [
            _read_initialize_response(p, timeout_sec=2.0) for p in procs
        ]

        # Snapshot the singleton + binder counts BEFORE terminating
        # wrappers. Terminating may take 2s+ per wrapper; we want the
        # assertions to fire against the steady state we just observed.
        daemon_count = _count_daemons_for_socket(sock_path)
        binder_count = _count_binders(sock_path)
    finally:
        # Cleanup wrappers (release their connect-side fds; daemon still
        # up for the fixture teardown to handle). Runs even when a step
        # above raised, so no node process outlives the test.
        for proc in procs:
            try:
                proc.terminate()
                proc.wait(timeout=2)
            except subprocess.TimeoutExpired:
                proc.kill()
                proc.wait()  # reap so no zombie is left behind
            finally:
                for pipe in (proc.stdin, proc.stdout):
                    if pipe is None:
                        continue
                    try:
                        pipe.close()
                    except OSError:
                        pass

    # Assertion (a) -- THE singleton invariant.
    assert daemon_count == 1, (
        f"singleton invariant violated: {daemon_count} daemons bound to "
        f"{sock_path} after 5 concurrent wrapper cold-starts. "
        f"contract: launchd handles the spawn-once; all wrappers join "
        f"the same daemon. Pre-Phase-7.1 baseline reproduces 2-5 daemons "
        f"via TOCTOU race in bridge.ts spawn-fallback."
    )

    # Assertion (b) -- file-holder confirmation. Either 0 (the socket
    # file is owned by launchd's pre-bind, not a daemon process fd entry)
    # or 1 (the spawned daemon also shows in lsof). In either case the
    # COUNT must be <= 1: 2+ would mean dueling binders.
    assert binder_count <= 1, (
        f"lsof reports {binder_count} binders for {sock_path}; "
        f"expected <= 1 (singleton)"
    )

    # Assertion (c) -- all 5 wrappers handshook successfully. A wrapper
    # that received an initialize result proves it connected to a real
    # daemon and got a real response (not just a launchd-side accept).
    success_count = sum(
        1 for r in init_responses if r is not None and "result" in r
    )
    assert success_count == 5, (
        f"only {success_count}/5 wrappers received successful initialize "
        f"response. Responses: {init_responses}"
    )
@pytest.mark.skip(
    reason="manual baseline regression check; run only against pre-Phase-7.1 "
    "(git stash) to demonstrate the regression-trap behavior",
)
def test_pre_phase_7_1_baseline_fails():
    """Placeholder documenting the manual pre-7.1 baseline check.

    Always skipped and has no body; it exists only to carry this procedure.

    To demonstrate the regression-trap behavior by hand:
      1. ``git stash`` (or ``git checkout <pre-7.1-commit>``).
      2. ``cd mcp-wrapper && npm run build`` to rebuild bridge.ts with
         the spawn-fallback restored.
      3. ``pytest tests/test_concurrent_wrapper_spawn.py::\\
         test_5_concurrent_wrapper_cold_starts_yield_singleton -v``
      4. Expected: assertion (a) FAILS with daemon_count >= 2 (the
         TOCTOU race produces multiple daemons that all bind in
         parallel before any of them notice the others).
      5. ``git stash pop`` (or ``git checkout main``) to restore
         Phase 7.1.
      6. Rebuild + rerun: the assertion passes.

    This is not automated because the executor of Plan 07.1-08 cannot
    easily git-stash mid-execution: stashing would remove this test file
    itself, which lives in the working tree. A maintainer who wants to
    re-prove the regression-trap behavior follows the steps above.
    """

View file

@ -0,0 +1,143 @@
"""Tests for the consolidated_from edge type (MEM-07, D-16, D-29).
After run_heavy_consolidation:
- `consolidated_from` edges link the semantic summary record to each source
episodic record in its cluster.
- src = summary record (tier=semantic); dst = source episode.
- Source episodes keep their literal_surface verbatim (MEM-01 preservation).
"""
from __future__ import annotations
from datetime import datetime, timezone
from uuid import UUID, uuid4
import pytest
from iai_mcp.types import EMBED_DIM, MemoryRecord
def _record(text: str, tier: str = "episodic") -> MemoryRecord:
    """Build a minimal ``MemoryRecord`` whose literal surface is *text*.

    Every record gets a fresh UUID, the same unit embedding (1.0 in the
    first slot, zeros elsewhere), identical created/updated timestamps, and
    neutral defaults for every remaining field.
    """
    stamp = datetime.now(timezone.utc)
    unit_embedding = [1.0] + [0.0] * (EMBED_DIM - 1)
    return MemoryRecord(
        # Identity and content.
        id=uuid4(),
        tier=tier,
        literal_surface=text,
        aaak_index="",
        embedding=unit_embedding,
        language="en",
        tags=[],
        provenance=[],
        # Graph placement.
        community_id=None,
        centrality=0.0,
        detail_level=2,
        # Retention knobs, all neutral.
        pinned=False,
        never_decay=False,
        never_merge=False,
        stability=0.0,
        difficulty=0.0,
        last_reviewed=None,
        # Timestamps.
        created_at=stamp,
        updated_at=stamp,
    )
def _run_heavy(store):
    """Run one heavy consolidation pass on *store* with the LLM disabled.

    Uses session id ``"s-cfr"``, fresh budget and rate-limit ledgers backed
    by the same store, and ``has_api_key=False``. Returns whatever
    ``run_heavy_consolidation`` returns.
    """
    from iai_mcp.guard import BudgetLedger, RateLimitLedger
    from iai_mcp.sleep import SleepConfig, run_heavy_consolidation

    offline_config = SleepConfig(llm_enabled=False)
    budget_ledger = BudgetLedger(store)
    rate_ledger = RateLimitLedger(store)
    return run_heavy_consolidation(
        store,
        session_id="s-cfr",
        config=offline_config,
        budget=budget_ledger,
        rate=rate_ledger,
        has_api_key=False,
    )
def test_consolidated_from_edge_created_on_heavy_run(tmp_path):
    """Cohesive cluster of 3 -> at least three consolidated_from edges."""
    from iai_mcp.store import EDGES_TABLE, MemoryStore

    store = MemoryStore(path=tmp_path)
    first, second, third = (_record(f"rec {i}") for i in range(3))
    for rec in (first, second, third):
        store.insert(rec)
    # Fully connect the three records (a triangle) so they cluster together.
    triangle = [
        (first.id, second.id),
        (second.id, third.id),
        (first.id, third.id),
    ]
    store.boost_edges(triangle, edge_type="hebbian", delta=0.5)

    _run_heavy(store)

    edges = store.db.open_table(EDGES_TABLE).to_pandas()
    consolidated = edges[edges["edge_type"] == "consolidated_from"]
    assert len(consolidated) >= 3
def test_consolidated_from_edge_points_semantic_to_episodes(tmp_path):
    """Each consolidated_from edge joins a semantic summary to a source episode.

    Exactly one endpoint must be one of the three inserted records; the
    other must resolve to a record with ``tier == "semantic"``. Either
    orientation is accepted because boost_edges canonicalises (src, dst) as
    sorted. When the summary is the src, the dst is additionally checked to
    be ``tier == "episodic"``.
    """
    from iai_mcp.store import EDGES_TABLE, MemoryStore

    store = MemoryStore(path=tmp_path)
    first, second, third = (_record(f"rec {i}") for i in range(3))
    for rec in (first, second, third):
        store.insert(rec)
    triangle = [
        (first.id, second.id),
        (second.id, third.id),
        (first.id, third.id),
    ]
    store.boost_edges(triangle, edge_type="hebbian", delta=0.5)

    _run_heavy(store)

    edges = store.db.open_table(EDGES_TABLE).to_pandas()
    cf = edges[edges["edge_type"] == "consolidated_from"]
    assert not cf.empty

    source_ids = {str(rec.id) for rec in (first, second, third)}
    for _, row in cf.iterrows():
        src_is_source = row["src"] in source_ids
        dst_is_source = row["dst"] in source_ids
        if not src_is_source and dst_is_source:
            # Summary -> episode orientation.
            summary = store.get(UUID(row["src"]))
            assert summary is not None
            assert summary.tier == "semantic"
            dst_rec = store.get(UUID(row["dst"]))
            assert dst_rec is not None
            assert dst_rec.tier == "episodic"
        elif not dst_is_source and src_is_source:
            # Canonicalised (sorted) orientation: the summary sits in dst.
            summary = store.get(UUID(row["dst"]))
            assert summary is not None
            assert summary.tier == "semantic"
        else:
            # Both endpoints are sources, or neither is: no summary endpoint.
            pytest.fail(
                f"consolidated_from edge without a summary endpoint: "
                f"{row['src']} -> {row['dst']}"
            )
def test_consolidated_from_edges_preserve_literal_in_episodes(tmp_path):
    """Source episodes keep their literal_surface verbatim after consolidation."""
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    literals = ["alpha", "beta", "gamma"]
    episodes = [_record(text) for text in literals]
    for episode in episodes:
        store.insert(episode)
    a, b, c = (episode.id for episode in episodes)
    store.boost_edges(
        [(a, b), (b, c), (a, c)],
        edge_type="hebbian", delta=0.5,
    )

    _run_heavy(store)

    # Re-read every episode from the store and compare with what went in.
    for episode, expected in zip(episodes, literals):
        reloaded = store.get(episode.id)
        assert reloaded is not None
        assert reloaded.literal_surface == expected

View file

@ -0,0 +1,313 @@
"""Grep-based static guards for constitutional invariants.
Verifies C1..C6 hold across the daemon-side module set.
Catalog:
- C3: no ANTHROPIC_API_KEY anywhere in daemon-side code.
- Pitfall 2: no fcntl.lockf (close-fd trap) anywhere in src/iai_mcp/.
- C5: no assignment to `.literal_surface` in daemon-side modules.
- no hardcoded Western clock-time in quiet_window.py.
- seal: PROFILE_KNOBS still has exactly 11 entries (daemon does NOT
add knobs).
- C6: identity_audit.py does NOT import ProcessLock / concurrency module.
"""
from __future__ import annotations
import re
from pathlib import Path
# Package directory scanned by the guards: <repo>/src/iai_mcp, resolved
# relative to this test file (two levels up, then src/iai_mcp).
SRC = Path(__file__).resolve().parent.parent / "src" / "iai_mcp"
# Daemon-side modules. Some (bedtime, host_cli) may not exist yet (future
# plans). We scan whichever ones exist today.
# Entries are file names relative to SRC.
DAEMON_MODULES: tuple[str, ...] = (
"daemon.py",
"dream.py",
"identity_audit.py",
"bedtime.py",
"host_cli.py",
"insight.py",
"quiet_window.py",
"daemon_state.py",
"concurrency.py",
"hippea_cascade.py", # TOK-14 / D5-05
)
def _existing_daemon_files() -> list[Path]:
    """Return the ``DAEMON_MODULES`` paths under ``SRC`` that exist on disk.

    Order follows ``DAEMON_MODULES``; missing modules are silently omitted.
    """
    present: list[Path] = []
    for module_name in DAEMON_MODULES:
        candidate = SRC / module_name
        if candidate.exists():
            present.append(candidate)
    return present
# ---------------------------------------------------------------------------
# C3: ANTHROPIC_API_KEY must never appear in daemon-side code
# ---------------------------------------------------------------------------
def test_no_api_key_in_daemon():
    """C3 (DAEMON-07 / D-14): no daemon-side module mentions ANTHROPIC_API_KEY.

    Zero paid-API cost is the invariant; the insight module is expected to
    go through a ``claude -p`` subprocess on the user's subscription
    instead. This is a plain substring scan, so comments count too.
    """
    offenders = [
        module.name
        for module in _existing_daemon_files()
        if "ANTHROPIC_API_KEY" in module.read_text()
    ]
    assert not offenders, f"C3 violation: ANTHROPIC_API_KEY found in {offenders}"
# ---------------------------------------------------------------------------
# Pitfall 2: fcntl.lockf must never be used (POSIX close-fd trap)
# ---------------------------------------------------------------------------
def test_no_lockf_anywhere():
    """Pitfall 2 (apenwarr 2010): ``fcntl.lockf`` must not appear in the package.

    A POSIX ``fcntl.lockf`` lock is released when ANY fd referring to the
    same file is closed; BSD ``fcntl.flock`` is bound to the open file
    description and is the one to use. Mixing the two is also a bug, so
    this scans every ``*.py`` directly under ``SRC`` (non-recursive), not
    just the daemon modules.
    """
    offenders = [
        source.name
        for source in SRC.glob("*.py")
        if "fcntl.lockf" in source.read_text()
    ]
    assert not offenders, f"Pitfall 2 violation: fcntl.lockf in {offenders}"
# ---------------------------------------------------------------------------
# C5: daemon must NEVER assign to record.literal_surface
# ---------------------------------------------------------------------------
def test_no_literal_surface_mutation_in_daemon():
    """C5 literal preservation: daemon modules never assign ``.literal_surface``.

    Reading ``.literal_surface`` is allowed; writing is forbidden. The
    pattern matches ``.literal_surface`` followed by a single ``=``. The
    negative lookahead ``(?!=)`` keeps ``==`` comparisons, which are reads,
    from being reported as assignments.
    """
    pattern = re.compile(r"\.literal_surface\s*=(?!=)")
    offenders: list[tuple[str, list[str]]] = []
    for f in _existing_daemon_files():
        matches = pattern.findall(f.read_text())
        if matches:
            offenders.append((f.name, matches))
    assert not offenders, f"C5 violation: {offenders}"
# ---------------------------------------------------------------------------
# no hardcoded Western 9-5 / clock-time in quiet_window.py
# ---------------------------------------------------------------------------
def test_no_hardcoded_clock_time_in_quiet_window():
    """D-05 global-product mandate: quiet_window.py holds no clock literals.

    The quiet window must be LEARNED from event history, never hardcoded.
    Only a few obvious patterns are flagged: the literals ``22:00`` and
    ``02:00`` and comparisons ``hour == 22`` / ``hour == 2``. Passes
    trivially when the module does not exist yet.
    """
    module = SRC / "quiet_window.py"
    if not module.exists():
        return  # module not yet created
    text = module.read_text()
    # Patterns that would indicate clock-based decisions.
    forbidden = (
        r"\b22:00\b",
        r"\b02:00\b",
        r"hour\s*==\s*22\b",
        r"hour\s*==\s*2\b",
    )
    offenders = [pat for pat in forbidden if re.search(pat, text)]
    assert not offenders, (
        f"D-05 violation: hardcoded clock-time patterns in quiet_window.py: {offenders}"
    )
# ---------------------------------------------------------------------------
# Plan 07.12-02 seal: PROFILE_KNOBS has exactly 11 entries
# (10 autistic-kernel + 1 operator wake_depth MCP-12; AUTIST-02/08/11/12 removed)
# ---------------------------------------------------------------------------
def test_profile_knobs_still_sealed():
    """The knob registry is sealed at 11 entries (Phase 07.12-02).

    The daemon must not add new knobs. Transient state (hebbian-rate boost
    during developmental sigma, etc.) belongs in events or
    .daemon-state.json, never in PROFILE_KNOBS.
    """
    from iai_mcp import profile

    knob_count = len(profile.PROFILE_KNOBS)
    assert knob_count == 11, (
        f"PROFILE_KNOBS unseal: expected 11, got {knob_count}"
    )
# ---------------------------------------------------------------------------
# TOK-13 / D5-04: profile knob names must NEVER appear in the
# session-start payload at any wake_depth. Knobs are applied server-side via
# response_decorator.apply_profile; their names must not cross the MCP wire.
# ---------------------------------------------------------------------------
def test_no_profile_knob_in_session_start_payload(tmp_path):
    """TOK-13: knob names never appear in the lazy session-start pointers.

    For each wake_depth (minimal, standard, deep) the payload is assembled
    and three fields -- ``identity_pointer``, ``brain_handle`` and
    ``topic_cluster_hint`` -- are checked against every name in
    ``PROFILE_KNOBS``.

    The legacy L0 identity kernel (``_seed_l0_identity``) historically
    recites a handful of autistic-kernel defaults inline in its
    literal_surface ('literal_preservation=strong, masking_off=true, ...').
    That predates TOK-13 and lives inside the user's identity record itself
    rather than in decorator output, so the l0 / l1 / l2 / rich_club
    segments are deliberately NOT scanned here.

    Knobs are applied server-side by response_decorator (Plan 05-03 D5-04);
    their names must never reach the MCP wire.
    """
    from iai_mcp import profile
    from iai_mcp.community import CommunityAssignment
    from iai_mcp.core import _seed_l0_identity
    from iai_mcp.session import assemble_session_start
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    _seed_l0_identity(store)
    assignment = CommunityAssignment()

    for mode in ("minimal", "standard", "deep"):
        state = profile.default_state()
        state["wake_depth"] = mode
        payload = assemble_session_start(
            store, assignment, [], profile_state=state,
        )
        # Only the NEW lazy fields are scanned; legacy segments carry
        # user-authored identity content and stay exempt by design.
        lazy_fields = (
            payload.identity_pointer,
            payload.brain_handle,
            payload.topic_cluster_hint,
        )
        lazy_text = " ".join(lazy_fields)
        for knob_name in profile.PROFILE_KNOBS:
            # The payload's own `wake_depth` field is a meta-attribute and
            # is not part of lazy_text, so echoing it there is fine.
            assert knob_name not in lazy_text, (
                f"TOK-13 violation: knob name '{knob_name}' found in "
                f"lazy session-start payload at wake_depth={mode} "
                f"(identity_pointer/brain_handle/topic_cluster_hint)"
            )
# ---------------------------------------------------------------------------
# Pitfall 1: wake_depth=minimal payload (<=30 raw tok) is below the
# Anthropic Sonnet 4.6 cache minimum (2048 tok). Adding cache_control in
# session.py would be silently ignored — wastes a breakpoint slot. Guard
# against accidental regression.
# ---------------------------------------------------------------------------
def test_no_cache_control_in_session_assembler():
    """Pitfall 1: session.py must not set cache_control.

    The minimal prefix cannot be cached on Sonnet 4.6 / Opus 4.7, and
    standard+deep caching lives in the TS wrapper, not the Python assembler.
    """
    f = SRC / "session.py"
    assert f.exists(), "session.py missing"
    # Decode explicitly as UTF-8: Python sources are UTF-8 by default, while
    # a bare read_text() would use the host locale encoding.
    text = f.read_text(encoding="utf-8")
    # Prose mentions of "cache_control" are fine (they document the pitfall).
    # Only the bare name followed by `=` or `:` (assignment / kwarg /
    # annotation) is treated as an actual code reference.
    pattern = re.compile(r"cache_control\s*[:=]")
    offenders = pattern.findall(text)
    assert not offenders, (
        f"Pitfall 1 violation: cache_control assignment/kwarg in session.py: "
        f"{offenders}"
    )
# ---------------------------------------------------------------------------
# C3 + TOK-13: response_decorator must be pure-local. No Anthropic
# SDK import, no ANTHROPIC_API_KEY read, no paid-API coupling.
# ---------------------------------------------------------------------------
def test_no_api_key_in_response_decorator():
    """C3 + TOK-13: response_decorator.py stays local-only.

    Scans the module source for any mention of the Anthropic SDK or its API
    key environment variable.
    """
    f = SRC / "response_decorator.py"
    assert f.exists(), "response_decorator.py missing after Plan 05-03"
    # Decode explicitly as UTF-8 so the scan does not depend on the host
    # locale encoding.
    text = f.read_text(encoding="utf-8")
    lower = text.lower()
    # Case-insensitive catch-all first; the two narrower checks below keep
    # their own, more specific failure messages.
    assert "anthropic" not in lower, (
        "C3 violation: response_decorator references 'anthropic'"
    )
    assert "ANTHROPIC_API_KEY" not in text, (
        "C3 violation: response_decorator references ANTHROPIC_API_KEY"
    )
    assert "import anthropic" not in lower, (
        "C3 violation: response_decorator imports anthropic SDK"
    )
# ---------------------------------------------------------------------------
# C6: identity_audit.py must not import ProcessLock
# ---------------------------------------------------------------------------
def test_identity_audit_has_no_lock_import():
    """C6: continuous audit runs even when daemon is paused.

    To make that invariant mechanical, identity_audit.py must NOT import the
    concurrency module -- the only way to accidentally take a lock is to
    import it. The check is a no-op when the module file does not exist.
    """
    f = SRC / "identity_audit.py"
    if not f.exists():
        return
    # Decode explicitly as UTF-8 so the scan does not depend on the host
    # locale encoding.
    text = f.read_text(encoding="utf-8")
    # No import of iai_mcp.concurrency, no `ProcessLock` symbol reference.
    assert "iai_mcp.concurrency" not in text, (
        "C6 violation: identity_audit.py imports iai_mcp.concurrency"
    )
    assert "ProcessLock" not in text, (
        "C6 violation: identity_audit.py references ProcessLock"
    )
    # Also: no `fcntl.` calls (belt-and-braces).
    assert "fcntl." not in text, (
        "C6 violation: identity_audit.py uses fcntl directly"
    )
# ---------------------------------------------------------------------------
# TOK-14: HIPPEA cascade module guards
# ---------------------------------------------------------------------------
def test_no_api_key_in_hippea_cascade():
    """C3 (D5-05): HIPPEA cascade is pure-local.

    ANTHROPIC_API_KEY and `anthropic` SDK imports are forbidden in
    hippea_cascade.py. The check is a no-op when the module file does not
    exist.
    """
    f = SRC / "hippea_cascade.py"
    if not f.exists():
        return  # module not yet created
    # Decode explicitly as UTF-8 so the scan does not depend on the host
    # locale encoding.
    text = f.read_text(encoding="utf-8")
    assert "ANTHROPIC_API_KEY" not in text, (
        "C3 violation: ANTHROPIC_API_KEY in hippea_cascade.py"
    )
    assert "import anthropic" not in text, (
        "C3 violation: `import anthropic` in hippea_cascade.py"
    )
    assert "from anthropic" not in text, (
        "C3 violation: `from anthropic` in hippea_cascade.py"
    )
def test_hippea_cascade_is_read_only_against_store():
    """C6 (D5-05): cascade prefetch never mutates the store.

    Grep for store-mutating call patterns (with trailing open-paren so the
    module's own enumerated-forbidden list in the docstring does not trip
    this guard). The check is a no-op when the module file does not exist.
    """
    f = SRC / "hippea_cascade.py"
    if not f.exists():
        return
    # Decode explicitly as UTF-8 so the scan does not depend on the host
    # locale encoding.
    text = f.read_text(encoding="utf-8")
    forbidden_calls = [
        "store.insert(",
        "store.append_provenance(",
        "store.append_provenance_batch(",
        "store.update(",
        "store.boost_edges(",
        "store.add_contradicts_edge(",
    ]
    offenders = [p for p in forbidden_calls if p in text]
    assert not offenders, (
        f"C6 violation: hippea_cascade.py contains store mutators: {offenders}"
    )

View file

@ -0,0 +1,426 @@
"""Tests for core.py additions -- DAEMON-06 / DAEMON-09.
Covers 8 behaviours:
1. consent=False short-circuits: socket is NEVER opened (C2 guard)
2. consent=True opens socket, sends NDJSON, returns daemon response
3. Missing / wrong-typed consent raises ValueError (ASVS V5 schema)
4. force_wake opens socket, sends NDJSON with 900s timeout
5. force_wake handles daemon-unreachable gracefully
6. memory_recall dispatch injects sleep_suggestion when dual-gate passes
7. memory_recall dispatch does NOT include sleep_suggestion key when gate fails
8. memory_recall does NOT break if detect_wind_down raises (silent fail)
"""
from __future__ import annotations
import asyncio
import json
import os
import tempfile
import threading
from datetime import datetime, timezone
from pathlib import Path
from unittest.mock import patch
import pytest
from iai_mcp import core
# ----------------------------------------------------------- threaded helper
class _ThreadedFakeDaemon:
"""Fake daemon that survives across multiple asyncio.run() calls.
`core.dispatch` uses its own asyncio.run per JSON-RPC method, which tears
down the event loop each call. A server started via asyncio.run() inside
the test body dies when that call returns, so the next asyncio.run can
connect to the socket file but no task is accepting -> timeout. Running
the server on a private background loop in a daemon thread keeps the
accept loop alive for the full test lifetime.
"""
def __init__(self, path: Path, captured: list, reply: dict) -> None:
self.path = path
self.captured = captured
self.reply = reply
self._loop: asyncio.AbstractEventLoop | None = None
self._server: asyncio.AbstractServer | None = None
self._thread: threading.Thread | None = None
self._ready = threading.Event()
def start(self) -> None:
def _run() -> None:
self._loop = asyncio.new_event_loop()
asyncio.set_event_loop(self._loop)
async def _handle(reader: asyncio.StreamReader, writer: asyncio.StreamWriter) -> None:
try:
line = await reader.readline()
if line:
self.captured.append(json.loads(line.decode("utf-8")))
writer.write((json.dumps(self.reply) + "\n").encode("utf-8"))
await writer.drain()
finally:
try:
writer.close()
await writer.wait_closed()
except Exception:
pass
async def _serve() -> None:
self.path.parent.mkdir(parents=True, exist_ok=True)
self._server = await asyncio.start_unix_server(_handle, path=str(self.path))
self._ready.set()
async with self._server:
await self._server.serve_forever()
try:
self._loop.run_until_complete(_serve())
except asyncio.CancelledError:
pass
finally:
self._loop.close()
self._thread = threading.Thread(target=_run, daemon=True)
self._thread.start()
assert self._ready.wait(timeout=5.0), "fake daemon failed to start within 5s"
def stop(self) -> None:
loop = self._loop
if loop is None:
return
async def _shutdown() -> None:
if self._server is not None:
self._server.close()
await self._server.wait_closed()
fut = asyncio.run_coroutine_threadsafe(_shutdown(), loop)
try:
fut.result(timeout=5.0)
except Exception:
pass
loop.call_soon_threadsafe(loop.stop)
if self._thread is not None:
self._thread.join(timeout=5.0)
# ---------------------------------------------------------------- fixtures
@pytest.fixture
def tmp_socket(tmp_path: Path) -> Path:
    """Return a unix-socket path short enough to bind.

    Unix domain sockets have a ~104-byte path limit on macOS, and tmp_path
    can exceed it when `pytest-xdist` worker names are involved. If the
    tmp_path-based candidate is longer than 100 characters, a fresh
    `tempfile.mkdtemp` directory is used instead.
    """
    preferred = tmp_path / "d.sock"
    if len(str(preferred)) <= 100:
        return preferred
    short_dir = tempfile.mkdtemp(prefix="iai-sock-")
    return Path(short_dir) / "d.sock"
async def _run_fake_server(
sock: Path,
captured: list,
reply: dict,
*,
delay_before_reply: float = 0.0,
) -> asyncio.AbstractServer:
"""Spin up a single-shot fake daemon over unix socket.
Reads one NDJSON line, records it in `captured`, sleeps `delay_before_reply`
seconds, writes `reply` as NDJSON back, closes. Returns the server object
so the caller can close it afterwards.
"""
async def _handle(reader: asyncio.StreamReader, writer: asyncio.StreamWriter) -> None:
try:
line = await reader.readline()
if line:
captured.append(json.loads(line.decode("utf-8")))
if delay_before_reply > 0:
await asyncio.sleep(delay_before_reply)
writer.write((json.dumps(reply) + "\n").encode("utf-8"))
await writer.drain()
finally:
try:
writer.close()
await writer.wait_closed()
except Exception:
pass
sock.parent.mkdir(parents=True, exist_ok=True)
return await asyncio.start_unix_server(_handle, path=str(sock))
# ---------------------------------------------------------------- consent gate
def test_consent_false_short_circuits_no_socket_touch(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """C2 invariant: consent=False must NEVER open the daemon socket."""

    # Any attempt to connect fails the test loudly.
    async def _forbidden_connect(*_args, **_kwargs):
        raise AssertionError(
            "C2 violation: asyncio.open_unix_connection reached with consent=False"
        )

    monkeypatch.setattr(asyncio, "open_unix_connection", _forbidden_connect)
    declined = {"consent": False, "reason": "not ready"}
    outcome = asyncio.run(core.handle_initiate_sleep_mode(declined))
    assert outcome == {"ok": False, "reason": "consent_declined"}
def test_consent_missing_raises_value_error() -> None:
    """A request without a `consent` key is rejected with ValueError."""
    params = {"reason": "missing"}
    with pytest.raises(ValueError, match="consent"):
        asyncio.run(core.handle_initiate_sleep_mode(params))
def test_consent_wrong_type_raises_value_error() -> None:
    """Only a literal bool is accepted for `consent`."""
    # Strings / ints / None / containers must all be rejected.
    rejected_values = ["true", 1, 0, None, [True]]
    for candidate in rejected_values:
        payload = {"consent": candidate, "reason": "x"}
        with pytest.raises(ValueError):
            asyncio.run(core.handle_initiate_sleep_mode(payload))
def test_reason_missing_raises_value_error() -> None:
    """A consenting request without a `reason` key is rejected with ValueError."""
    params = {"consent": True}
    with pytest.raises(ValueError, match="reason"):
        asyncio.run(core.handle_initiate_sleep_mode(params))
def test_reason_wrong_type_raises_value_error() -> None:
    """A non-string `reason` (here an int) is rejected with ValueError."""
    params = {"consent": True, "reason": 42}
    with pytest.raises(ValueError, match="reason"):
        asyncio.run(core.handle_initiate_sleep_mode(params))
def test_consent_true_opens_socket_and_returns_reply(
    tmp_socket: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """consent=True path: real socket round-trip against a fake daemon."""
    received: list[dict] = []

    async def _round_trip() -> dict:
        fake = await _run_fake_server(
            tmp_socket, received, {"ok": True, "state": "TRANSITIONING"},
        )
        try:
            async with fake:
                # Point core at the fake daemon's socket so that
                # _send_to_daemon talks to it.
                monkeypatch.setattr(core, "SOCKET_PATH", tmp_socket)
                request = {"consent": True, "reason": "good night"}
                return await core.handle_initiate_sleep_mode(request)
        finally:
            fake.close()
            await fake.wait_closed()

    outcome = asyncio.run(_round_trip())
    assert outcome == {"ok": True, "state": "TRANSITIONING"}
    assert len(received) == 1
    message = received[0]
    assert message["type"] == "user_initiated_sleep"
    assert message["reason"] == "good night"
    assert "ts" in message  # ISO timestamp attached
def test_consent_true_daemon_unreachable_returns_graceful_error(
    tmp_socket: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Daemon down (socket file absent) must return daemon_not_running."""
    # Deliberately no server: the socket file must not exist.
    assert not tmp_socket.exists()
    monkeypatch.setattr(core, "SOCKET_PATH", tmp_socket)
    request = {"consent": True, "reason": "night"}
    outcome = asyncio.run(core.handle_initiate_sleep_mode(request))
    assert outcome["ok"] is False
    assert outcome["reason"] == "daemon_not_running"
# ---------------------------------------------------------------- force_wake
def test_force_wake_sends_correct_message(
    tmp_socket: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """force_wake round-trip: one `force_wake` message with a timestamp is sent."""
    received: list[dict] = []

    async def _round_trip() -> dict:
        fake = await _run_fake_server(
            tmp_socket, received, {"ok": True, "state": "WAKE"},
        )
        try:
            async with fake:
                monkeypatch.setattr(core, "SOCKET_PATH", tmp_socket)
                return await core.handle_force_wake({})
        finally:
            fake.close()
            await fake.wait_closed()

    outcome = asyncio.run(_round_trip())
    assert outcome == {"ok": True, "state": "WAKE"}
    assert len(received) == 1
    message = received[0]
    assert message["type"] == "force_wake"
    assert "ts" in message
def test_force_wake_daemon_unreachable_graceful(
    tmp_socket: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """With no socket file present, force_wake reports daemon_not_running."""
    assert not tmp_socket.exists()
    monkeypatch.setattr(core, "SOCKET_PATH", tmp_socket)
    outcome = asyncio.run(core.handle_force_wake({}))
    assert outcome["ok"] is False
    assert outcome["reason"] == "daemon_not_running"
def test_force_wake_timeout_is_fifteen_minutes() -> None:
    """The cooperative force-wake cap is 15 minutes, i.e. 900 seconds."""
    expected_seconds = 900
    assert core.FORCE_WAKE_TIMEOUT_SEC == expected_seconds
# ---------------------------------------------------------------- inject helper
def _window_covering_now() -> tuple[int, int]:
    """Build a quiet_window `(start_bucket, duration)` that contains `now`.

    Buckets are 30 minutes wide (48 per day) and are computed from the
    current time in the user's timezone, so the result does not depend on
    what the test-host clock happens to read.
    """
    from iai_mcp.tz import load_user_tz

    local_now = datetime.now(timezone.utc).astimezone(load_user_tz())
    minutes_since_midnight = local_now.hour * 60 + local_now.minute
    bucket_now = minutes_since_midnight // 30
    # Start 2 buckets (1h) before now; last 8 buckets (4h).
    window_start = (bucket_now - 2) % 48
    return (window_start, 8)
def test_inject_sleep_suggestion_dual_gate_pass(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """When phrase + window both pass, response gains sleep_suggestion."""
    daemon_state = {"quiet_window": _window_covering_now()}
    # Hand out a fresh copy on every load so the template stays untouched.
    monkeypatch.setattr(
        "iai_mcp.daemon_state.load_state",
        lambda: dict(daemon_state),
    )
    reply: dict = {"hits": [], "anti_hits": []}
    core._inject_sleep_suggestion(reply, cue="good night", language="en")
    assert "sleep_suggestion" in reply, (
        f"expected injection on dual-gate pass, got {reply!r}"
    )
    suggestion = reply["sleep_suggestion"]
    assert suggestion["message_hint"] == "user_wind_down_detected"
def test_inject_sleep_suggestion_no_phrase(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """No phrase match -> response has no sleep_suggestion key."""
    daemon_state = {"quiet_window": _window_covering_now()}

    def _load() -> dict:
        return dict(daemon_state)

    monkeypatch.setattr("iai_mcp.daemon_state.load_state", _load)
    reply: dict = {"hits": [], "anti_hits": []}
    core._inject_sleep_suggestion(
        reply, cue="how do I configure pytest", language="en",
    )
    assert "sleep_suggestion" not in reply
def test_inject_sleep_suggestion_no_window(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Phrase match but no quiet_window -> response has no sleep_suggestion."""

    def _empty_state() -> dict:
        return {}

    monkeypatch.setattr("iai_mcp.daemon_state.load_state", _empty_state)
    reply: dict = {"hits": [], "anti_hits": []}
    core._inject_sleep_suggestion(reply, cue="good night", language="en")
    assert "sleep_suggestion" not in reply
def test_inject_sleep_suggestion_detector_raises_is_silent(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """If detect_wind_down raises, response goes out untouched."""

    def _failing_detector(*_args, **_kwargs):
        raise RuntimeError("synthetic bedtime failure")

    monkeypatch.setattr("iai_mcp.bedtime.detect_wind_down", _failing_detector)
    reply: dict = {"hits": [], "anti_hits": [], "budget_used": 0}
    # The RuntimeError must be swallowed by the injector.
    core._inject_sleep_suggestion(reply, cue="good night", language="en")
    assert "sleep_suggestion" not in reply
    # Every pre-existing key is still there, unchanged.
    assert reply == {"hits": [], "anti_hits": [], "budget_used": 0}
# ---------------------------------------------------------------- dispatch wiring
def test_dispatch_routes_initiate_sleep_mode(
    tmp_socket: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """The synchronous `core.dispatch` entrypoint must route the new
    methods through asyncio.run -- verified by having a fake daemon
    respond to a real socket round-trip.

    The fake daemon lives on a background thread/loop, so it outlives the
    asyncio.run that dispatch() performs internally.
    """
    received: list[dict] = []
    fake = _ThreadedFakeDaemon(tmp_socket, received, {"ok": True})
    fake.start()
    try:
        monkeypatch.setattr(core, "SOCKET_PATH", tmp_socket)
        params = {"consent": True, "reason": "test"}
        # The store argument is unused by these handlers -- None is a sentinel.
        outcome = core.dispatch(None, "initiate_sleep_mode", params)
        assert outcome == {"ok": True}
        assert received[0]["type"] == "user_initiated_sleep"
    finally:
        fake.stop()
def test_dispatch_routes_force_wake(
    tmp_socket: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """`core.dispatch` routes `force_wake` to the daemon and returns its reply."""
    received: list[dict] = []
    canned_reply = {"ok": True, "state": "WAKE"}
    fake = _ThreadedFakeDaemon(tmp_socket, received, canned_reply)
    fake.start()
    try:
        monkeypatch.setattr(core, "SOCKET_PATH", tmp_socket)
        outcome = core.dispatch(None, "force_wake", {})
        assert outcome == {"ok": True, "state": "WAKE"}
        assert received[0]["type"] == "force_wake"
    finally:
        fake.stop()

View file

@ -0,0 +1,168 @@
"""Tests for core._inject_overnight_digest -- (DAEMON-11).
Covers 5 behaviours:
1. First memory_recall of the day (>18h since last shown) gets overnight_digest.
2. Second recall within <18h does NOT include overnight_digest.
3. Empty state / no pending digest -> no overnight_digest key.
4. Digest is cleared from state after one delivery (D-24 once-per-window).
5. Exception in get_pending_digest does NOT break memory_recall (silent fail).
"""
from __future__ import annotations
from datetime import datetime, timedelta, timezone
from pathlib import Path
import pytest
# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------
@pytest.fixture
def isolated_state(tmp_path, monkeypatch):
    """Redirect `daemon_state.STATE_PATH` to a file under tmp_path.

    Returns the redirected path.
    """
    from iai_mcp import daemon_state

    redirected = tmp_path / ".daemon-state.json"
    monkeypatch.setattr(daemon_state, "STATE_PATH", redirected)
    return redirected
# digest shape -- every required field populated.
_FULL_DIGEST = {
"rem_cycles_completed": 4,
"episodes_processed": 10,
"schemas_induced_tier0": 3,
"claude_call_used": True,
"quota_used_pct": 0.003,
"main_insight_text": "today's unifying insight",
"sigma_observed": 1.2,
"s5_drift_alerts": [],
"daemon_uptime_hours": 8,
"timed_out_cycles": 0,
}
# ---------------------------------------------------------------------------
# Test 1: first recall of day gets digest
# ---------------------------------------------------------------------------
def test_first_recall_gets_digest(isolated_state):
    """A pending digest last shown 20h ago is attached to the response."""
    from iai_mcp.core import _inject_overnight_digest
    from iai_mcp.daemon_state import save_state

    # Seed state: pending digest + last shown 20h ago (past the 18h threshold).
    twenty_hours_ago = datetime.now(timezone.utc) - timedelta(hours=20)
    save_state({
        "pending_digest": dict(_FULL_DIGEST),
        "last_digest_shown_at": twenty_hours_ago.isoformat(),
    })
    reply: dict = {
        "hits": [],
        "anti_hits": [],
        "activation_trace": [],
        "budget_used": 0,
    }
    _inject_overnight_digest(reply)
    assert "overnight_digest" in reply
    digest = reply["overnight_digest"]
    # Every required field must come through with its seeded value.
    assert digest["rem_cycles_completed"] == 4
    assert digest["episodes_processed"] == 10
    assert digest["schemas_induced_tier0"] == 3
    assert digest["claude_call_used"] is True
    assert digest["quota_used_pct"] == 0.003
    assert digest["main_insight_text"] == "today's unifying insight"
    assert digest["sigma_observed"] == 1.2
    assert digest["s5_drift_alerts"] == []
    assert digest["daemon_uptime_hours"] == 8
# ---------------------------------------------------------------------------
# Test 2: second recall within 18h window does NOT include digest
# ---------------------------------------------------------------------------
def test_not_twice(isolated_state):
    """The same digest must not appear twice inside the 18h window."""
    from iai_mcp.core import _inject_overnight_digest
    from iai_mcp.daemon_state import save_state

    # Last shown 4h ago -- still inside the window.
    four_hours_ago = datetime.now(timezone.utc) - timedelta(hours=4)
    save_state({
        "pending_digest": dict(_FULL_DIGEST),
        "last_digest_shown_at": four_hours_ago.isoformat(),
    })
    reply: dict = {"hits": []}
    _inject_overnight_digest(reply)
    assert "overnight_digest" not in reply
# ---------------------------------------------------------------------------
# Test 3: no pending digest -> no key added
# ---------------------------------------------------------------------------
def test_no_digest_when_none_pending(isolated_state):
    """With an empty persisted state, no overnight_digest key is added."""
    from iai_mcp.core import _inject_overnight_digest
    from iai_mcp.daemon_state import save_state

    empty_state: dict = {}
    save_state(empty_state)
    reply: dict = {"hits": []}
    _inject_overnight_digest(reply)
    assert "overnight_digest" not in reply
# ---------------------------------------------------------------------------
# Test 4: digest cleared from state after one delivery
# ---------------------------------------------------------------------------
def test_digest_cleared_after_delivery(isolated_state):
    """After surfacing the digest, state must no longer carry
    pending_digest, so a subsequent recall (even after another 18h) does not
    re-show the stale digest."""
    from iai_mcp.core import _inject_overnight_digest
    from iai_mcp.daemon_state import load_state, save_state

    started = datetime.now(timezone.utc)
    save_state({
        "pending_digest": dict(_FULL_DIGEST),
        "last_digest_shown_at": (started - timedelta(hours=20)).isoformat(),
    })
    reply: dict = {"hits": []}
    _inject_overnight_digest(reply)
    assert "overnight_digest" in reply

    # Persisted state: pending_digest consumed.
    persisted = load_state()
    assert "pending_digest" not in persisted

    # last_digest_shown_at advanced to roughly now; a naive timestamp is
    # treated as UTC for the comparison.
    shown = datetime.fromisoformat(persisted["last_digest_shown_at"])
    if shown.tzinfo is None:
        shown = shown.replace(tzinfo=timezone.utc)
    earliest_acceptable = started - timedelta(seconds=5)
    assert shown >= earliest_acceptable
# ---------------------------------------------------------------------------
# Test 5: exception in get_pending_digest does NOT break memory_recall
# ---------------------------------------------------------------------------
def test_exception_is_silent(isolated_state, monkeypatch):
    """If get_pending_digest raises (corrupt state, unexpected schema), the
    response must still be returned without an overnight_digest key. The
    memory_recall hot path NEVER breaks on daemon-digest faults."""
    from iai_mcp import core

    def _corrupt(*_args, **_kwargs):
        raise RuntimeError("simulated state corruption")

    monkeypatch.setattr("iai_mcp.core.get_pending_digest", _corrupt)
    reply: dict = {"hits": [], "existing": True}
    # The call itself must not raise.
    core._inject_overnight_digest(reply)
    assert reply.get("existing") is True
    assert "overnight_digest" not in reply

203
tests/test_cpu_watchdog.py Normal file
View file

@ -0,0 +1,203 @@
"""Phase 07.2-05 R5 / A5 regression test — CPU watchdog emits one event under sustained overload.
Mock psutil.Process.cpu_percent with a scripted sequence so the test runs
in seconds instead of 75s wall time. D7.2-23 explicitly allows mocks for
heavy-dep tests. The synthetic-CPU-burner approach (real 80% CPU thread)
is documented in SPEC A5 but is impractical for the unit suite; we test
the SAME contract (sustained > threshold => one event) with deterministic
sample injection.
Project async-test idiom (mandatory): sync `def test_X(...)` body wraps
`asyncio.run(_async_body())`. The project does NOT depend on
`pytest-asyncio`; `@pytest.mark.asyncio` markers silently pass without
running. See tests/test_daemon_tick_flags.py:144 for the canonical pattern.
"""
from __future__ import annotations
import asyncio
from unittest.mock import MagicMock, patch
def test_sustained_overload_emits_exactly_one_daemon_cpu_overload_event(monkeypatch):
    """A5 acceptance: 2 consecutive samples > threshold => 1 critical event."""
    asyncio.run(_sustained_overload_body(monkeypatch))


async def _sustained_overload_body(monkeypatch):
    """Run the watchdog against scripted CPU samples and check the one event."""
    import iai_mcp.daemon as daemon_mod

    recorded: list[tuple[str, dict, str]] = []

    def write_event_capture(store, kind, data, severity="info", **kwargs):
        recorded.append((kind, dict(data), severity))

    # Fast poll cadence so the whole run fits in under 2 seconds.
    for attr_name, attr_value in (
        ("WATCHDOG_POLL_SEC", 0.05),
        ("WATCHDOG_THRESHOLD_PERCENT", 50.0),
        ("WATCHDOG_EVENT_COOLDOWN_SEC", 300.0),
        ("_last_overload_event_at", 0.0),
        ("_daemon_started_monotonic", 0.0),
    ):
        monkeypatch.setattr(daemon_mod, attr_name, attr_value)

    # Scripted CPU samples served after the priming call:
    # 80, 80, 30, 80, 80, 80, then 0.0 forever. The first pair of 80s should
    # trigger once; the later burst is expected to be suppressed by the
    # 300s cooldown, since the run only lasts ~1.5s.
    scripted = iter([80.0, 80.0, 30.0, 80.0, 80.0, 80.0])

    class FakeProc:
        _primed = False

        def cpu_percent(self, interval=None):
            # Mimic psutil's first-call rule: the priming call returns 0.0;
            # later calls walk the scripted sequence.
            if not self._primed:
                self._primed = True
                return 0.0
            return next(scripted, 0.0)

    # The watchdog body uses `import psutil` locally, so patch the class on
    # the psutil module itself.
    proc_patch = patch("psutil.Process", return_value=FakeProc())
    event_patch = patch("iai_mcp.daemon.write_event", write_event_capture)
    state_patch = patch(
        "iai_mcp.daemon.load_state", lambda: {"fsm_state": "DREAMING"}
    )
    with proc_patch, event_patch, state_patch:
        shutdown = asyncio.Event()
        store = MagicMock()
        watchdog = asyncio.create_task(
            daemon_mod._cpu_watchdog_loop(store, shutdown)
        )
        # ~1.5s at a 0.05s poll is ~30 samples -- plenty for the scripted
        # 6-sample sequence plus the trigger.
        await asyncio.sleep(1.5)
        shutdown.set()
        try:
            await asyncio.wait_for(watchdog, timeout=2.0)
        except asyncio.TimeoutError:
            watchdog.cancel()
            try:
                await watchdog
            except (asyncio.CancelledError, Exception):
                pass

    # Keep only overload events.
    overload_events = [
        entry for entry in recorded if entry[0] == "daemon_cpu_overload"
    ]
    # A5: exactly one event.
    assert len(overload_events) == 1, (
        f"Expected exactly 1 daemon_cpu_overload event; got "
        f"{len(overload_events)}: {overload_events}"
    )
    _kind, payload, level = overload_events[0]
    assert level == "critical"
    assert payload["fsm_state"] == "DREAMING"
    assert payload["threshold_pct"] == 50.0
    assert payload["sustained_sec"] == int(0.05 * 2)
    assert "cpu_samples_pct" in payload
    assert all(sample >= 0 for sample in payload["cpu_samples_pct"])
    assert "active_tasks" in payload
    assert "uptime_sec" in payload
def test_below_threshold_emits_no_event(monkeypatch):
    """Control: samples below threshold => no event."""
    asyncio.run(_below_threshold_body(monkeypatch))


async def _below_threshold_body(monkeypatch):
    """Run the watchdog with constant 30% samples and expect no overload event."""
    import iai_mcp.daemon as daemon_mod

    recorded: list[tuple[str, dict, str]] = []

    def write_event_capture(store, kind, data, severity="info", **kwargs):
        recorded.append((kind, dict(data), severity))

    for attr_name, attr_value in (
        ("WATCHDOG_POLL_SEC", 0.05),
        ("WATCHDOG_THRESHOLD_PERCENT", 50.0),
        ("_last_overload_event_at", 0.0),
    ):
        monkeypatch.setattr(daemon_mod, attr_name, attr_value)

    class FakeProc:
        _primed = False

        def cpu_percent(self, interval=None):
            # Priming call returns 0.0; every later sample is 30%, below the
            # 50% threshold.
            if not self._primed:
                self._primed = True
                return 0.0
            return 30.0

    proc_patch = patch("psutil.Process", return_value=FakeProc())
    event_patch = patch("iai_mcp.daemon.write_event", write_event_capture)
    state_patch = patch(
        "iai_mcp.daemon.load_state", lambda: {"fsm_state": "WAKE"}
    )
    with proc_patch, event_patch, state_patch:
        shutdown = asyncio.Event()
        store = MagicMock()
        watchdog = asyncio.create_task(
            daemon_mod._cpu_watchdog_loop(store, shutdown)
        )
        await asyncio.sleep(1.0)
        shutdown.set()
        try:
            await asyncio.wait_for(watchdog, timeout=2.0)
        except asyncio.TimeoutError:
            watchdog.cancel()
            try:
                await watchdog
            except (asyncio.CancelledError, Exception):
                pass

    overload_events = [
        entry for entry in recorded if entry[0] == "daemon_cpu_overload"
    ]
    assert overload_events == [], (
        f"Expected zero daemon_cpu_overload events under sub-threshold "
        f"samples; got {overload_events}"
    )
def test_event_cooldown_prevents_ledger_flood(monkeypatch):
    """D7.2-20: at most one event per WATCHDOG_EVENT_COOLDOWN_SEC."""
    asyncio.run(_event_cooldown_body(monkeypatch))


async def _event_cooldown_body(monkeypatch):
    """Run the watchdog under constant 90% load and expect a single event."""
    import iai_mcp.daemon as daemon_mod

    recorded: list[tuple[str, dict, str]] = []

    def write_event_capture(store, kind, data, severity="info", **kwargs):
        recorded.append((kind, dict(data), severity))

    # The 300s cooldown is far longer than the run, so a second trigger is
    # blocked.
    for attr_name, attr_value in (
        ("WATCHDOG_POLL_SEC", 0.05),
        ("WATCHDOG_THRESHOLD_PERCENT", 50.0),
        ("WATCHDOG_EVENT_COOLDOWN_SEC", 300.0),
        ("_last_overload_event_at", 0.0),
    ):
        monkeypatch.setattr(daemon_mod, attr_name, attr_value)

    class FakeProc:
        _primed = False

        def cpu_percent(self, interval=None):
            # Persistent overload: every post-prime sample is 90%.
            if not self._primed:
                self._primed = True
                return 0.0
            return 90.0

    proc_patch = patch("psutil.Process", return_value=FakeProc())
    event_patch = patch("iai_mcp.daemon.write_event", write_event_capture)
    state_patch = patch(
        "iai_mcp.daemon.load_state", lambda: {"fsm_state": "DREAMING"}
    )
    with proc_patch, event_patch, state_patch:
        shutdown = asyncio.Event()
        store = MagicMock()
        watchdog = asyncio.create_task(
            daemon_mod._cpu_watchdog_loop(store, shutdown)
        )
        await asyncio.sleep(1.5)  # plenty of time for 30 samples
        shutdown.set()
        try:
            await asyncio.wait_for(watchdog, timeout=2.0)
        except asyncio.TimeoutError:
            watchdog.cancel()
            try:
                await watchdog
            except (asyncio.CancelledError, Exception):
                pass

    overload_events = [
        entry for entry in recorded if entry[0] == "daemon_cpu_overload"
    ]
    # The cooldown should clamp the count to exactly 1.
    assert len(overload_events) == 1, (
        f"D7.2-20 cooldown failed: expected 1 event under persistent "
        f"overload; got {len(overload_events)}"
    )

214
tests/test_crypto.py Normal file
View file

@ -0,0 +1,214 @@
"""crypto.py AES-256-GCM primitives + file-backed key storage.
Originally Plan 02-08; updated in W1 to retire the keyring
backend (which deadlocked the daemon under launchd via the macOS
Keychain ACL prompt) in favor of a file-backed primary backend at
`{IAI_MCP_STORE}/.crypto.key` (32 raw bytes, mode 0o600, uid-validated).
Covers:
- encrypt_field / decrypt_field round-trip (byte-for-byte)
- Cyrillic / CJK / Arabic round-trip (MEM-01 across languages)
- Associated data binding (swapped AD -> InvalidTag)
- Tamper detection (mutated ciphertext -> InvalidTag)
- is_encrypted prefix check
- Passphrase fallback when no `.crypto.key` file is present
(via IAI_MCP_CRYPTO_PASSPHRASE), deterministic across instances
File-backend specific behavior (file priority, uid/mode validation,
atomic write) is exercised in tests/test_crypto_file_backend.py.
"""
from __future__ import annotations
import os
import pytest
def test_crypto_module_exports() -> None:
    """crypto.py exposes its public encryption helpers and key class."""
    from iai_mcp import crypto

    expected_names = (
        "encrypt_field",
        "decrypt_field",
        "is_encrypted",
        "CryptoKey",
        "derive_key_from_passphrase",
    )
    for export in expected_names:
        assert hasattr(crypto, export)
def test_crypto_roundtrip_basic() -> None:
    """encrypt(plaintext) -> decrypt -> byte-for-byte equal."""
    from iai_mcp.crypto import decrypt_field, encrypt_field

    key = bytes(32)  # 32 zero bytes
    message = "hello world"
    sealed = encrypt_field(message, key)
    assert isinstance(sealed, str)
    opened = decrypt_field(sealed, key)
    assert opened == message
def test_crypto_roundtrip_cyrillic() -> None:
    """D-08a + Russian text byte-for-byte preserved."""
    from iai_mcp.crypto import decrypt_field, encrypt_field

    key = b"\x01" * 32
    message = "Привет, мир! Это тест шифрования."
    opened = decrypt_field(encrypt_field(message, key), key)
    assert opened == message
    # The UTF-8 byte sequences must match as well.
    assert opened.encode("utf-8") == message.encode("utf-8")
def test_crypto_roundtrip_cjk() -> None:
    """D-08a + Japanese / Chinese round-trip."""
    from iai_mcp.crypto import decrypt_field, encrypt_field

    key = b"\x02" * 32
    message = "こんにちは世界。これは暗号化テストです。"
    sealed = encrypt_field(message, key)
    opened = decrypt_field(sealed, key)
    assert opened == message
def test_crypto_roundtrip_arabic() -> None:
    """D-08a + Arabic round-trip."""
    from iai_mcp.crypto import decrypt_field, encrypt_field

    key = b"\x03" * 32
    message = "مرحبا بالعالم. هذا اختبار تشفير."
    sealed = encrypt_field(message, key)
    opened = decrypt_field(sealed, key)
    assert opened == message
def test_crypto_empty_string_roundtrip() -> None:
    """Empty plaintext encrypts and decrypts cleanly."""
    from iai_mcp.crypto import decrypt_field, encrypt_field

    key = b"\x04" * 32
    sealed = encrypt_field("", key)
    opened = decrypt_field(sealed, key)
    assert opened == ""
def test_crypto_associated_data_binding() -> None:
    """Decrypting with different associated data than at encryption time fails.

    The mismatch must surface as ``cryptography.exceptions.InvalidTag``.
    """
    from cryptography.exceptions import InvalidTag
    from iai_mcp.crypto import decrypt_field, encrypt_field

    key = bytes([0x05]) * 32
    sealed = encrypt_field("secret", key, associated_data=b"record_id_A")
    with pytest.raises(InvalidTag):
        decrypt_field(sealed, key, associated_data=b"record_id_B")
def test_crypto_associated_data_roundtrip_when_matching() -> None:
    """Supplying the same associated data on both sides lets the round-trip succeed."""
    from iai_mcp.crypto import decrypt_field, encrypt_field

    key = bytes([0x06]) * 32
    shared_ad = b"record_id_matching"
    sealed = encrypt_field("secret", key, associated_data=shared_ad)
    recovered = decrypt_field(sealed, key, associated_data=shared_ad)
    assert recovered == "secret"
def test_crypto_tamper_detection() -> None:
    """Flipping one bit of the encoded payload makes decryption raise InvalidTag."""
    import base64

    from cryptography.exceptions import InvalidTag
    from iai_mcp.crypto import decrypt_field, encrypt_field

    key = bytes([0x07]) * 32
    envelope_prefix = "iai:enc:v1:"
    sealed = encrypt_field("secret", key)
    assert sealed.startswith(envelope_prefix)

    # Decode the base64 payload that follows the prefix so a byte can be altered.
    payload = bytearray(base64.b64decode(sealed[len(envelope_prefix):]))
    # Index 15 lies past the first 12 bytes, which the test author identifies
    # as the nonce, so the flip lands in the data that follows it.
    payload[15] ^= 0x01
    tampered = envelope_prefix + base64.b64encode(bytes(payload)).decode("ascii")

    with pytest.raises(InvalidTag):
        decrypt_field(tampered, key)
def test_crypto_wrong_key_fails() -> None:
    """A ciphertext cannot be decrypted with a key other than the one that made it."""
    from cryptography.exceptions import InvalidTag
    from iai_mcp.crypto import decrypt_field, encrypt_field

    encrypting_key = bytes([0x08]) * 32
    other_key = bytes([0x09]) * 32
    sealed = encrypt_field("secret", encrypting_key)
    with pytest.raises(InvalidTag):
        decrypt_field(sealed, other_key)
def test_is_encrypted_prefix_true() -> None:
    """is_encrypted reports True for the output of encrypt_field."""
    from iai_mcp.crypto import encrypt_field, is_encrypted

    key = bytes([0x0A]) * 32
    sealed = encrypt_field("hello", key)
    assert is_encrypted(sealed) is True
def test_is_encrypted_prefix_false() -> None:
    """is_encrypted is False for plaintext, empty, and wrongly-prefixed strings."""
    from iai_mcp.crypto import is_encrypted

    not_encrypted = (
        "plaintext",
        "",
        "iai:enc:v0:abc",  # same shape, but a different version tag
        "foo:bar",
    )
    for candidate in not_encrypted:
        assert is_encrypted(candidate) is False
def test_crypto_unique_nonce_per_encrypt() -> None:
    """Encrypting identical plaintext twice with one key yields distinct outputs."""
    from iai_mcp.crypto import encrypt_field

    key = bytes([0x0B]) * 32
    first_sealed, second_sealed = (encrypt_field("repeat", key) for _ in range(2))
    # The outputs must differ even though plaintext and key are the same.
    assert first_sealed != second_sealed
def test_derive_key_from_passphrase_deterministic() -> None:
    """Deriving twice from one passphrase and salt gives the same 32-byte key."""
    from iai_mcp.crypto import derive_key_from_passphrase

    fixed_salt = b"saltsaltsaltsalt"  # 16 bytes
    first = derive_key_from_passphrase("hunter2", fixed_salt)
    second = derive_key_from_passphrase("hunter2", fixed_salt)
    assert first == second
    assert len(first) == 32  # 256-bit key
def test_derive_key_from_passphrase_different_salts() -> None:
    """Changing only the salt changes the derived key."""
    from iai_mcp.crypto import derive_key_from_passphrase

    key_from_a = derive_key_from_passphrase("same", b"A" * 16)
    key_from_b = derive_key_from_passphrase("same", b"B" * 16)
    assert key_from_a != key_from_b
def test_derive_key_uses_600k_iterations() -> None:
    """The module's PBKDF2 iteration constant is at least 600,000.

    The original author cites the OWASP 2023 recommendation for
    PBKDF2-HMAC-SHA256 as the source of this floor.
    """
    from iai_mcp import crypto

    minimum_iterations = 600_000
    assert crypto.PBKDF2_ITERATIONS >= minimum_iterations
def test_crypto_key_passphrase_fallback_when_file_missing(
    tmp_path, monkeypatch
) -> None:
    """Phase 07.10 W1 RED: with no key file, CryptoKey falls back to the passphrase.

    The author's stated priority order for the new backend is
    file -> passphrase env var -> CryptoKeyError. This test covers the second
    tier: `.crypto.key` is absent from store_root, IAI_MCP_CRYPTO_PASSPHRASE
    is set, and get_or_create() must return a 32-byte key that is identical
    across two separately constructed CryptoKey instances.

    No keyring mocking is used: the keyring backend is gone in W2, so this
    test must not depend on it. RED until W2, because CryptoKey did not yet
    accept the store_root kwarg when this was written.
    """
    from iai_mcp import crypto

    # Precondition: nothing has written a key file into the store root.
    assert not (tmp_path / ".crypto.key").exists()
    monkeypatch.setenv("IAI_MCP_CRYPTO_PASSPHRASE", "hunter2-fallback")

    first_key = crypto.CryptoKey(user_id="t", store_root=tmp_path).get_or_create()
    assert isinstance(first_key, bytes)
    assert len(first_key) == 32

    # A fresh instance with the same user_id and store_root must agree.
    second_key = crypto.CryptoKey(user_id="t", store_root=tmp_path).get_or_create()
    assert first_key == second_key

View file

@ -0,0 +1,281 @@
"""Phase 07.10 W1 RED: file-backed crypto key {`_try_file_get`, `_try_file_set`,
get_or_create priority, migrate-to-file CLI}.
Locks the executable spec for the file-backed crypto key per CONTEXT.md
D-05 / D-06 / D-07 / D-11. All 9 tests are RED until W2 (crypto.py file
backend) and W3 (cmd_crypto_migrate_to_file) land.
Failure shapes that count as a correct RED signal in this plan:
- TypeError: CryptoKey() got an unexpected keyword argument 'store_root'
(W2 adds the kwarg)
- AttributeError: 'CryptoKey' object has no attribute '_try_file_get'
/ '_try_file_set' / '_key_file_path'
- ImportError: cannot import name 'cmd_crypto_migrate_to_file'
(W3 lands the CLI command)
Imports of the new symbols stay INSIDE each test body so module-level
collection succeeds: pytest must be able to ENUMERATE the 9 tests and
then fail each one at assertion time, not crash at collection.
"""
from __future__ import annotations
import os
import secrets
import stat
from pathlib import Path
import pytest
# ---------------------------------------------------------------- _try_file_get
def test_try_file_get_returns_bytes_on_valid_0o600_file(tmp_path: Path) -> None:
    """D-11 case 1: a 32-byte key file with mode 0o600 is read back verbatim."""
    from iai_mcp.crypto import CryptoKey

    expected_key = secrets.token_bytes(32)
    key_file = tmp_path / ".crypto.key"
    key_file.write_bytes(expected_key)
    os.chmod(key_file, 0o600)

    loaded = CryptoKey(user_id="t", store_root=tmp_path)._try_file_get()
    assert loaded == expected_key
    assert isinstance(loaded, bytes)
    assert len(loaded) == 32
def test_try_file_get_rejects_world_or_group_bits(tmp_path: Path) -> None:
    """D-06 / case 2: a key file with mode 0o644 is refused as insecure.

    The CryptoKeyError message must contain "insecure mode" (case-insensitive).
    """
    from iai_mcp.crypto import CryptoKey, CryptoKeyError

    key_file = tmp_path / ".crypto.key"
    key_file.write_bytes(bytes(32))
    os.chmod(key_file, 0o644)

    crypto_key = CryptoKey(user_id="t", store_root=tmp_path)
    with pytest.raises(CryptoKeyError) as raised:
        crypto_key._try_file_get()
    message = str(raised.value).lower()
    assert "insecure mode" in message
def test_try_file_get_rejects_wrong_length(tmp_path: Path) -> None:
    """D-05 / case 3: a 31-byte key file is refused.

    The CryptoKeyError message must contain "wrong length" (case-insensitive).
    """
    from iai_mcp.crypto import CryptoKey, CryptoKeyError

    key_file = tmp_path / ".crypto.key"
    key_file.write_bytes(bytes([0x01]) * 31)  # one byte short of 32
    os.chmod(key_file, 0o600)

    crypto_key = CryptoKey(user_id="t", store_root=tmp_path)
    with pytest.raises(CryptoKeyError) as raised:
        crypto_key._try_file_get()
    message = str(raised.value).lower()
    assert "wrong length" in message
def test_try_file_get_rejects_foreign_uid(tmp_path: Path, monkeypatch) -> None:
    """D-06 / case 4: a key file owned by another uid is refused.

    ``os.stat`` is replaced with a wrapper that returns a forged result, with
    a different ``st_uid``, for the key file only; every other path is passed
    through to the real ``os.stat``. The forged value is a genuine
    ``os.stat_result`` so anything that indexes or reads attributes from it
    keeps working. The CryptoKeyError message must contain "uid".
    """
    from iai_mcp.crypto import CryptoKey, CryptoKeyError

    key_file = tmp_path / ".crypto.key"
    key_file.write_bytes(bytes([0x02]) * 32)
    os.chmod(key_file, 0o600)

    real_stat = os.stat
    real_result = real_stat(key_file)
    # Offsetting by 12345 and masking to 16 bits gives a uid that is not ours.
    foreign_uid = (os.geteuid() + 12345) & 0xFFFF

    # os.stat_result takes the 10 classic fields in this order; copy them all
    # from the real result and overwrite only the owner (position 4).
    field_order = (
        "st_mode",
        "st_ino",
        "st_dev",
        "st_nlink",
        "st_uid",
        "st_gid",
        "st_size",
        "st_atime",
        "st_mtime",
        "st_ctime",
    )
    field_values = [getattr(real_result, field) for field in field_order]
    field_values[4] = foreign_uid
    forged = os.stat_result(tuple(field_values))
    target_str = str(key_file)

    def fake_stat(path, *args, **kwargs):
        # Only the key file gets the forged owner; anything whose str()
        # fails or differs is delegated to the real os.stat.
        try:
            is_key_file = str(path) == target_str
        except Exception:
            is_key_file = False
        if is_key_file:
            return forged
        return real_stat(path, *args, **kwargs)

    monkeypatch.setattr(os, "stat", fake_stat)

    crypto_key = CryptoKey(user_id="t", store_root=tmp_path)
    with pytest.raises(CryptoKeyError) as raised:
        crypto_key._try_file_get()
    assert "uid" in str(raised.value).lower()
# ---------------------------------------------------------------- _try_file_set
def test_try_file_set_writes_atomic_with_0o600(tmp_path: Path) -> None:
    """D-07 / case 5: _try_file_set leaves a 0o600 key file with the exact bytes.

    It also checks that no `.crypto.key.tmp.*` file remains in the directory
    afterwards; a leftover would mean the temporary file was neither renamed
    into place nor cleaned up.
    """
    from iai_mcp.crypto import CryptoKey

    payload = bytes(32)
    CryptoKey(user_id="t", store_root=tmp_path)._try_file_set(payload)

    key_file = tmp_path / ".crypto.key"
    assert key_file.exists()
    assert key_file.read_bytes() == payload
    permission_bits = stat.S_IMODE(os.stat(key_file).st_mode)
    assert permission_bits == 0o600

    # Scan the store root for temp artifacts left behind by the write.
    leftover_tmps = list(tmp_path.glob(".crypto.key.tmp.*"))
    assert leftover_tmps == [], f"leaked tmp files: {leftover_tmps}"
def test_try_file_set_cleans_stale_tmp(tmp_path: Path) -> None:
    """D-07 / case 6: a pre-existing `.crypto.key.tmp.<pid>` file is removed.

    After _try_file_set the stale temp file must be gone and the real key
    file must hold the new payload.
    """
    from iai_mcp.crypto import CryptoKey

    # Simulate debris left by an earlier run that died mid-write.
    stale_tmp = tmp_path / ".crypto.key.tmp.99999"
    stale_tmp.write_bytes(b"GARBAGE-FROM-CRASHED-PRIOR-RUN")

    payload = bytes([0x01]) * 32
    CryptoKey(user_id="t", store_root=tmp_path)._try_file_set(payload)

    assert not stale_tmp.exists(), "stale tmp must be cleaned up before the new write"
    key_file = tmp_path / ".crypto.key"
    assert key_file.exists()
    assert key_file.read_bytes() == payload
# ---------------------------------------------------------------- get_or_create priority
def test_get_or_create_prefers_file_over_passphrase(
    tmp_path: Path, monkeypatch
) -> None:
    """D-11 case 7: a valid key file takes precedence over the passphrase.

    A random key is written to `.crypto.key` and IAI_MCP_CRYPTO_PASSPHRASE is
    set at the same time; get_or_create() must return the file's bytes rather
    than a key derived from the passphrase.
    """
    from iai_mcp.crypto import CryptoKey

    key_a = secrets.token_bytes(32)
    key_file = tmp_path / ".crypto.key"
    key_file.write_bytes(key_a)
    os.chmod(key_file, 0o600)
    monkeypatch.setenv("IAI_MCP_CRYPTO_PASSPHRASE", "hunter2")

    resolved = CryptoKey(user_id="t", store_root=tmp_path).get_or_create()
    assert resolved == key_a, "file-backed key must win over passphrase fallback"
# ---------------------------------------------------------------- migrate-to-file CLI
def test_cmd_crypto_migrate_to_file_happy_path(
    tmp_path: Path, monkeypatch
) -> None:
    """D-11 case 8: migrate-to-file copies the keyring key into a 0o600 file.

    `keyring.get_password` and `keyring.delete_password` are patched on the
    already-imported keyring module BEFORE the command is imported, so the
    command sees the patched attributes when it uses keyring. The fake
    keyring returns a urlsafe-base64 encoding of a random 32-byte key; the
    file written by the command must contain those raw 32 bytes.
    """
    import argparse
    import base64

    import keyring as _keyring

    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))

    keyring_key = secrets.token_bytes(32)
    keyring_blob = base64.urlsafe_b64encode(keyring_key).decode("ascii")

    def fake_get(service: str, username: str) -> str | None:
        # Every lookup yields the same stored blob.
        return keyring_blob

    def fake_delete(service: str, username: str) -> None:
        # Deleting is a no-op in this fake keyring.
        pass

    monkeypatch.setattr(_keyring, "get_password", fake_get)
    monkeypatch.setattr(_keyring, "delete_password", fake_delete)

    from iai_mcp.cli import cmd_crypto_migrate_to_file  # ImportError until W3 — RED.

    cli_args = argparse.Namespace(
        user_id="default", keep_keychain=True, delete_keychain=False
    )
    assert cmd_crypto_migrate_to_file(cli_args) == 0

    key_file = tmp_path / ".crypto.key"
    assert key_file.exists()
    assert stat.S_IMODE(os.stat(key_file).st_mode) == 0o600
    assert key_file.read_bytes() == keyring_key, (
        "file contents must equal the round-tripped keyring key bytes"
    )
def test_cmd_crypto_migrate_to_file_idempotent(
    tmp_path: Path, monkeypatch
) -> None:
    """D-11 case 9: with a key file already present the command is a no-op.

    Both keyring entry points are replaced by a function that raises
    AssertionError, so any keyring access on this path fails the test with
    an explicit message. The command must exit 0 and leave the file's bytes
    untouched.
    """
    import argparse

    import keyring as _keyring

    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))

    # A valid key file is already in place before the command runs.
    pre_existing = secrets.token_bytes(32)
    key_file = tmp_path / ".crypto.key"
    key_file.write_bytes(pre_existing)
    os.chmod(key_file, 0o600)

    def assert_not_called(*args, **kwargs):
        raise AssertionError(
            "keyring touched on idempotent path — migrate-to-file must "
            "skip keyring entirely when the file is already present"
        )

    for keyring_attr in ("get_password", "delete_password"):
        monkeypatch.setattr(_keyring, keyring_attr, assert_not_called)

    from iai_mcp.cli import cmd_crypto_migrate_to_file  # ImportError until W3 — RED.

    cli_args = argparse.Namespace(
        user_id="default", keep_keychain=True, delete_keychain=False
    )
    assert cmd_crypto_migrate_to_file(cli_args) == 0

    # The original key bytes are still what is on disk.
    assert key_file.read_bytes() == pre_existing

View file

@ -0,0 +1,52 @@
"""Tests for crypto_key_watch baseline + rotation detection."""
from __future__ import annotations
import json
import os
import secrets
from pathlib import Path
from iai_mcp.crypto_key_watch import (
check_crypto_key_file_rotation_event,
sync_crypto_key_watcher_to_disk,
)
from iai_mcp.events import query_events
from iai_mcp.store import MemoryStore
def test_watcher_baseline_then_rotation_emits_event(tmp_path: Path) -> None:
    """The watcher stays silent on first sight and reports one rotation per change.

    Sequence checked:
      1. first check with a key file present -> no `crypto_key_rotated` events;
      2. key file rewritten, second check -> exactly one event;
      3. third check with no further change -> still exactly one event.
    """
    root = tmp_path / "w"
    root.mkdir()
    key_file = root / ".crypto.key"

    def write_fresh_key() -> None:
        # Replace the key file with new random bytes at mode 0o600.
        key_file.write_bytes(secrets.token_bytes(32))
        os.chmod(key_file, 0o600)

    def rotation_events():
        return query_events(store, kind="crypto_key_rotated", limit=10)

    write_fresh_key()
    store = MemoryStore(path=root, user_id="default")

    check_crypto_key_file_rotation_event(store)
    assert len(rotation_events()) == 0

    write_fresh_key()
    check_crypto_key_file_rotation_event(store)
    assert len(rotation_events()) == 1

    check_crypto_key_file_rotation_event(store)
    assert len(rotation_events()) == 1
def test_sync_watcher_without_event(tmp_path: Path) -> None:
    """sync_crypto_key_watcher_to_disk writes a JSON watcher file in the store root.

    The file `.crypto-key-watcher.json` must exist afterwards and its JSON
    object must carry both an "mtime_ns" and a "size" key.
    """
    root = tmp_path / "s"
    root.mkdir()
    key_file = root / ".crypto.key"
    key_file.write_bytes(secrets.token_bytes(32))
    os.chmod(key_file, 0o600)

    store = MemoryStore(path=root, user_id="default")
    sync_crypto_key_watcher_to_disk(store)

    watcher_file = root / ".crypto-key-watcher.json"
    assert watcher_file.is_file()
    snapshot = json.loads(watcher_file.read_text(encoding="utf-8"))
    assert "mtime_ns" in snapshot and "size" in snapshot

251
tests/test_curiosity.py Normal file
View file

@ -0,0 +1,251 @@
"""Tests for LEARN-04 curiosity (D-23, D-24).
D-23 trigger: entropy > 0.7 bits, 3-turn cooldown.
D-24 tiered style:
- low entropy (0.4-0.7): silent log via events table (curiosity_silent_log)
- mid entropy (0.7-0.9): inline hint in next response
- high entropy (>0.9): direct clarifying question
compute_entropy operates in base-2 (bits) consistent with "0.7 bits".
"""
from __future__ import annotations
import math
from datetime import datetime, timezone
from uuid import UUID, uuid4
import pytest
from iai_mcp.store import MemoryStore
from iai_mcp.types import EMBED_DIM, MemoryRecord
def _rec(vec=None, tags=None):
    """Build an episodic MemoryRecord with a fresh id for use in tests.

    Args:
        vec: embedding to store; when falsy, a vector of length EMBED_DIM with
            1.0 in the first slot and 0.0 elsewhere is used.
        tags: optional iterable of tags, copied into a new list.
    """
    embedding = vec if vec else [1.0] + [0.0] * (EMBED_DIM - 1)
    timestamp = datetime.now(timezone.utc)
    fields = dict(
        id=uuid4(),
        tier="episodic",
        literal_surface="r",
        aaak_index="",
        embedding=embedding,
        community_id=None,
        centrality=0.0,
        detail_level=2,
        pinned=False,
        stability=0.0,
        difficulty=0.0,
        last_reviewed=None,
        never_decay=False,
        never_merge=False,
        provenance=[],
        # created_at and updated_at share one timestamp.
        created_at=timestamp,
        updated_at=timestamp,
        tags=list(tags or []),
        language="en",
    )
    return MemoryRecord(**fields)
class _Hit:
    """Minimal stand-in for a retrieval hit: a record id paired with a score."""

    def __init__(self, rid: UUID, score: float):
        # Exposed under the attribute names record_id / score.
        self.score = score
        self.record_id = rid
# ---------------------------------------------------------------- constants
def test_curiosity_thresholds():
    """The curiosity module's tier thresholds and cooldown have the pinned values."""
    from iai_mcp import curiosity

    observed = (
        curiosity.ENTROPY_LOW,
        curiosity.ENTROPY_MID,
        curiosity.ENTROPY_HIGH,
    )
    assert observed[0] == 0.4
    assert observed[1] == 0.7
    assert observed[2] == 0.9
    assert curiosity.COOLDOWN_TURNS == 3
# ---------------------------------------------------------------- compute_entropy
def test_compute_entropy_uniform():
    """Two equal scores give an entropy of 1.0 bit (within 1e-6)."""
    from iai_mcp.curiosity import compute_entropy

    entropy_bits = compute_entropy([0.5, 0.5])
    deviation = abs(entropy_bits - 1.0)
    assert deviation < 1e-6
def test_compute_entropy_skewed():
    """A 0.9 / 0.1 split has entropy below 0.5 bits."""
    from iai_mcp.curiosity import compute_entropy

    # -(0.9*log2(0.9) + 0.1*log2(0.1)) is roughly 0.469.
    entropy_bits = compute_entropy([0.9, 0.1])
    assert entropy_bits < 0.5
def test_compute_entropy_degenerate():
    """A single score yields exactly zero entropy."""
    from iai_mcp.curiosity import compute_entropy

    single_score = [1.0]
    assert compute_entropy(single_score) == 0.0
def test_compute_entropy_empty():
    """An empty score list yields exactly zero entropy."""
    from iai_mcp.curiosity import compute_entropy

    no_scores = []
    assert compute_entropy(no_scores) == 0.0
def test_compute_entropy_zero_scores_handled():
    """A negative score in the input neither crashes nor gives negative entropy.

    The original author notes the implementation is expected to normalise
    with max(0, s).
    """
    from iai_mcp.curiosity import compute_entropy

    scores_with_negative = [-1.0, 0.5, 0.5]
    entropy_bits = compute_entropy(scores_with_negative)
    assert entropy_bits >= 0.0
# ---------------------------------------------------------------- fire_curiosity
def test_fire_curiosity_below_threshold_silent(tmp_path):
    """Entropy 0.5 produces no question but records a silent-log event.

    fire_curiosity must return None, and at least one `curiosity_silent_log`
    event must be queryable from the store afterwards.
    """
    from iai_mcp.curiosity import fire_curiosity
    from iai_mcp.events import query_events

    store = MemoryStore(path=tmp_path)
    record = _rec()
    store.insert(record)

    question = fire_curiosity(
        store,
        [_Hit(record.id, 0.8)],
        cue="ambiguous",
        entropy=0.5,
        session_id="s1",
        turn=1,
    )
    assert question is None

    silent_events = query_events(store, kind="curiosity_silent_log")
    assert len(silent_events) >= 1
def test_fire_curiosity_below_ENTROPY_LOW_returns_none(tmp_path):
    """Entropy 0.1 (under ENTROPY_LOW=0.4) with no hits returns None."""
    from iai_mcp.curiosity import fire_curiosity

    store = MemoryStore(path=tmp_path)
    no_hits = []
    question = fire_curiosity(
        store,
        no_hits,
        cue="x",
        entropy=0.1,
        session_id="s-silent",
        turn=1,
    )
    assert question is None
def test_fire_curiosity_mid_entropy_inline_hint(tmp_path):
    """Entropy 0.8 yields a question whose tier is 'inline'."""
    from iai_mcp.curiosity import fire_curiosity

    store = MemoryStore(path=tmp_path)
    record = _rec()
    store.insert(record)

    question = fire_curiosity(
        store,
        [_Hit(record.id, 0.6)],
        cue="maybe",
        entropy=0.8,
        session_id="s2",
        turn=1,
    )
    assert question is not None
    assert question.tier == "inline"
def test_fire_curiosity_high_entropy_direct_question(tmp_path):
    """Entropy 0.95 yields a question whose tier is 'question'."""
    from iai_mcp.curiosity import fire_curiosity

    store = MemoryStore(path=tmp_path)
    record = _rec()
    store.insert(record)

    question = fire_curiosity(
        store,
        [_Hit(record.id, 0.5)],
        cue="unknown",
        entropy=0.95,
        session_id="s3",
        turn=1,
    )
    assert question is not None
    assert question.tier == "question"
def test_fire_curiosity_cooldown_3_turns(tmp_path):
    """After firing on turn 1, turns 2 and 3 of the same session return None."""
    from iai_mcp.curiosity import fire_curiosity

    store = MemoryStore(path=tmp_path)
    record = _rec()
    store.insert(record)
    hits = [_Hit(record.id, 0.5)]

    first = fire_curiosity(store, hits, "x", 0.95, "s4", turn=1)
    assert first is not None

    # The next two turns fall inside the cooldown window.
    for cooled_turn in (2, 3):
        suppressed = fire_curiosity(store, hits, "x", 0.95, "s4", turn=cooled_turn)
        assert suppressed is None
def test_fire_curiosity_cooldown_releases(tmp_path):
    """A question fired on turn 1 no longer blocks firing on turn 4."""
    from iai_mcp.curiosity import fire_curiosity

    store = MemoryStore(path=tmp_path)
    record = _rec()
    store.insert(record)
    hits = [_Hit(record.id, 0.5)]

    on_turn_one = fire_curiosity(store, hits, "x", 0.95, "s5", turn=1)
    assert on_turn_one is not None
    on_turn_four = fire_curiosity(store, hits, "x", 0.95, "s5", turn=4)
    assert on_turn_four is not None
# ---------------------------------------------------------------- pending_questions
def test_pending_questions_empty(tmp_path):
    """A store in which nothing has fired has no pending questions."""
    from iai_mcp.curiosity import pending_questions

    untouched_store = MemoryStore(path=tmp_path)
    assert pending_questions(untouched_store) == []
def test_pending_questions_filter_resolved(tmp_path):
    """Of five fired questions, resolving three leaves two pending."""
    from iai_mcp.curiosity import fire_curiosity, pending_questions
    from iai_mcp.events import write_event

    store = MemoryStore(path=tmp_path)
    record = _rec()
    store.insert(record)
    hits = [_Hit(record.id, 0.5)]

    # Each question uses its own session so the per-session cooldown
    # cannot suppress any of them.
    fired_ids: list = []
    for index in range(5):
        question = fire_curiosity(
            store, hits, f"cue{index}", 0.95, f"session-{index}", turn=1
        )
        assert question is not None
        fired_ids.append(question.id)

    # Mark the first three as resolved through curiosity_resolved events.
    for resolved_id in fired_ids[:3]:
        write_event(
            store,
            kind="curiosity_resolved",
            data={"question_id": str(resolved_id)},
            severity="info",
        )

    still_pending = pending_questions(store)
    assert len(still_pending) == 2
def test_pending_questions_by_session(tmp_path):
    """pending_questions(session_id=...) returns only that session's question."""
    from iai_mcp.curiosity import fire_curiosity, pending_questions

    store = MemoryStore(path=tmp_path)
    record = _rec()
    store.insert(record)
    hits = [_Hit(record.id, 0.5)]

    # One question in each of two sessions.
    fire_curiosity(store, hits, "c", 0.95, "sA", turn=1)
    fire_curiosity(store, hits, "c", 0.95, "sB", turn=1)

    pending_for_a = pending_questions(store, session_id="sA")
    pending_for_b = pending_questions(store, session_id="sB")
    assert len(pending_for_a) == 1
    assert len(pending_for_b) == 1

View file

@ -0,0 +1,121 @@
"""Tests for curiosity_bridge edges.
curiosity_bridge edges:
- Created when fire_curiosity surfaces a mid/high-entropy question.
- Weight proportional to entropy.
- Persist in the edges table with edge_type='curiosity_bridge'.
- Fading on resolution is added separately (not exercised by these tests).
"""
from __future__ import annotations
from datetime import datetime, timezone
from uuid import UUID, uuid4
import pytest
from iai_mcp.store import EDGES_TABLE, MemoryStore
from iai_mcp.types import EMBED_DIM, MemoryRecord
def _rec(vec=None, tags=None):
    """Build an episodic MemoryRecord with a fresh id for use in tests.

    Args:
        vec: embedding to store; when falsy, a vector of length EMBED_DIM with
            1.0 in the first slot and 0.0 elsewhere is used.
        tags: optional iterable of tags, copied into a new list.
    """
    embedding = vec if vec else [1.0] + [0.0] * (EMBED_DIM - 1)
    timestamp = datetime.now(timezone.utc)
    fields = dict(
        id=uuid4(),
        tier="episodic",
        literal_surface="r",
        aaak_index="",
        embedding=embedding,
        community_id=None,
        centrality=0.0,
        detail_level=2,
        pinned=False,
        stability=0.0,
        difficulty=0.0,
        last_reviewed=None,
        never_decay=False,
        never_merge=False,
        provenance=[],
        # created_at and updated_at share one timestamp.
        created_at=timestamp,
        updated_at=timestamp,
        tags=list(tags or []),
        language="en",
    )
    return MemoryRecord(**fields)
class _Hit:
    """Minimal stand-in for a retrieval hit: a record id paired with a score."""

    def __init__(self, rid: UUID, score: float):
        # Exposed under the attribute names record_id / score.
        self.score = score
        self.record_id = rid
def test_curiosity_bridge_edge_on_fire(tmp_path):
    """Firing a question over three hit records leaves >= 3 curiosity_bridge edges.

    The edges table is read back through pandas and filtered on
    edge_type == 'curiosity_bridge'.
    """
    from iai_mcp.curiosity import fire_curiosity

    store = MemoryStore(path=tmp_path)
    records = [_rec() for _ in range(3)]
    for record in records:
        store.insert(record)

    question = fire_curiosity(
        store,
        [_Hit(record.id, 0.5) for record in records],
        "ambiguous",
        entropy=0.85,
        session_id="s-bridge",
        turn=1,
    )
    assert question is not None

    all_edges = store.db.open_table(EDGES_TABLE).to_pandas()
    bridge_edges = all_edges[all_edges["edge_type"] == "curiosity_bridge"]
    # At least one edge for each record that triggered the question.
    assert len(bridge_edges) >= 3
def test_curiosity_bridge_edge_weight_proportional_entropy(tmp_path):
    """Questions fired at entropy 0.75 and 0.95 both leave positive-weight edges.

    The intent stated by the original author is that higher entropy gives a
    larger edge delta. What is actually asserted here is weaker: every
    curiosity_bridge edge has a weight greater than zero.
    """
    from iai_mcp.curiosity import fire_curiosity

    store = MemoryStore(path=tmp_path)
    lower_record, higher_record = _rec(), _rec()
    store.insert(lower_record)
    store.insert(higher_record)

    lower_q = fire_curiosity(
        store, [_Hit(lower_record.id, 0.5)], "a", 0.75, session_id="s-a", turn=1
    )
    assert lower_q is not None
    # A separate session keeps the cooldown from suppressing the second fire.
    higher_q = fire_curiosity(
        store, [_Hit(higher_record.id, 0.5)], "b", 0.95, session_id="s-b", turn=1
    )
    assert higher_q is not None

    all_edges = store.db.open_table(EDGES_TABLE).to_pandas()
    bridge_edges = all_edges[all_edges["edge_type"] == "curiosity_bridge"]
    # Author's expectation (not asserted): edges from the 0.75 question weigh
    # below 0.9 and edges from the 0.95 question weigh above 0.9.
    assert (bridge_edges["weight"] > 0).all()
def test_curiosity_bridge_edge_never_decays_in_sweep(tmp_path):
    """curiosity_bridge edges are still present after the edge-decay sweep.

    The edges are first back-dated by 500 days and given a weight of 0.0001,
    then `_decay_edges` is run; at least one curiosity_bridge edge must
    remain in the table.
    """
    from datetime import timedelta

    from iai_mcp.curiosity import fire_curiosity
    from iai_mcp.sleep import _decay_edges

    store = MemoryStore(path=tmp_path)
    record = _rec()
    store.insert(record)
    fire_curiosity(store, [_Hit(record.id, 0.5)], "c", 0.9, "s-never", turn=1)

    edges_tbl = store.db.open_table(EDGES_TABLE)
    # Make the bridge edges look old and nearly weightless before sweeping.
    long_ago = datetime.now(timezone.utc) - timedelta(days=500)
    edges_tbl.update(
        where="edge_type = 'curiosity_bridge'",
        values={"updated_at": long_ago, "weight": 0.0001},
    )

    _decay_edges(store)

    after_sweep = edges_tbl.to_pandas()
    surviving = after_sweep[after_sweep["edge_type"] == "curiosity_bridge"]
    assert len(surviving) >= 1

465
tests/test_daemon.py Normal file
View file

@ -0,0 +1,465 @@
"""Tests for iai_mcp.daemon -- Task 3.
Covers 10 behaviours:
1. main() completes cleanly when shutdown event is set externally.
2. State-machine transitions: valid edges succeed, illegal edges raise ValueError.
3. Scheduler tick body gets called repeatedly; exceptions caught, daemon continues.
4. bge-m3 prewarm invoked exactly once at boot.
5. Graceful shutdown cancels scheduler + socket tasks; lock fd closed.
5b. mid-night MCP shared-lock acquisition surfaces via holds_exclusive_nb=False.
6. Empty-store shortcut: _tick_body records `empty_store` reason without REM work.
7. launchd plist is valid XML + has required Label/KeepAlive/ThrottleInterval keys.
8. systemd unit has Type=simple + Restart=on-failure + WantedBy=default.target +
python3 -m iai_mcp.daemon + TimeoutStopSec=60.
9. Neither plist nor systemd unit contains ANTHROPIC_API_KEY (C3 guard).
"""
from __future__ import annotations
import asyncio
import plistlib
import signal
import subprocess
from datetime import datetime, timezone
from pathlib import Path
from unittest.mock import patch
import pytest
PROJECT_ROOT = Path(__file__).resolve().parent.parent
PLIST_PATH = PROJECT_ROOT / "deploy" / "launchd" / "com.iai-mcp.daemon.plist"
SERVICE_PATH = PROJECT_ROOT / "deploy" / "systemd" / "iai-mcp-daemon.service"
def _module_child_take_shared(path_str: str, ready_flag: str, release_flag: str) -> None:
    """Take a shared flock on a file, signal readiness, and wait to be released.

    Defined at module level because, per the original author, the spawn
    context needs a top-level callable.

    Args:
        path_str: file to open (created with mode 0o600 if missing) and lock.
        ready_flag: path that receives the text "ok" once the lock is held.
        release_flag: path whose existence ends the wait; it is polled every
            0.1 s for at most 300 polls.
    """
    import fcntl
    import os
    import time
    from pathlib import Path

    lock_fd = os.open(path_str, os.O_RDWR | os.O_CREAT, 0o600)
    try:
        fcntl.flock(lock_fd, fcntl.LOCK_SH)
        Path(ready_flag).write_text("ok")
        release_marker = Path(release_flag)
        polls_left = 300
        while polls_left > 0 and not release_marker.exists():
            time.sleep(0.1)
            polls_left -= 1
    finally:
        # Unlocking is best-effort; the descriptor is closed either way.
        try:
            fcntl.flock(lock_fd, fcntl.LOCK_UN)
        except OSError:
            pass
        os.close(lock_fd)
# ---------------------------------------------------------------------------
# helpers
# ---------------------------------------------------------------------------
def _fresh_store(tmp_path, monkeypatch):
    """Return a MemoryStore built after pointing the store env vars at tmp_path.

    Sets IAI_MCP_STORE to `<tmp_path>/iai` and IAI_MCP_EMBED_DIM to "384"
    before constructing the store with no arguments.
    """
    env_overrides = {
        "IAI_MCP_STORE": str(tmp_path / "iai"),
        "IAI_MCP_EMBED_DIM": "384",
    }
    for env_name, env_value in env_overrides.items():
        monkeypatch.setenv(env_name, env_value)

    from iai_mcp.store import MemoryStore

    return MemoryStore()
def _short_socket_paths(tmp_path, monkeypatch):
    """Point concurrency.LOCK_PATH and SOCKET_PATH at test-local locations.

    The lock file goes under tmp_path. The socket goes in a freshly created
    directory directly under /tmp so its path stays short (the original
    author cites the 104-character AF_UNIX path limit).

    Returns:
        (lock_path, sock_path, sock_dir)
    """
    import os

    from iai_mcp import concurrency

    # pid plus id(tmp_path) makes the directory name specific to this call.
    sock_dir = Path(f"/tmp/iai-daemon-{os.getpid()}-{id(tmp_path)}")
    sock_dir.mkdir(parents=True, exist_ok=True)
    sock_path = sock_dir / "d.sock"
    lock_path = tmp_path / ".lock"

    monkeypatch.setattr(concurrency, "LOCK_PATH", lock_path)
    monkeypatch.setattr(concurrency, "SOCKET_PATH", sock_path)
    return lock_path, sock_path, sock_dir
# ---------------------------------------------------------------------------
# Test 1: clean shutdown via signal-like event trigger
# ---------------------------------------------------------------------------
def test_main_clean_shutdown(tmp_path, monkeypatch):
    """main() ends with result 0 when its task is cancelled shortly after start.

    The daemon's main() is started as an asyncio task, left to run for 0.2 s,
    then cancelled. The result is whatever main() returns, or 0 if awaiting
    the task raises CancelledError.
    """
    from iai_mcp import daemon as daemon_mod
    from iai_mcp import daemon_state as ds_mod

    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path / "iai"))
    monkeypatch.setenv("IAI_MCP_EMBED_DIM", "384")
    monkeypatch.setattr(ds_mod, "STATE_PATH", tmp_path / ".daemon-state.json")
    _short_socket_paths(tmp_path, monkeypatch)

    class _Stub:
        def embed(self, text):
            return [0.0]

    def _fake_embedder(store):
        # Stand-in so no real embedder (and no model download) is needed.
        return _Stub()

    monkeypatch.setattr("iai_mcp.embed.embedder_for_store", _fake_embedder)

    async def runner():
        daemon_task = asyncio.create_task(daemon_mod.main())
        # Allow a short boot window, then stop the daemon by cancelling it.
        await asyncio.sleep(0.2)
        daemon_task.cancel()
        try:
            outcome = await daemon_task
        except asyncio.CancelledError:
            outcome = 0
        return outcome

    exit_code = asyncio.run(runner())
    assert exit_code == 0
# ---------------------------------------------------------------------------
# Test 2: state-machine transitions
# ---------------------------------------------------------------------------
def test_state_machine_transitions(tmp_path, monkeypatch):
    """Legal FSM edges update the state; illegal ones raise and change nothing.

    Path walked: (default) -> TRANSITIONING -> SLEEP -> DREAMING, an illegal
    DREAMING -> TRANSITIONING attempt, then DREAMING -> SLEEP -> WAKE, and an
    illegal WAKE -> SLEEP attempt. Finally the state loaded from disk must
    report WAKE.
    """
    from iai_mcp import daemon as daemon_mod
    from iai_mcp import daemon_state as ds_mod

    monkeypatch.setattr(ds_mod, "STATE_PATH", tmp_path / ".daemon-state.json")

    state: dict = {}  # empty dict: the author notes this starts at the WAKE default

    def advance_to(target):
        # Perform a transition that is expected to succeed and verify it.
        daemon_mod.transition(state, target)
        assert state["fsm_state"] == target

    advance_to(daemon_mod.STATE_TRANSITIONING)
    advance_to(daemon_mod.STATE_SLEEP)
    advance_to(daemon_mod.STATE_DREAMING)

    # DREAMING -> TRANSITIONING is not allowed; state must stay DREAMING.
    with pytest.raises(ValueError, match="Illegal transition"):
        daemon_mod.transition(state, daemon_mod.STATE_TRANSITIONING)
    assert state["fsm_state"] == daemon_mod.STATE_DREAMING

    advance_to(daemon_mod.STATE_SLEEP)
    advance_to(daemon_mod.STATE_WAKE)

    # WAKE -> SLEEP is not allowed; it has to pass through TRANSITIONING.
    with pytest.raises(ValueError):
        daemon_mod.transition(state, daemon_mod.STATE_SLEEP)

    # The last successful transition is what load_state reads back.
    persisted = ds_mod.load_state()
    assert persisted["fsm_state"] == daemon_mod.STATE_WAKE
# ---------------------------------------------------------------------------
# Test 3: scheduler tick loop continues after exceptions
# ---------------------------------------------------------------------------
def test_scheduler_tick_survives_exceptions(tmp_path, monkeypatch):
    """The scheduler loop keeps ticking after a tick body raises.

    A tick body that raises RuntimeError on its first call only is injected
    into `_scheduler_tick`. After running the loop for 0.1 s the body must
    have been called at least twice, and a `tick_error` event carrying the
    exception text must be in the store.
    """
    from iai_mcp import daemon as daemon_mod

    store = _fresh_store(tmp_path, monkeypatch)
    # Zero interval so several ticks fit inside the short run window.
    monkeypatch.setattr(daemon_mod, "TICK_INTERVAL_SEC", 0)

    from iai_mcp.concurrency import ProcessLock

    lock = ProcessLock(tmp_path / ".lock")
    state: dict = {}
    call_count = {"n": 0}

    async def flaky_body(store, lock, state):
        call_count["n"] += 1
        if call_count["n"] == 1:
            raise RuntimeError("simulated tick failure")

    async def runner():
        task = asyncio.create_task(
            daemon_mod._scheduler_tick(store, lock, state, tick_body=flaky_body)
        )
        # Let several ticks happen, then stop the loop.
        await asyncio.sleep(0.1)
        task.cancel()
        try:
            await task
        except asyncio.CancelledError:
            pass

    try:
        asyncio.run(runner())
    finally:
        # Release the lock even when the run itself blows up, so a failure
        # here does not leave the lock open behind it.
        lock.close()

    assert call_count["n"] >= 2, (
        f"tick loop did not continue past first exception; only {call_count['n']} calls"
    )
    # The failing first call must have been recorded as a tick_error event.
    from iai_mcp.events import query_events

    err_events = query_events(store, kind="tick_error", limit=5)
    assert len(err_events) >= 1
    assert "simulated tick failure" in err_events[0]["data"].get("error", "")
# ---------------------------------------------------------------------------
# Test 4: bge-m3 prewarm called exactly once at boot
# ---------------------------------------------------------------------------
def test_prewarm_called_once_at_boot(tmp_path, monkeypatch):
    """Check that the stub embedder's `embed` runs exactly once during boot.

    `daemon_mod.main()` is started against a tmp store with
    `iai_mcp.embed.embedder_for_store` replaced by a recording stub, left
    running for 0.15 s, then cancelled. Every `embed` call is recorded;
    exactly one is expected.
    """
    from iai_mcp import daemon as daemon_mod
    from iai_mcp import daemon_state as ds_mod

    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path / "iai"))
    monkeypatch.setenv("IAI_MCP_EMBED_DIM", "384")
    monkeypatch.setattr(ds_mod, "STATE_PATH", tmp_path / ".daemon-state.json")
    _short_socket_paths(tmp_path, monkeypatch)

    embed_inputs: list = []

    class _StubEmbedder:
        def embed(self, text):
            embed_inputs.append(text)
            return [0.0]

    monkeypatch.setattr(
        "iai_mcp.embed.embedder_for_store",
        lambda store: _StubEmbedder(),
    )

    async def runner():
        daemon_task = asyncio.create_task(daemon_mod.main())
        await asyncio.sleep(0.15)
        daemon_task.cancel()
        try:
            await daemon_task
        except asyncio.CancelledError:
            pass

    asyncio.run(runner())
    assert len(embed_inputs) == 1, (
        f"prewarm expected once, got {len(embed_inputs)}"
    )
# ---------------------------------------------------------------------------
# Test 5: graceful shutdown cancels both tasks + closes lock fd
# ---------------------------------------------------------------------------
def test_graceful_shutdown_cancels_tasks_and_closes_lock(tmp_path, monkeypatch):
    """Check that cancelling `daemon_mod.main()` ends with `ProcessLock.close()`.

    `ProcessLock.close` is wrapped with a counting shim, the daemon is
    started against a tmp store with a stub embedder, given a bounded
    startup window, and then cancelled. At least one `close()` call must
    have been observed afterwards.
    """
    from iai_mcp import daemon as daemon_mod
    from iai_mcp import daemon_state as ds_mod
    from iai_mcp import concurrency

    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path / "iai"))
    monkeypatch.setenv("IAI_MCP_EMBED_DIM", "384")
    monkeypatch.setattr(ds_mod, "STATE_PATH", tmp_path / ".daemon-state.json")
    _short_socket_paths(tmp_path, monkeypatch)

    def _fake_embedder(store):
        class _S:
            def embed(self, text):
                return [0.0] * 384

        return _S()

    monkeypatch.setattr("iai_mcp.embed.embedder_for_store", _fake_embedder)

    close_calls = {"n": 0}
    real_close = concurrency.ProcessLock.close

    def _tracked_close(self):
        # Count the call, then defer to the real implementation.
        close_calls["n"] += 1
        real_close(self)

    monkeypatch.setattr(concurrency.ProcessLock, "close", _tracked_close)

    # Startup budget before cancelling: 20 polls x 50 ms = 1 s. The daemon
    # runs several startup steps before `await shutdown.wait()` (LifecycleLock
    # acquire, capture_queue ingest, lifecycle FSM init, heartbeat scanner
    # init, sleep_pipeline init, lifecycle_tick spawn). Cancelling too early
    # would interrupt setup instead of exercising the shutdown path, so the
    # test waits out the budget unless the task finishes on its own first.
    poll_interval_sec = 0.05
    max_polls = 20

    async def runner():
        task = asyncio.create_task(daemon_mod.main())
        for _ in range(max_polls):
            await asyncio.sleep(poll_interval_sec)
            if task.done():
                # Daemon exited by itself; nothing left to wait for.
                break
        task.cancel()
        try:
            await task
        except asyncio.CancelledError:
            pass

    asyncio.run(runner())
    assert close_calls["n"] >= 1, "lock.close() was never called on shutdown"
# ---------------------------------------------------------------------------
# Test 5b: holds_exclusive_nb returns False when a shared holder appears
# ---------------------------------------------------------------------------
def test_d06_holds_exclusive_nb_yields_to_mcp(tmp_path, monkeypatch):
    """Check `holds_exclusive_nb()` flips to False once a child holds the lock shared.

    The parent acquires the lock exclusively, confirms the probe reports
    True, and releases. A spawned child (`_module_child_take_shared`)
    signals readiness through a flag file; while it is ready, the parent's
    probe must report False. The child is always told to release and is
    terminated if it does not exit in time.
    """
    import multiprocessing
    import time

    from iai_mcp.concurrency import ProcessLock

    ctx = multiprocessing.get_context("spawn")
    lock_path = tmp_path / ".lock"
    ready_flag = tmp_path / ".ready"
    release_flag = tmp_path / ".release"

    daemon_lock = ProcessLock(lock_path)
    try:
        assert daemon_lock.try_acquire_exclusive() is True
        assert daemon_lock.holds_exclusive_nb() is True
        # Release so the child can take the lock in shared mode (simulating
        # the gap between REM cycles when the daemon intentionally yields).
        daemon_lock.release()

        child = ctx.Process(
            target=_module_child_take_shared,
            args=(str(lock_path), str(ready_flag), str(release_flag)),
        )
        child.start()
        try:
            # Poll for the child's readiness flag for at most 15 seconds.
            give_up_at = time.time() + 15
            while time.time() < give_up_at:
                if ready_flag.exists():
                    break
                time.sleep(0.05)
            assert ready_flag.exists()
            # Probe: daemon should see "no, we don't hold EX; MCP is active".
            assert daemon_lock.holds_exclusive_nb() is False
        finally:
            release_flag.write_text("go")
            child.join(timeout=10)
            if child.is_alive():
                child.terminate()
                child.join(timeout=2)
    finally:
        daemon_lock.close()
# ---------------------------------------------------------------------------
# Test 6: empty-store shortcut in _tick_body
# ---------------------------------------------------------------------------
def test_empty_store_shortcut(tmp_path, monkeypatch):
    """Check that `_tick_body` on a fresh store records the empty-store skip.

    After one `_tick_body` call, `state["last_tick_skipped_reason"]` must
    equal "empty_store" and no `rem_cycle_started` event may exist.
    """
    from iai_mcp import daemon as daemon_mod

    store = _fresh_store(tmp_path, monkeypatch)

    from iai_mcp.concurrency import ProcessLock

    lock = ProcessLock(tmp_path / ".lock")
    state: dict = {"fsm_state": "WAKE"}

    # Run a single tick to completion on a private event loop.
    asyncio.run(daemon_mod._tick_body(store, lock, state))
    lock.close()

    skip_reason = state.get("last_tick_skipped_reason")
    assert skip_reason == "empty_store"

    # An empty store must not have started a REM cycle.
    from iai_mcp.events import query_events

    rem_events = query_events(store, kind="rem_cycle_started", limit=5)
    assert rem_events == []
# ---------------------------------------------------------------------------
# Test 7: launchd plist valid XML + required keys
# ---------------------------------------------------------------------------
def test_launchd_plist_valid_xml_with_required_keys():
    """Parse the launchd plist at `PLIST_PATH` and check its required keys.

    Covers the label, the final program argument, RunAtLoad, the KeepAlive
    policy, ThrottleInterval, the log/working-directory keys, and the
    environment block (required variables present, no ANTHROPIC_API_KEY).
    """
    assert PLIST_PATH.exists(), f"missing plist at {PLIST_PATH}"
    data = plistlib.loads(PLIST_PATH.read_bytes())

    assert data["Label"] == "com.iai-mcp.daemon"
    program_args = data["ProgramArguments"]
    assert program_args[-1] == "iai_mcp.daemon"
    assert data["RunAtLoad"] is True

    keepalive = data["KeepAlive"]
    assert isinstance(keepalive, dict)
    # Plan 10.6-01 Task 1.7: the KeepAlive policy is `Crashed=true` only.
    # The legacy `SuccessfulExit=false` was paired with the 75/0 exit-code
    # branching; with the lifecycle state machine the exit code is uniformly
    # 0 on graceful shutdown, so SuccessfulExit=false would create a
    # respawn loop.
    assert keepalive.get("Crashed") is True
    assert "SuccessfulExit" not in keepalive

    assert data["ThrottleInterval"] == 5
    for top_level_key in ("StandardOutPath", "StandardErrorPath", "WorkingDirectory"):
        assert top_level_key in data

    env = data["EnvironmentVariables"]
    missing = [
        key for key in ("PATH", "IAI_MCP_STORE", "HOME", "LANG") if key not in env
    ]
    assert not missing, f"missing env key {missing[0]}"
    # C3 guard (redundant with Test 9 but checked locally too).
    assert "ANTHROPIC_API_KEY" not in env
# ---------------------------------------------------------------------------
# Test 8: systemd unit required keys
# ---------------------------------------------------------------------------
def test_systemd_unit_required_keys():
    """Check that the systemd unit at `SERVICE_PATH` contains every required directive.

    This is a plain substring check over the unit file's text; the
    directives are checked in the order listed.
    """
    assert SERVICE_PATH.exists(), f"missing unit file at {SERVICE_PATH}"
    text = SERVICE_PATH.read_text()

    required_fragments = (
        "[Unit]",
        "Description=",
        "[Service]",
        "Type=simple",
        "Restart=on-failure",
        "RestartSec=30",
        "StartLimitIntervalSec=60",
        "StartLimitBurst=3",
        "python3 -m iai_mcp.daemon",
        "StandardOutput=journal",
        "StandardError=journal",
        "SyslogIdentifier=iai-mcp-daemon",
        "TimeoutStopSec=60",
        "KillSignal=SIGTERM",
        "[Install]",
        "WantedBy=default.target",
    )
    for fragment in required_fragments:
        assert fragment in text
# ---------------------------------------------------------------------------
# Test 9: C3 guard -- no ANTHROPIC_API_KEY anywhere
# ---------------------------------------------------------------------------
def test_c3_no_anthropic_api_key_in_artifacts():
    """C3 guard: `ANTHROPIC_API_KEY` must not appear in daemon.py, the plist, or the unit file."""
    artifact_paths = {
        "daemon.py": PROJECT_ROOT / "src" / "iai_mcp" / "daemon.py",
        "plist": PLIST_PATH,
        "service": SERVICE_PATH,
    }
    # Read all three files up front, then scan each one.
    sources = {name: path.read_text() for name, path in artifact_paths.items()}
    for name, src in sources.items():
        assert "ANTHROPIC_API_KEY" not in src, (
            f"C3 VIOLATION: ANTHROPIC_API_KEY found in {name}"
        )

View file

@ -0,0 +1,556 @@
"""End-to-end round-trip tests for the daemon socket dispatcher (Plan 04-gap-1).
Unlike tests/test_core_bedtime_inject.py (which uses _ThreadedFakeDaemon that
echoes canned OK replies), these tests spin up the REAL serve_control_socket
with the REAL _dispatch_socket_request bound to a REAL state dict + real
ProcessLock on a tmp directory. They send each of the 6 message types as
real NDJSON over a real AF_UNIX socket and assert:
- correct response shape per message type
- state mutations actually persisted to ~/.iai-mcp/.daemon-state.json
(scoped to tmp_path via monkeypatch of daemon_state.STATE_PATH)
- invalid messages rejected with invalid_message reason code
- unknown types rejected with unknown_message_type reason code
- version field present in status response
- concurrent clients handled without corruption
This closes the verifier-identified test gap that masked the dispatcher
blocker throughout execution.
"""
from __future__ import annotations
import asyncio
import json
import os
import tempfile
from pathlib import Path
import pytest
# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------
@pytest.fixture
def short_socket_paths(tmp_path, monkeypatch):
    """Redirect LOCK_PATH + SOCKET_PATH + STATE_PATH away from the real store.

    AF_UNIX on macOS caps socket paths at ~104 bytes; pytest's tmp_path can
    be too long under xdist. The socket therefore lives in a fresh, uniquely
    named /tmp/iai-disp-XXXXXXXX/ directory, while the lock and state files
    live under tmp_path (regular filesystem, no length limit).

    Yields:
        A `(lock_path, sock_path, state_path)` tuple.

    The socket directory is removed recursively on teardown, so files that
    helpers create next to the socket (e.g. the `.lock_inline` path that
    `_with_real_dispatcher` hands to ProcessLock) do not leave it behind.
    """
    import shutil

    from iai_mcp import concurrency, daemon_state

    lock_path = tmp_path / ".lock"
    state_path = tmp_path / ".daemon-state.json"
    # mkdtemp guarantees a fresh, private directory; a pid/id-derived name
    # could be predicted or silently reused from an earlier run.
    sock_dir = Path(tempfile.mkdtemp(prefix="iai-disp-", dir="/tmp"))
    sock_path = sock_dir / "d.sock"

    monkeypatch.setattr(concurrency, "LOCK_PATH", lock_path)
    monkeypatch.setattr(concurrency, "SOCKET_PATH", sock_path)
    monkeypatch.setattr(daemon_state, "STATE_PATH", state_path)
    try:
        yield lock_path, sock_path, state_path
    finally:
        # Best-effort cleanup: teardown must never fail the test itself.
        shutil.rmtree(sock_dir, ignore_errors=True)
async def _send_ndjson(sock_path: Path, message: dict, *, timeout: float = 5.0) -> dict:
    """Send one NDJSON request over the unix socket and return the decoded reply.

    Args:
        sock_path: Filesystem path of the AF_UNIX socket to connect to.
        message: JSON-serialisable request, sent as a single line.
        timeout: Seconds allowed for the connect and, separately, for
            reading the reply line.

    Returns:
        The reply line parsed as JSON.

    Raises:
        AssertionError: If the peer closes the connection without replying.
    """
    connect = asyncio.open_unix_connection(path=str(sock_path))
    reader, writer = await asyncio.wait_for(connect, timeout=timeout)
    try:
        payload = json.dumps(message) + "\n"
        writer.write(payload.encode("utf-8"))
        await writer.drain()
        raw_reply = await asyncio.wait_for(reader.readline(), timeout=timeout)
    finally:
        writer.close()
        try:
            await writer.wait_closed()
        except Exception:
            # Closing is best-effort; a close error must not mask the result.
            pass
    if raw_reply:
        return json.loads(raw_reply.decode("utf-8"))
    raise AssertionError("daemon closed without reply")
async def _with_real_dispatcher(sock_path: Path, state: dict, coro_fn):
    """Boot the real `serve_control_socket`, run `coro_fn(sock_path, state)`, tear down.

    Args:
        sock_path: Path the control socket is bound to.
        state: State dict handed to the server and to `coro_fn`.
        coro_fn: Coroutine function called as `coro_fn(sock_path, state)`
            once the socket file exists.

    Returns:
        Whatever `coro_fn` returns.

    Raises:
        AssertionError: If the socket file does not appear within the bind
            wait (250 polls x 10 ms). When the server task has already
            failed, its exception is attached as the cause.

    Teardown (signal shutdown, await the server for up to 5 s, close the
    lock) runs on every exit path, including the never-bound path.
    """
    from iai_mcp.concurrency import ProcessLock, serve_control_socket

    lock = ProcessLock(sock_path.parent / ".lock_inline")
    shutdown = asyncio.Event()
    server_task = asyncio.create_task(
        serve_control_socket(
            store=None,
            lock=lock,
            state=state,
            shutdown=shutdown,
            socket_path=sock_path,
        ),
    )
    try:
        # Wait for bind.
        for _ in range(250):
            if sock_path.exists():
                break
            await asyncio.sleep(0.01)
        else:
            # Surface the server's own failure, if any, as the cause.
            cause = None
            if server_task.done() and not server_task.cancelled():
                cause = server_task.exception()
            raise AssertionError("socket never bound") from cause
        return await coro_fn(sock_path, state)
    finally:
        shutdown.set()
        try:
            await asyncio.wait_for(server_task, timeout=5)
        except Exception:
            # Best-effort teardown: the outcome of the body above wins.
            pass
        lock.close()
# ---------------------------------------------------------------------------
# Test 1: status returns version + fsm_state + uptime + pending_digest shape
# ---------------------------------------------------------------------------
def test_status_returns_version_and_full_snapshot(short_socket_paths):
    """Check the `status` reply: version, FSM snapshot, and a truncated digest.

    The reply must echo the seeded state fields, report the package
    version, and expose only the digest counters (no `main_insight_text`).
    """
    from iai_mcp import __version__ as pkg_version

    _, sock_path, _ = short_socket_paths
    started_at = "2026-04-18T00:00:00+00:00"
    last_tick_at = "2026-04-18T12:30:00+00:00"
    state = {
        "fsm_state": "WAKE",
        "daemon_started_at": started_at,
        "last_tick_at": last_tick_at,
        "quiet_window": [44, 16],
        "pending_digest": {
            "rem_cycles_completed": 2,
            "episodes_processed": 15,
            "schemas_induced_tier0": 3,
            "claude_call_used": True,
            "main_insight_text": "deeply long verbose insight text " * 50,
        },
        "scheduler_paused": False,
    }

    async def _ask_status(path, _state):
        return await _send_ndjson(path, {"type": "status"})

    resp = asyncio.run(_with_real_dispatcher(sock_path, state, _ask_status))

    assert resp["ok"] is True
    # Backwards-compat keys.
    assert resp["state"] == "WAKE"
    assert isinstance(resp["uptime_sec"], (int, float))
    # Plan 04-gap-1 additions.
    assert resp["version"] == pkg_version
    assert resp["fsm_state"] == "WAKE"
    assert resp["last_tick_at"] == last_tick_at
    assert resp["quiet_window"] == [44, 16]
    assert resp["daemon_started_at"] == started_at
    assert resp["scheduler_paused"] is False

    # pending_digest is truncated to top-level counters (no main_insight_text).
    digest = resp["pending_digest"]
    assert digest["rem_cycles_completed"] == 2
    assert digest["episodes_processed"] == 15
    assert digest["schemas_induced_tier0"] == 3
    assert digest["claude_call_used"] is True
    assert "main_insight_text" not in digest, (
        "truncated digest leaked verbose text over the socket"
    )
# ---------------------------------------------------------------------------
# Test 2: user_initiated_sleep persists state AND respects already_sleeping
# ---------------------------------------------------------------------------
def test_user_initiated_sleep_sets_pending_flag(short_socket_paths):
    """Check that `user_initiated_sleep` from WAKE is accepted and persisted.

    The reply must be `{"ok": True, "state": "TRANSITIONING"}` and the
    state loaded from disk must hold the pending request with the reason
    and timestamp that were sent.
    """
    from iai_mcp.daemon_state import load_state

    _, sock_path, _ = short_socket_paths
    request = {
        "type": "user_initiated_sleep",
        "reason": "I am going to bed",
        "ts": "2026-04-18T23:00:00+00:00",
    }

    async def _send_request(path, _state):
        return await _send_ndjson(path, request)

    resp = asyncio.run(
        _with_real_dispatcher(sock_path, {"fsm_state": "WAKE"}, _send_request)
    )
    assert resp == {"ok": True, "state": "TRANSITIONING"}

    # State mutation persisted to disk.
    persisted = load_state()["user_sleep_request"]
    assert persisted["pending"] is True
    assert persisted["reason"] == "I am going to bed"
    assert persisted["ts"] == "2026-04-18T23:00:00+00:00"
def test_user_initiated_sleep_rejects_when_already_sleeping(short_socket_paths):
    """Check that `user_initiated_sleep` is refused while the FSM is DREAMING.

    The reply must be `{"ok": False, "reason": "already_sleeping"}` and no
    `user_sleep_request` may show up in the loaded state.
    """
    from iai_mcp.daemon_state import load_state

    _, sock_path, _ = short_socket_paths
    request = {
        "type": "user_initiated_sleep",
        "reason": "redundant",
        "ts": "2026-04-18T23:00:00+00:00",
    }

    async def _send_request(path, _state):
        return await _send_ndjson(path, request)

    resp = asyncio.run(
        _with_real_dispatcher(sock_path, {"fsm_state": "DREAMING"}, _send_request)
    )
    assert resp == {"ok": False, "reason": "already_sleeping"}

    # The dispatcher doesn't touch state in the already_sleeping branch, so
    # the file may not exist (no prior save_state call). Either way: no flag.
    assert "user_sleep_request" not in load_state()
# ---------------------------------------------------------------------------
# Test 3: force_wake / force_rem set pending flags + persist
# ---------------------------------------------------------------------------
def test_force_wake_queues_flag(short_socket_paths):
    """Check that `force_wake` replies `wake_queued` and persists the pending request."""
    from iai_mcp.daemon_state import load_state

    _, sock_path, _ = short_socket_paths
    wake_ts = "2026-04-18T23:45:00+00:00"

    async def _send_request(path, _state):
        return await _send_ndjson(path, {"type": "force_wake", "ts": wake_ts})

    resp = asyncio.run(
        _with_real_dispatcher(sock_path, {"fsm_state": "DREAMING"}, _send_request)
    )
    assert resp == {"ok": True, "reason": "wake_queued"}

    wake_request = load_state()["force_wake_request"]
    assert wake_request["pending"] is True
    assert wake_request["ts"] == "2026-04-18T23:45:00+00:00"
def test_force_rem_queues_flag(short_socket_paths):
    """Check that `force_rem` replies `rem_queued` and persists the pending request."""
    from iai_mcp.daemon_state import load_state

    _, sock_path, _ = short_socket_paths
    rem_ts = "2026-04-18T10:00:00+00:00"

    async def _send_request(path, _state):
        return await _send_ndjson(path, {"type": "force_rem", "ts": rem_ts})

    resp = asyncio.run(
        _with_real_dispatcher(sock_path, {"fsm_state": "WAKE"}, _send_request)
    )
    assert resp == {"ok": True, "reason": "rem_queued"}

    rem_request = load_state()["force_rem_request"]
    assert rem_request["pending"] is True
    assert rem_request["ts"] == "2026-04-18T10:00:00+00:00"
# ---------------------------------------------------------------------------
# Test 4: pause/resume flip scheduler_paused flag
# ---------------------------------------------------------------------------
def test_pause_then_resume_flips_flag(short_socket_paths):
    """Check that `pause` followed by `resume` leaves `scheduler_paused` False on disk."""
    from iai_mcp.daemon_state import load_state

    _, sock_path, _ = short_socket_paths

    async def _pause_then_resume(path, _state):
        # Strictly sequential: the resume is sent after the pause reply.
        paused_reply = await _send_ndjson(path, {"type": "pause"})
        resumed_reply = await _send_ndjson(path, {"type": "resume"})
        return paused_reply, resumed_reply

    paused_reply, resumed_reply = asyncio.run(
        _with_real_dispatcher(sock_path, {"fsm_state": "WAKE"}, _pause_then_resume)
    )
    assert paused_reply == {"ok": True, "paused": True}
    assert resumed_reply == {"ok": True, "paused": False}

    # After resume, scheduler_paused must be False (the LAST value written).
    assert load_state()["scheduler_paused"] is False
def test_pause_persists_True_before_resume(short_socket_paths):
    """Check that a lone `pause` (no resume) persists `scheduler_paused` as True."""
    from iai_mcp.daemon_state import load_state

    _, sock_path, _ = short_socket_paths

    async def _send_pause(path, _state):
        return await _send_ndjson(path, {"type": "pause"})

    resp = asyncio.run(
        _with_real_dispatcher(sock_path, {"fsm_state": "WAKE"}, _send_pause)
    )
    assert resp == {"ok": True, "paused": True}
    assert load_state()["scheduler_paused"] is True
# ---------------------------------------------------------------------------
# Test 5: unknown type returns structured error
# ---------------------------------------------------------------------------
def test_unknown_message_type_returns_error(short_socket_paths):
    """Check that an unrecognised `type` yields a structured error echoing the type."""
    _, sock_path, _ = short_socket_paths
    bogus_request = {"type": "nuke_from_orbit", "ts": "whatever"}

    async def _send_request(path, _state):
        return await _send_ndjson(path, bogus_request)

    resp = asyncio.run(
        _with_real_dispatcher(sock_path, {"fsm_state": "WAKE"}, _send_request)
    )
    assert resp["ok"] is False
    assert resp["reason"] == "unknown_message_type"
    assert resp["type"] == "nuke_from_orbit"
# ---------------------------------------------------------------------------
# Test 6: invalid messages rejected with ASVS V5 reason code
# ---------------------------------------------------------------------------
def test_invalid_message_missing_ts_on_force_wake(short_socket_paths):
    """Check that `force_wake` without `ts` is rejected as `invalid_message` naming `ts`."""
    _, sock_path, _ = short_socket_paths

    async def _send_request(path, _state):
        return await _send_ndjson(path, {"type": "force_wake"})

    resp = asyncio.run(
        _with_real_dispatcher(sock_path, {"fsm_state": "WAKE"}, _send_request)
    )
    assert resp["ok"] is False
    assert resp["reason"] == "invalid_message"
    assert "ts" in resp["error"]
def test_invalid_message_wrong_type_user_sleep(short_socket_paths):
    """Check that a non-string `reason` on `user_initiated_sleep` is rejected.

    The error text in the reply must mention `reason`.
    """
    _, sock_path, _ = short_socket_paths
    bad_request = {"type": "user_initiated_sleep", "reason": 42, "ts": "x"}

    async def _send_request(path, _state):
        return await _send_ndjson(path, bad_request)

    resp = asyncio.run(
        _with_real_dispatcher(sock_path, {"fsm_state": "WAKE"}, _send_request)
    )
    assert resp["ok"] is False
    assert resp["reason"] == "invalid_message"
    assert "reason" in resp["error"]
def test_invalid_message_non_string_type(short_socket_paths):
    """Check that a message whose `type` is not a string is rejected as `invalid_message`."""
    _, sock_path, _ = short_socket_paths

    async def _send_request(path, _state):
        return await _send_ndjson(path, {"type": 42})

    resp = asyncio.run(
        _with_real_dispatcher(sock_path, {"fsm_state": "WAKE"}, _send_request)
    )
    assert resp["ok"] is False
    assert resp["reason"] == "invalid_message"
def test_invalid_message_pause_wrong_seconds_type(short_socket_paths):
    """Check that `pause` with a string `seconds` is rejected as `invalid_message`.

    The error text in the reply must mention `seconds`.
    """
    _, sock_path, _ = short_socket_paths
    bad_request = {"type": "pause", "seconds": "forever"}

    async def _send_request(path, _state):
        return await _send_ndjson(path, bad_request)

    resp = asyncio.run(
        _with_real_dispatcher(sock_path, {"fsm_state": "WAKE"}, _send_request)
    )
    assert resp["ok"] is False
    assert resp["reason"] == "invalid_message"
    assert "seconds" in resp["error"]
# ---------------------------------------------------------------------------
# Test 7: C2 guard -- dispatcher never transitions FSM directly
# ---------------------------------------------------------------------------
def test_dispatcher_does_not_transition_fsm_directly(short_socket_paths):
    """C2 guard: handling `user_initiated_sleep` must leave `fsm_state` at WAKE.

    The dispatcher only sets a pending flag; the FSM stays at WAKE until
    the scheduler tick picks the flag up. Without this invariant the
    dispatcher and the scheduler would race on the FSM state.
    """
    _, sock_path, _ = short_socket_paths
    sleep_request = {
        "type": "user_initiated_sleep",
        "reason": "night",
        "ts": "2026-04-18T23:00:00+00:00",
    }

    async def _send_then_read_fsm(path, live_state):
        await _send_ndjson(path, sleep_request)
        # Read the same dict the server was given, after the reply arrived.
        return live_state["fsm_state"]

    fsm_after = asyncio.run(
        _with_real_dispatcher(sock_path, {"fsm_state": "WAKE"}, _send_then_read_fsm)
    )
    # The dispatcher MUST leave fsm_state at WAKE; only the scheduler
    # transitions it (under the fcntl exclusive lock).
    assert fsm_after == "WAKE"
# ---------------------------------------------------------------------------
# Test 8: reason string clipped to 500 chars (ASVS V5 output hardening)
# ---------------------------------------------------------------------------
def test_user_initiated_sleep_reason_clipped(short_socket_paths):
    """Check that a 5000-char sleep `reason` is stored clipped to 500 chars."""
    from iai_mcp.daemon_state import load_state

    _, sock_path, _ = short_socket_paths
    oversized_request = {
        "type": "user_initiated_sleep",
        "reason": "x" * 5000,
        "ts": "2026-04-18T23:00:00+00:00",
    }

    async def _send_request(path, _state):
        return await _send_ndjson(path, oversized_request)

    resp = asyncio.run(
        _with_real_dispatcher(sock_path, {"fsm_state": "WAKE"}, _send_request)
    )
    assert resp == {"ok": True, "state": "TRANSITIONING"}

    stored_reason = load_state()["user_sleep_request"]["reason"]
    assert len(stored_reason) == 500
# ---------------------------------------------------------------------------
# Test 9: concurrent clients handled without data races
# ---------------------------------------------------------------------------
def test_concurrent_clients_both_succeed(short_socket_paths):
    """Check that two requests issued concurrently are both served and persisted.

    A `force_rem` and a `pause` are sent in parallel. Each must get its own
    well-formed reply and both mutations must be present in the loaded state.
    """
    from iai_mcp.daemon_state import load_state

    _, sock_path, _ = short_socket_paths

    async def _send_both(path, _state):
        # gather() preserves argument order in its result list.
        return await asyncio.gather(
            _send_ndjson(
                path,
                {"type": "force_rem", "ts": "2026-04-18T01:00:00+00:00"},
            ),
            _send_ndjson(path, {"type": "pause"}),
        )

    rem_reply, pause_reply = asyncio.run(
        _with_real_dispatcher(sock_path, {"fsm_state": "WAKE"}, _send_both)
    )
    # Both responses well-formed; dispatcher handled each independently.
    assert rem_reply == {"ok": True, "reason": "rem_queued"}
    assert pause_reply == {"ok": True, "paused": True}

    # Both state mutations persisted.
    loaded = load_state()
    assert loaded["force_rem_request"]["pending"] is True
    assert loaded["scheduler_paused"] is True
# ---------------------------------------------------------------------------
# Test 10: full suite hitting all 6 message types against one daemon
# ---------------------------------------------------------------------------
def test_full_message_type_matrix_end_to_end(short_socket_paths):
    """Drive all six message types, in order, against one live dispatcher.

    Order: status, user_initiated_sleep, force_rem, force_wake, pause,
    resume. Every reply is checked, then the loaded state must contain all
    three pending requests with `scheduler_paused` False.
    """
    from iai_mcp.daemon_state import load_state

    _, sock_path, _ = short_socket_paths
    state = {
        "fsm_state": "WAKE",
        "daemon_started_at": "2026-04-18T00:00:00+00:00",
    }
    # (label, request) pairs, sent strictly one after another.
    requests = [
        ("status", {"type": "status"}),
        (
            "user_initiated_sleep",
            {
                "type": "user_initiated_sleep",
                "reason": "bedtime",
                "ts": "2026-04-18T23:30:00+00:00",
            },
        ),
        ("force_rem", {"type": "force_rem", "ts": "2026-04-18T23:31:00+00:00"}),
        ("force_wake", {"type": "force_wake", "ts": "2026-04-18T23:32:00+00:00"}),
        ("pause", {"type": "pause"}),
        ("resume", {"type": "resume"}),
    ]

    async def _send_all(path, _state):
        replies = {}
        for label, request in requests:
            replies[label] = await _send_ndjson(path, request)
        return replies

    results = asyncio.run(_with_real_dispatcher(sock_path, state, _send_all))

    assert results["status"]["ok"] is True
    assert results["status"]["fsm_state"] == "WAKE"
    assert results["user_initiated_sleep"] == {"ok": True, "state": "TRANSITIONING"}
    assert results["force_rem"] == {"ok": True, "reason": "rem_queued"}
    assert results["force_wake"] == {"ok": True, "reason": "wake_queued"}
    assert results["pause"] == {"ok": True, "paused": True}
    assert results["resume"] == {"ok": True, "paused": False}

    # All mutations land in the ONE state file.
    loaded = load_state()
    for request_key in ("user_sleep_request", "force_rem_request", "force_wake_request"):
        assert loaded[request_key]["pending"] is True
    # scheduler_paused was toggled last via resume -> False.
    assert loaded["scheduler_paused"] is False

View file

@ -0,0 +1,281 @@
"""Phase 10.6 Plan 10.6-01 Task 1.8 -- rewritten contract tests.
Old contract (Phase 07.8 + bug-fix 2026-05-01):
Every non-RSS, non-user shutdown path returned exit 75. The
`user_requested_shutdown` sentinel + `_resolve_shutdown_exit_code`
helper differentiated explicit `iai-mcp daemon stop` (exit 0,
plist suppresses respawn) from every other shutdown path
(exit 75, plist respawns).
New contract:
Daemon main() exits 0 uniformly on graceful shutdown, regardless
of who triggered it. The plist's `KeepAlive={"Crashed": true}`
ensures graceful exit 0 stays DEAD until wrapper kickstart fires.
Only path returning a non-zero exit is `LifecycleLockConflict`
(a same-host live-PID conflict) which returns 1.
Cross-process invariant PRESERVED from 541c874:
The CLI `iai-mcp daemon stop` runs in a SEPARATE process from
the daemon. CLI writes the `user_requested_shutdown=True`
sentinel to `.daemon-state.json` BEFORE sending SIGTERM. The
daemon's main() finally block calls
`_clear_user_shutdown_sentinel(state)` which:
1. Reads the on-disk state file (the source of truth, since
the in-memory state was loaded at boot).
2. Pops the sentinel from disk + memory.
3. Re-saves the cleaned state record.
The sentinel is now informational rather than control: its presence
on disk no longer changes the exit code. Tests E + F still verify
the CLI write-before-SIGTERM ordering -- that ordering is what
makes the daemon's later cleanup symmetric across boots.
Validates: WAKE-14.
"""
from __future__ import annotations
import platform
from pathlib import Path
import pytest
from iai_mcp import cli as cli_mod
from iai_mcp import daemon as daemon_mod
from iai_mcp import daemon_state as state_mod
# ---------------------------------------------------------------------------
# Test A -- _clear_user_shutdown_sentinel: clean state -> in-memory pop only
# ---------------------------------------------------------------------------
def test_clear_sentinel_no_disk_flag(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
) -> None:
    """With no sentinel on disk or in memory, the helper leaves the dict unchanged.

    STATE_PATH points at a file that was never written. After
    `_clear_user_shutdown_sentinel` runs, the in-memory state must equal
    what it was before the call.
    """
    monkeypatch.setattr(
        state_mod, "STATE_PATH", tmp_path / ".daemon-state.json", raising=True
    )
    before = {"fsm_state": "WAKE", "daemon_pid": 12345}
    state: dict = dict(before)

    daemon_mod._clear_user_shutdown_sentinel(state)

    # In-memory dict shape is preserved (no spurious keys / drops).
    assert state == before
# ---------------------------------------------------------------------------
# Test B -- sentinel True on disk -> cleared from disk + memory
# ---------------------------------------------------------------------------
def test_clear_sentinel_true_on_disk(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
) -> None:
    """A sentinel saved on disk is removed from disk and never appears in memory.

    Mirrors the production flow: another process wrote
    `user_requested_shutdown=True` to the state file, while the daemon's
    own in-memory dict never carried the key.
    """
    monkeypatch.setattr(
        state_mod, "STATE_PATH", tmp_path / ".daemon-state.json", raising=True
    )
    # Seed the on-disk record the way the CLI process would.
    state_mod.save_state(
        {"user_requested_shutdown": True, "fsm_state": "WAKE"}
    )
    # No "user_requested_shutdown" key here -- production reality.
    daemon_in_memory: dict = {"fsm_state": "DREAMING", "daemon_pid": 999}

    daemon_mod._clear_user_shutdown_sentinel(daemon_in_memory)

    # Disk-side sentinel is gone.
    assert "user_requested_shutdown" not in state_mod.load_state()
    # In-memory dict picked up no spurious flag.
    assert "user_requested_shutdown" not in daemon_in_memory
# ---------------------------------------------------------------------------
# Test C -- helper does not mutate unrelated keys
# ---------------------------------------------------------------------------
def test_clear_sentinel_preserves_unrelated_keys(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
) -> None:
    """The helper drops only `user_requested_shutdown` from the in-memory dict.

    Guards against a refactor that adds drive-by mutations: every other
    key (fsm_state, daemon_pid, pending_digest, fsm_transition_at) must
    survive the call with its value intact.
    """
    monkeypatch.setattr(
        state_mod, "STATE_PATH", tmp_path / ".daemon-state.json", raising=True
    )
    state_mod.save_state({"user_requested_shutdown": True, "fsm_state": "WAKE"})

    unrelated = {
        "fsm_state": "DREAMING",
        "daemon_pid": 42,
        "pending_digest": {"rem_cycles_completed": 79},
        "fsm_transition_at": "2026-05-01T10:17:54+00:00",
    }
    state = {**unrelated, "user_requested_shutdown": True}

    daemon_mod._clear_user_shutdown_sentinel(state)

    assert state == unrelated
# ---------------------------------------------------------------------------
# Test D -- read failure during shutdown is fail-safe (in-memory pop only)
# ---------------------------------------------------------------------------
def test_clear_sentinel_disk_read_failure_is_fail_safe(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """An `OSError` from `load_state` must not escape the helper.

    `daemon_mod.load_state` is replaced with a function that always
    raises. The call must still return normally and the in-memory
    sentinel must still be removed.
    """

    def _failing_load_state() -> dict:
        raise OSError("simulated transient read error")

    monkeypatch.setattr(daemon_mod, "load_state", _failing_load_state)
    state: dict = {"fsm_state": "WAKE", "user_requested_shutdown": True}

    daemon_mod._clear_user_shutdown_sentinel(state)

    # In-memory still gets popped even when disk read fails.
    assert "user_requested_shutdown" not in state
# ---------------------------------------------------------------------------
# Test E -- cmd_daemon_stop writes the sentinel BEFORE launchctl (macOS)
# ---------------------------------------------------------------------------
def test_e_cmd_daemon_stop_writes_sentinel_before_launchctl(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
) -> None:
    """macOS: ``daemon stop`` persists the sentinel before calling launchctl.

    With ``platform.system`` forced to "Darwin", ``save_state`` and
    ``subprocess.run`` are wrapped so every call lands in one ordered log.
    The test then checks that the CLI exits 0, that
    ``user_requested_shutdown=True`` is on disk, that the first logged
    call is the sentinel write, and that the write precedes the
    launchctl invocation.
    """
    monkeypatch.setattr(platform, "system", lambda: "Darwin")
    sentinel_file = tmp_path / ".daemon-state.json"
    monkeypatch.setattr(state_mod, "STATE_PATH", sentinel_file, raising=True)

    timeline: list[str] = []
    original_save = state_mod.save_state

    def recording_save(state: dict) -> None:
        # Log first, then delegate so the file is really written.
        flag = state.get("user_requested_shutdown")
        timeline.append(f"save_state:{flag}")
        original_save(state)

    monkeypatch.setattr(state_mod, "save_state", recording_save)

    def recording_run(argv, **_kwargs):
        timeline.append(f"subprocess.run:{argv[0]}:{argv[1]}")
        # Minimal stand-in for a CompletedProcess: only returncode is set.
        return type("R", (), {"returncode": 0})()

    monkeypatch.setattr(cli_mod.subprocess, "run", recording_run)

    exit_code = cli_mod.main(["daemon", "stop"])
    assert exit_code == 0

    import json as json_mod

    on_disk = json_mod.loads(sentinel_file.read_text())
    assert on_disk.get("user_requested_shutdown") is True
    assert timeline[0].startswith("save_state:True"), timeline
    assert any(
        item.startswith("subprocess.run:launchctl") for item in timeline
    ), timeline

    def first_index(prefix: str) -> int:
        """Position of the first log entry starting with *prefix*."""
        return next(
            pos for pos, item in enumerate(timeline) if item.startswith(prefix)
        )

    write_pos = first_index("save_state:")
    launchctl_pos = first_index("subprocess.run:launchctl")
    assert write_pos < launchctl_pos, timeline
# ---------------------------------------------------------------------------
# Test F -- cmd_daemon_stop writes the sentinel BEFORE systemctl (Linux)
# ---------------------------------------------------------------------------
def test_f_cmd_daemon_stop_writes_sentinel_before_systemctl(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
) -> None:
    """Linux: ``daemon stop`` persists the sentinel before calling systemctl.

    Same ordering check as the macOS variant, with ``platform.system``
    forced to "Linux" and the supervisor command expected to be
    ``systemctl``.
    """
    monkeypatch.setattr(platform, "system", lambda: "Linux")
    sentinel_file = tmp_path / ".daemon-state.json"
    monkeypatch.setattr(state_mod, "STATE_PATH", sentinel_file, raising=True)

    timeline: list[str] = []
    original_save = state_mod.save_state

    def recording_save(state: dict) -> None:
        # Log first, then delegate so the file is really written.
        flag = state.get("user_requested_shutdown")
        timeline.append(f"save_state:{flag}")
        original_save(state)

    monkeypatch.setattr(state_mod, "save_state", recording_save)

    def recording_run(argv, **_kwargs):
        timeline.append(f"subprocess.run:{argv[0]}")
        # Minimal stand-in for a CompletedProcess: only returncode is set.
        return type("R", (), {"returncode": 0})()

    monkeypatch.setattr(cli_mod.subprocess, "run", recording_run)

    exit_code = cli_mod.main(["daemon", "stop"])
    assert exit_code == 0

    import json as json_mod

    on_disk = json_mod.loads(sentinel_file.read_text())
    assert on_disk.get("user_requested_shutdown") is True

    def first_index(prefix: str) -> int:
        """Position of the first log entry starting with *prefix*."""
        return next(
            pos for pos, item in enumerate(timeline) if item.startswith(prefix)
        )

    write_pos = first_index("save_state:")
    systemctl_pos = first_index("subprocess.run:systemctl")
    assert write_pos < systemctl_pos, timeline
# ---------------------------------------------------------------------------
# Test G -- _USER_SHUTDOWN_FLAG constant pinned (cross-process protocol)
# ---------------------------------------------------------------------------
def test_g_user_shutdown_flag_constant_is_stable() -> None:
    """Pin the literal value of ``daemon_mod._USER_SHUTDOWN_FLAG``.

    The CLI (a separate process) and the daemon both rely on this exact
    key name, so a rename would silently break the cross-process
    protocol.
    """
    pinned_key = "user_requested_shutdown"
    assert daemon_mod._USER_SHUTDOWN_FLAG == pinned_key

View file

@ -0,0 +1,207 @@
"""Phase 07.6 W1: tests for the startup grace before the first
`_s4_offline_loop` iteration.
Defends against the regression where a freshly-spawned daemon immediately
runs the heavy S4 viability scan (sigma.compute_and_emit ->
retrieve.build_runtime_graph -> runtime_graph_cache.save -> json.dumps),
materialising a multi-GB intermediate Python string (CONTEXT.md D-01:
py-spy 2026-04-29 PID 7959 RSS 7.6GB).
Project async-test idiom (mandatory): sync `def test_X(...)` body wraps
`asyncio.run(_async_body(...))`. The project does NOT depend on
`pytest-asyncio`; `@pytest.mark.asyncio` markers silently pass without
running. See tests/test_cpu_watchdog.py:12, tests/test_cascade_no_block.py:11
for the canonical pattern. The plan template prescribed pytest-asyncio
markers; this file deviates (Rule 1 fake-GREEN avoidance) per project
precedent.
"""
from __future__ import annotations
import asyncio
import time
from types import SimpleNamespace
# ---------------------------------------------------------------------------
# helpers
# ---------------------------------------------------------------------------
def _fake_store():
    """Return an empty placeholder object to stand in for the store.

    _s4_offline_loop only forwards `store` to s4.run_offline_pass and
    write_event; both are stubbed in these tests, so a bare
    SimpleNamespace is enough -- it never touches LanceDB.
    """
    placeholder = SimpleNamespace()
    return placeholder
# ---------------------------------------------------------------------------
# Test 1: grace=0 fast-path — first iter runs within ≤100ms
# ---------------------------------------------------------------------------
def test_grace_zero_runs_first_iter_within_100ms(monkeypatch):
    """D-06 (a): grace=0 => stubbed run_offline_pass invoked within ≤100ms.

    Sync entry point; the real checks live in the async body, driven here
    through asyncio.run (the project does not use pytest-asyncio).
    """
    scenario = _grace_zero_fast_path_body(monkeypatch)
    asyncio.run(scenario)
async def _grace_zero_fast_path_body(monkeypatch):
    """Async body: with zero grace the first offline pass fires promptly.

    Patches ``S4_FIRST_ITER_GRACE_SEC`` to 0.0 and replaces
    ``s4.run_offline_pass`` with a counting stub, then starts
    ``_s4_offline_loop`` and waits for the stub's first call. The loop
    task is always shut down (or cancelled) before returning.
    """
    import iai_mcp.daemon as daemon_mod

    monkeypatch.setattr(daemon_mod, "S4_FIRST_ITER_GRACE_SEC", 0.0)
    loop = asyncio.get_running_loop()
    called = asyncio.Event()
    call_count = {"n": 0}

    def _stub_run_offline_pass(_store):
        call_count["n"] += 1
        # asyncio.Event is not thread-safe. The slack note in the assertion
        # below indicates the pass is scheduled via to_thread, so hand the
        # set() back to the loop thread instead of calling it directly.
        loop.call_soon_threadsafe(called.set)

    monkeypatch.setattr(daemon_mod.s4, "run_offline_pass", _stub_run_offline_pass)
    shutdown = asyncio.Event()
    store = _fake_store()
    t0 = time.monotonic()
    task = asyncio.create_task(daemon_mod._s4_offline_loop(store, shutdown))
    try:
        # Timeout matches the asserted budget (100ms + ~50ms slack); a
        # shorter timeout would make the slack below unreachable.
        await asyncio.wait_for(called.wait(), timeout=0.15)
        elapsed = time.monotonic() - t0
        assert elapsed <= 0.15, (
            f"first run_offline_pass took {elapsed*1000:.1f}ms; expected <=100ms "
            f"(plus ~50ms slack for to_thread schedule)"
        )
    finally:
        # Ask the loop to stop; cancel it if it does not exit within 1s.
        shutdown.set()
        try:
            await asyncio.wait_for(task, timeout=1.0)
        except asyncio.TimeoutError:
            task.cancel()
            try:
                await task
            except (asyncio.CancelledError, Exception):
                pass
    assert call_count["n"] >= 1
# ---------------------------------------------------------------------------
# Test 2: grace>0 deferred-path — no call before grace, ≥1 call after
# ---------------------------------------------------------------------------
def test_grace_positive_defers_first_iter(monkeypatch):
    """D-06 (b): grace=0.5 => no call before 0.4s; ≥1 call after 0.7s.

    Sync entry point that drives the async body through asyncio.run.
    """
    scenario = _grace_positive_deferred_body(monkeypatch)
    asyncio.run(scenario)
async def _grace_positive_deferred_body(monkeypatch):
    """Async body: a 0.5s grace postpones the first offline pass.

    Samples the stub's call counter at ~0.4s (must still be zero) and at
    ~0.7s (must be at least one), then shuts the loop task down.
    """
    import iai_mcp.daemon as daemon_mod

    monkeypatch.setattr(daemon_mod, "S4_FIRST_ITER_GRACE_SEC", 0.5)
    invocations = {"n": 0}

    def _counting_pass(_store):
        invocations["n"] += 1

    monkeypatch.setattr(daemon_mod.s4, "run_offline_pass", _counting_pass)

    stop_signal = asyncio.Event()
    placeholder_store = _fake_store()
    loop_task = asyncio.create_task(
        daemon_mod._s4_offline_loop(placeholder_store, stop_signal)
    )
    try:
        # Still inside the grace period: nothing may have run yet.
        await asyncio.sleep(0.4)
        assert invocations["n"] == 0, (
            f"S4 ran before 0.5s grace elapsed: call_count={invocations['n']}"
        )
        # ~0.7s in total: beyond the 0.5s grace plus to_thread schedule slack.
        await asyncio.sleep(0.3)
        assert invocations["n"] >= 1, (
            f"S4 did not run after grace elapsed: call_count={invocations['n']}"
        )
    finally:
        # Ask the loop to stop; cancel it if it does not exit within 1s.
        stop_signal.set()
        try:
            await asyncio.wait_for(loop_task, timeout=1.0)
        except asyncio.TimeoutError:
            loop_task.cancel()
            try:
                await loop_task
            except (asyncio.CancelledError, Exception):
                pass
# ---------------------------------------------------------------------------
# Test 3: shutdown during grace — clean return, no run, no exception
# ---------------------------------------------------------------------------
def test_shutdown_during_grace_returns_cleanly(monkeypatch):
    """shutdown set during grace => loop returns cleanly, 0 calls.

    Sync entry point that drives the async body through asyncio.run.
    """
    scenario = _shutdown_during_grace_body(monkeypatch)
    asyncio.run(scenario)
async def _shutdown_during_grace_body(monkeypatch):
    """Async body: shutdown raised inside a 5s grace ends the loop at once.

    The loop task must finish within 1s of the shutdown event, without
    raising, and without the stubbed offline pass ever being called.
    """
    import iai_mcp.daemon as daemon_mod

    monkeypatch.setattr(daemon_mod, "S4_FIRST_ITER_GRACE_SEC", 5.0)
    invocations = {"n": 0}

    def _counting_pass(_store):
        invocations["n"] += 1

    monkeypatch.setattr(daemon_mod.s4, "run_offline_pass", _counting_pass)

    stop_signal = asyncio.Event()
    placeholder_store = _fake_store()
    loop_task = asyncio.create_task(
        daemon_mod._s4_offline_loop(placeholder_store, stop_signal)
    )

    # Let the loop enter its grace wait, then request shutdown.
    await asyncio.sleep(0.05)
    stop_signal.set()

    # wait_for raises if the loop has not returned within 1s.
    await asyncio.wait_for(loop_task, timeout=1.0)

    assert invocations["n"] == 0, (
        f"S4 ran despite shutdown during grace: call_count={invocations['n']}"
    )
    assert loop_task.done(), "loop task did not finish"
    assert loop_task.exception() is None, (
        f"loop raised during shutdown-in-grace: {loop_task.exception()!r}"
    )
# ---------------------------------------------------------------------------
# Test 4: existing s4_offline_pass_error event-emit preserved
# ---------------------------------------------------------------------------
def test_run_offline_pass_error_still_emits_event(monkeypatch):
    """A raising run_offline_pass must still produce a warning event.

    Expected: write_event is called with kind='s4_offline_pass_error' and
    severity='warning'. Sync entry point that drives the async body
    through asyncio.run.
    """
    scenario = _error_event_preserved_body(monkeypatch)
    asyncio.run(scenario)
async def _error_event_preserved_body(monkeypatch):
    """Async body: a failing offline pass is reported through write_event.

    ``s4.run_offline_pass`` is stubbed to raise RuntimeError("boom") and
    ``write_event`` is stubbed to record its arguments. After one short
    run of the loop, at least one recorded event must have kind
    's4_offline_pass_error', severity 'warning', and 'boom' in its payload.
    """
    import iai_mcp.daemon as daemon_mod

    monkeypatch.setattr(daemon_mod, "S4_FIRST_ITER_GRACE_SEC", 0.0)
    events: list[tuple[str, dict, str]] = []

    def _raising_pass(_store):
        raise RuntimeError("boom")

    def _recording_write_event(_store, kind, payload, severity="info", **_kwargs):
        # Snapshot dict payloads so later mutation cannot alter the record.
        if isinstance(payload, dict):
            recorded = dict(payload)
        else:
            recorded = payload
        events.append((kind, recorded, severity))

    monkeypatch.setattr(daemon_mod.s4, "run_offline_pass", _raising_pass)
    monkeypatch.setattr(daemon_mod, "write_event", _recording_write_event)

    stop_signal = asyncio.Event()
    placeholder_store = _fake_store()
    loop_task = asyncio.create_task(
        daemon_mod._s4_offline_loop(placeholder_store, stop_signal)
    )

    # Time for one iteration: run_offline_pass raises, the error event is
    # emitted, and the loop parks in its inter-iteration wait.
    await asyncio.sleep(0.1)
    stop_signal.set()
    try:
        await asyncio.wait_for(loop_task, timeout=1.0)
    except asyncio.TimeoutError:
        loop_task.cancel()
        try:
            await loop_task
        except (asyncio.CancelledError, Exception):
            pass

    hits = []
    for kind, payload, severity in events:
        if kind != "s4_offline_pass_error":
            continue
        if severity != "warning":
            continue
        if "boom" not in str(payload):
            continue
        hits.append((kind, payload, severity))
    assert hits, f"expected s4_offline_pass_error event with severity=warning + 'boom' payload, got: {events}"

213
tests/test_daemon_state.py Normal file
View file

@ -0,0 +1,213 @@
"""Tests for iai_mcp.daemon_state -- Task 2.
Covers:
1. save_state atomically persists and load_state round-trips.
2. File mode is 0o600.
3. save_state is atomic under simulated mid-write failure (temp file unlinked).
4. get_pending_digest returns + clears digest when > threshold elapsed.
5. get_pending_digest returns None when <18h since last shown.
"""
from __future__ import annotations
import json
import os
from datetime import datetime, timedelta, timezone
from pathlib import Path
import pytest
@pytest.fixture
def isolated_state_path(tmp_path, monkeypatch):
    """Point daemon_state.STATE_PATH at a file under tmp_path; return that path."""
    import iai_mcp.daemon_state as daemon_state

    redirected = tmp_path / ".daemon-state.json"
    monkeypatch.setattr(daemon_state, "STATE_PATH", redirected)
    return redirected
# ---------------------------------------------------------------------------
# Test 1 + 2: roundtrip + 0o600
# ---------------------------------------------------------------------------
def test_save_and_load_roundtrip_with_0600_mode(isolated_state_path):
    """save_state writes a 0o600 file that load_state reads back unchanged."""
    from iai_mcp.daemon_state import load_state, save_state

    # Nothing saved yet: load_state() gives an empty dict.
    assert load_state() == {}

    payload = dict(
        fsm_state="WAKE",
        daemon_started_at="2026-04-18T00:00:00+00:00",
        pending_digest={"cycles": 4, "insight": "test"},
    )
    save_state(payload)

    # The file must exist with owner-only read/write permission bits.
    assert isolated_state_path.exists()
    mode = os.stat(isolated_state_path).st_mode & 0o777
    assert mode == 0o600, f"expected 0o600, got {oct(mode)}"

    # Round-trip: what comes back equals what went in.
    assert load_state() == payload
# ---------------------------------------------------------------------------
# Test 3: atomic write via tempfile + os.replace
# ---------------------------------------------------------------------------
def test_save_state_atomic_rename_preserves_old_on_failure(isolated_state_path, monkeypatch):
    """If os.replace raises, the target file must remain untouched and the
    temp file must be cleaned up.

    A known-good state is saved first; ``os.replace`` (as reached through
    the daemon_state module) is then patched to raise OSError, and a second
    save_state call must propagate that error while leaving the first
    file's content in place.
    """
    import iai_mcp.daemon_state as ds
    from iai_mcp.daemon_state import load_state, save_state

    # Seed a known-good file.
    original = {"fsm_state": "WAKE", "version": 1}
    save_state(original)
    assert load_state() == original

    # Make the atomic swap fail. monkeypatch restores the attribute at
    # teardown, so no manual reference to the real function is kept.
    def _boom(src, dst):
        raise OSError("simulated swap failure")

    monkeypatch.setattr(ds.os, "replace", _boom)
    with pytest.raises(OSError):
        save_state({"fsm_state": "SLEEP", "version": 2})

    # Original file preserved (atomic rename never happened).
    loaded = load_state()
    assert loaded == original

    # Temp file cleaned up -- no leftover .tmp files in the directory.
    leftovers = list(isolated_state_path.parent.glob(".daemon-state.*.tmp"))
    assert leftovers == [], f"temp files not cleaned: {leftovers}"
# ---------------------------------------------------------------------------
# Test 4: pending digest returned after threshold window
# ---------------------------------------------------------------------------
def test_pending_digest_returned_after_18h(isolated_state_path):
    """A digest last shown 20h ago is returned, cleared and persisted.

    After get_pending_digest hands the digest back, ``pending_digest``
    must be gone and ``last_digest_shown_at`` must equal ``now``, both in
    the passed-in dict and in the state reloaded from disk.
    """
    from iai_mcp.daemon_state import (
        DIGEST_SHOW_THRESHOLD_HOURS,
        get_pending_digest,
        load_state,
        save_state,
    )

    assert DIGEST_SHOW_THRESHOLD_HOURS == 18

    now = datetime(2026, 4, 18, 20, 0, tzinfo=timezone.utc)
    shown_20h_ago = now - timedelta(hours=20)
    digest_payload = {"cycles": 4, "insight": "after-threshold"}
    state = {
        "last_digest_shown_at": shown_20h_ago.isoformat(),
        "pending_digest": {"cycles": 4, "insight": "after-threshold"},
    }
    save_state(state)

    returned = get_pending_digest(state, now)
    assert returned == digest_payload

    # In-memory state: digest consumed, timestamp bumped to `now`.
    expected_stamp = now.isoformat()
    assert "pending_digest" not in state
    assert state["last_digest_shown_at"] == expected_stamp

    # The same change must be visible on disk.
    reloaded = load_state()
    assert "pending_digest" not in reloaded
    assert reloaded["last_digest_shown_at"] == expected_stamp
# ---------------------------------------------------------------------------
# Test 5: digest withheld when <18h since last shown
# ---------------------------------------------------------------------------
def test_pending_digest_withheld_before_18h(isolated_state_path):
    """A digest last shown 4h ago is withheld and the state left as-is."""
    from iai_mcp.daemon_state import get_pending_digest

    now = datetime(2026, 4, 18, 20, 0, tzinfo=timezone.utc)
    shown_4h_ago_iso = (now - timedelta(hours=4)).isoformat()
    state = {
        "last_digest_shown_at": shown_4h_ago_iso,
        "pending_digest": {"cycles": 4, "insight": "too-early"},
    }

    assert get_pending_digest(state, now) is None

    # Nothing consumed: the digest stays pending for a later call.
    assert state["pending_digest"] == {"cycles": 4, "insight": "too-early"}
    assert state["last_digest_shown_at"] == shown_4h_ago_iso
# ---------------------------------------------------------------------------
# Extra: no digest when state has no pending_digest
# ---------------------------------------------------------------------------
def test_pending_digest_none_when_not_set(isolated_state_path):
    """An empty state dict yields no digest."""
    from iai_mcp.daemon_state import get_pending_digest

    empty_state: dict = {}
    moment = datetime(2026, 4, 18, 20, 0, tzinfo=timezone.utc)
    result = get_pending_digest(empty_state, moment)
    assert result is None
# ---------------------------------------------------------------------------
# prune_stale_first_turn: evicts legacy bool + aged ISO entries
# ---------------------------------------------------------------------------
def test_prune_evicts_legacy_bool_first_turn_pending():
    """Legacy {sid: bool} entries are all evicted by a single prune.

    They carry no recoverable timestamp, so they cannot be aged sensibly.
    """
    from iai_mcp.daemon_state import prune_stale_first_turn

    legacy_entries = {"sess-1": True, "sess-2": False, "sess-3": True}
    state = {"first_turn_pending": legacy_entries}

    evicted = prune_stale_first_turn(state)

    assert evicted == 3
    assert state["first_turn_pending"] == {}
def test_prune_keeps_fresh_iso_entries_and_evicts_aged():
    """With a 24h TTL, a 1h-old entry survives and a 48h-old one is evicted."""
    from iai_mcp.daemon_state import prune_stale_first_turn

    now = datetime(2026, 4, 23, 12, 0, tzinfo=timezone.utc)
    ages_in_hours = {"fresh": 1, "stale": 48}
    pending = {}
    for label, hours in ages_in_hours.items():
        pending[label] = (now - timedelta(hours=hours)).isoformat()
    state = {"first_turn_pending": pending}

    evicted = prune_stale_first_turn(state, now=now, ttl_hours=24)

    assert evicted == 1
    survivors = state["first_turn_pending"]
    assert "fresh" in survivors
    assert "stale" not in survivors
def test_prune_caps_max_entries_keeps_newest():
    """With max_entries=3, only the three newest of ten entries are kept."""
    from iai_mcp.daemon_state import prune_stale_first_turn

    now = datetime(2026, 4, 23, 12, 0, tzinfo=timezone.utc)
    # sess-i was stamped i minutes before `now`, so lower i means newer.
    pending = {}
    for minutes_ago in range(10):
        stamp = now - timedelta(minutes=minutes_ago)
        pending[f"sess-{minutes_ago}"] = stamp.isoformat()
    state = {"first_turn_pending": pending}

    evicted = prune_stale_first_turn(state, now=now, ttl_hours=24, max_entries=3)

    assert evicted == 7
    survivors = state["first_turn_pending"]
    assert len(survivors) == 3
    # The entries stamped 0, 1 and 2 minutes ago are the ones that remain.
    assert set(survivors.keys()) == {"sess-0", "sess-1", "sess-2"}
def test_prune_handles_empty_and_missing_pending():
    """Pruning reports 0 for a missing, empty, or None first_turn_pending."""
    from iai_mcp.daemon_state import prune_stale_first_turn

    no_key: dict = {}
    empty_mapping = {"first_turn_pending": {}}
    none_value = {"first_turn_pending": None}

    assert prune_stale_first_turn(no_key) == 0
    assert prune_stale_first_turn(empty_mapping) == 0
    assert prune_stale_first_turn(none_value) == 0

View file

@ -0,0 +1,403 @@
"""Tests for _tick_body honoring socket control flags (Plan 04-gap-1).
The dispatcher (tests/test_daemon_dispatcher.py) proves the flags are
SET correctly on the daemon state. These tests prove the scheduler
READS those flags and acts on them:
- scheduler_paused=True -> _tick_body emits daemon_tick_skipped and
returns without acquiring the lock.
- user_sleep_request.pending=True + empty quiet_window -> _tick_body
still bypasses the gate, enters SLEEP,
clears the flag.
- force_rem_request.pending=True -> ONE REM cycle runs out of schedule
(total_cycles=1), flag cleared.
- force_wake_request.pending=True set mid-night -> REM loop breaks
early with daemon_yielded reason=
force_wake_requested; flag cleared.
All REM cycles are mocked with a coroutine that sleeps 0.01s to avoid
the real 15-minute cap + real consolidation pipeline.
"""
from __future__ import annotations
import asyncio
from datetime import datetime, timezone
from pathlib import Path
from unittest.mock import AsyncMock, patch
import pytest
# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------
@pytest.fixture
def tick_env(tmp_path, monkeypatch):
    """Isolate LOCK_PATH / STATE_PATH under tmp_path and build a seeded store.

    Yields ``(store, lock, state_path, tmp_path)``.
    `state_path` points at the tmp_path state file so tests can verify
    flag persistence via load_state().

    The REM cycle is not mocked here; tests that need a stand-in patch
    ``run_rem_cycle`` themselves. On teardown the lock is released
    (errors ignored) and then closed.
    """
    from iai_mcp import concurrency, daemon_state
    from iai_mcp.concurrency import ProcessLock
    from iai_mcp.store import MemoryStore
    lock_path = tmp_path / ".lock"
    state_path = tmp_path / ".daemon-state.json"
    monkeypatch.setattr(concurrency, "LOCK_PATH", lock_path)
    monkeypatch.setattr(daemon_state, "STATE_PATH", state_path)
    # Environment is set before MemoryStore() is constructed -- presumably
    # the store reads these variables at construction time (TODO confirm).
    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path / "iai"))
    monkeypatch.setenv("IAI_MCP_EMBED_DIM", "384")
    store = MemoryStore()
    # Seed a single record so _store_is_empty returns False (we want the
    # scheduler to reach the flag-gate, not the empty-store shortcut).
    from iai_mcp.types import MemoryRecord
    from uuid import uuid4
    rec = MemoryRecord(
        id=uuid4(),
        tier="semantic",
        literal_surface="seed record so the store is not empty",
        aaak_index="",
        # Zero vector sized to the store's own embedding dimension.
        embedding=[0.0] * store.embed_dim,
        community_id=None,
        centrality=0.0,
        detail_level=1,
        pinned=False,
        stability=0.0,
        difficulty=0.0,
        last_reviewed=None,
        never_decay=False,
        never_merge=False,
        provenance=[],
        created_at=datetime.now(timezone.utc),
        updated_at=datetime.now(timezone.utc),
        tags=[],
        language="en",
    )
    store.insert(rec)
    lock = ProcessLock(lock_path)
    yield store, lock, state_path, tmp_path
    # Teardown: release is best-effort (any exception from it is
    # swallowed); close() always runs afterwards.
    try:
        lock.release()
    except Exception:
        pass
    lock.close()
async def _fast_rem_cycle(
    store, cycle_num, total_cycles, session_id, *, is_last, claude_enabled,
):
    """Stand-in for dream.run_rem_cycle -- completes in 0.01s.

    Only ``cycle_num`` influences the result: it is echoed back under the
    "cycle" key. Every other field is a fixed value.
    """
    await asyncio.sleep(0.01)
    outcome = dict(
        cycle=cycle_num,
        summaries_created=1,
        schemas_induced=0,
        schema_candidates=0,
        claude_call_used=False,
        main_insight_text=None,
        timed_out=False,
    )
    return outcome
def _window_covering_now() -> list[int]:
    """Build a quiet_window [start_bucket, duration] containing the current local time.

    The day is split into 48 half-hour buckets. The window starts two
    buckets before the current one (wrapping around midnight) and its
    second element is 8.
    """
    from iai_mcp.tz import load_user_tz

    user_tz = load_user_tz()
    local_now = datetime.now(timezone.utc).astimezone(user_tz)
    minutes_into_day = local_now.hour * 60 + local_now.minute
    current_bucket = minutes_into_day // 30
    return [(current_bucket - 2) % 48, 8]
# ---------------------------------------------------------------------------
# Test 1: scheduler_paused=True short-circuits the tick
# ---------------------------------------------------------------------------
def test_scheduler_paused_emits_skip_event_and_returns(tick_env, monkeypatch):
    """scheduler_paused=True makes _tick_body skip and record the reason.

    Expected after one tick: ``last_tick_skipped_reason == "paused"`` in
    the state, exactly one ``daemon_tick_skipped`` event with reason
    "paused", the FSM still at WAKE, and the REM cycle never invoked.
    """
    from iai_mcp import daemon as daemon_mod
    from iai_mcp.events import query_events

    # Only the store and lock are needed; the paths are unused here.
    store, lock, _state_path, _tmp_path = tick_env
    state = {
        "fsm_state": "WAKE",
        "scheduler_paused": True,
        "quiet_window": _window_covering_now(),
    }
    # If the body reaches the REM loop, this mock fails the test.
    monkeypatch.setattr(daemon_mod, "run_rem_cycle", AsyncMock(
        side_effect=AssertionError("REM loop must not run when paused")
    ))
    asyncio.run(daemon_mod._tick_body(store, lock, state))
    # State reports the pause reason.
    assert state.get("last_tick_skipped_reason") == "paused"
    # Event recorded.
    events = query_events(store, kind="daemon_tick_skipped", limit=1)
    assert len(events) == 1
    assert events[0]["data"]["reason"] == "paused"
    # FSM stayed at WAKE.
    assert state["fsm_state"] == "WAKE"
# ---------------------------------------------------------------------------
# Test 2: user_sleep_request bypasses quiet-window gate
# ---------------------------------------------------------------------------
def test_user_sleep_request_bypasses_quiet_window(tick_env, monkeypatch):
    """A pending user_sleep_request overrides the quiet-window gate.

    With no quiet_window and a just-now session marker the tick would
    normally be skipped (outside_window). The pending request must make
    the REM loop run anyway, and the flag must be cleared and persisted.
    """
    from iai_mcp import daemon as daemon_mod
    from iai_mcp.daemon_state import load_state

    store, lock, *_ = tick_env

    sleep_request = {
        "reason": "I am going to bed now",
        "ts": "2026-04-18T23:00:00+00:00",
        "pending": True,
    }
    state = {
        "fsm_state": "WAKE",
        # No quiet window at all -- the gate would normally skip the tick.
        "quiet_window": None,
        "user_sleep_request": sleep_request,
        # Recent session marker so the bootstrap idle check fails as well.
        "last_session_ts": datetime.now(timezone.utc).isoformat(),
    }

    monkeypatch.setattr(daemon_mod, "run_rem_cycle", _fast_rem_cycle)
    # Keep the quiet-window relearn path out of the picture.
    monkeypatch.setattr(daemon_mod, "should_relearn", lambda last, now: False)

    asyncio.run(daemon_mod._tick_body(store, lock, state))

    # Request honored: flag dropped and stamped.
    honored = state["user_sleep_request"]
    assert honored["pending"] is False
    assert "honored_at" in honored
    # Full cycle loop ran and the FSM came back to WAKE.
    assert state["fsm_state"] == "WAKE"
    assert state.get("last_completed_cycles", 0) >= 1
    # The cleared flag also reached the state file.
    persisted = load_state()
    assert persisted["user_sleep_request"]["pending"] is False
# ---------------------------------------------------------------------------
# Test 3: force_rem_request runs EXACTLY ONE REM cycle out of schedule
# ---------------------------------------------------------------------------
def test_force_rem_request_runs_single_cycle(tick_env, monkeypatch):
    """A pending force_rem_request runs exactly one REM cycle.

    rem_cycle_count is set to 4 on purpose: the forced request must bound
    the loop to a single cycle regardless, clear its flag, and leave the
    FSM at WAKE.
    """
    from iai_mcp import daemon as daemon_mod

    store, lock, *_ = tick_env

    state = {
        "fsm_state": "WAKE",
        "quiet_window": None,
        "force_rem_request": {
            "ts": "2026-04-18T10:00:00+00:00",
            "pending": True,
        },
        # Deliberately 4: force_rem has to override this with one cycle.
        "rem_cycle_count": 4,
        "last_session_ts": datetime.now(timezone.utc).isoformat(),
    }
    seen_cycles: list[int] = []

    async def _recording_rem(
        store, cycle_num, total_cycles, session_id, *, is_last, claude_enabled,
    ):
        seen_cycles.append(cycle_num)
        await asyncio.sleep(0.005)
        return dict(
            cycle=cycle_num,
            summaries_created=0,
            schemas_induced=0,
            schema_candidates=0,
            claude_call_used=False,
            main_insight_text=None,
            timed_out=False,
        )

    monkeypatch.setattr(daemon_mod, "run_rem_cycle", _recording_rem)
    monkeypatch.setattr(daemon_mod, "should_relearn", lambda last, now: False)

    asyncio.run(daemon_mod._tick_body(store, lock, state))

    # One cycle only, even though rem_cycle_count asked for four.
    assert seen_cycles == [1], (
        f"force_rem must bound the loop to 1 cycle, got {seen_cycles}"
    )
    # Flag cleared, FSM back at WAKE.
    assert state["force_rem_request"]["pending"] is False
    assert state["fsm_state"] == "WAKE"
# ---------------------------------------------------------------------------
# Test 4: force_wake_request mid-night breaks the REM loop early
# ---------------------------------------------------------------------------
def test_force_wake_request_breaks_rem_loop_early(tick_env, monkeypatch):
    """A force_wake_request raised mid-loop stops the REM loop early.

    The stubbed REM cycle sets ``force_wake_request.pending`` during
    cycle 2 of 5. Expected: only cycles 1 and 2 run, the flag is cleared
    and stamped, a ``daemon_yielded`` event with reason
    "force_wake_requested" exists, and the FSM ends at WAKE.
    """
    from iai_mcp import daemon as daemon_mod
    from iai_mcp.events import query_events

    store, lock, *_ = tick_env

    state = {
        "fsm_state": "WAKE",
        "quiet_window": _window_covering_now(),
        "rem_cycle_count": 5,
    }
    seen_cycles: list[int] = []

    async def _rem_raising_wake_on_cycle_two(
        store, cycle_num, total_cycles, session_id, *, is_last, claude_enabled,
    ):
        seen_cycles.append(cycle_num)
        await asyncio.sleep(0.005)
        # Simulate the dispatcher flipping the flag partway through the
        # night. _tick_body checks force_wake_request.pending AFTER each
        # cycle completes, so raising it in cycle 2 stops before cycle 3.
        if cycle_num == 2:
            state["force_wake_request"] = {
                "ts": datetime.now(timezone.utc).isoformat(),
                "pending": True,
            }
        return dict(
            cycle=cycle_num,
            summaries_created=0,
            schemas_induced=0,
            schema_candidates=0,
            claude_call_used=False,
            main_insight_text=None,
            timed_out=False,
        )

    monkeypatch.setattr(daemon_mod, "run_rem_cycle", _rem_raising_wake_on_cycle_two)
    monkeypatch.setattr(daemon_mod, "should_relearn", lambda last, now: False)

    asyncio.run(daemon_mod._tick_body(store, lock, state))

    # Cycles 3/4/5 must never have started.
    assert seen_cycles == [1, 2], (
        f"force_wake must break the loop after cycle 2, got {seen_cycles}"
    )
    # Flag cleared and stamped.
    wake_request = state["force_wake_request"]
    assert wake_request["pending"] is False
    assert "honored_at" in wake_request
    # A daemon_yielded event carries the expected reason.
    yielded = query_events(store, kind="daemon_yielded", limit=5)
    reasons = [event["data"].get("reason") for event in yielded]
    assert "force_wake_requested" in reasons, (
        f"expected force_wake_requested in {reasons}"
    )
    # FSM is back at WAKE.
    assert state["fsm_state"] == "WAKE"
# ---------------------------------------------------------------------------
# Test 5: flags work under concurrent state changes (realistic race)
# ---------------------------------------------------------------------------
def test_user_sleep_plus_force_rem_still_bounds_one_cycle(tick_env, monkeypatch):
    """Both requests pending at once: one cycle runs, both flags clear.

    Models the user sending both MCP messages in quick succession.
    force_rem must still cap the loop at a single cycle even though
    rem_cycle_count is 4.
    """
    from iai_mcp import daemon as daemon_mod

    store, lock, *_ = tick_env

    state = {
        "fsm_state": "WAKE",
        "quiet_window": None,
        "user_sleep_request": {
            "reason": "bedtime",
            "ts": "2026-04-18T23:00:00+00:00",
            "pending": True,
        },
        "force_rem_request": {
            "ts": "2026-04-18T23:00:01+00:00",
            "pending": True,
        },
        "rem_cycle_count": 4,
    }
    seen_cycles: list[int] = []

    async def _recording_rem(
        store, cycle_num, total_cycles, session_id, *, is_last, claude_enabled,
    ):
        seen_cycles.append(cycle_num)
        await asyncio.sleep(0.005)
        return dict(
            cycle=cycle_num,
            summaries_created=0,
            schemas_induced=0,
            schema_candidates=0,
            claude_call_used=False,
            main_insight_text=None,
            timed_out=False,
        )

    monkeypatch.setattr(daemon_mod, "run_rem_cycle", _recording_rem)
    monkeypatch.setattr(daemon_mod, "should_relearn", lambda last, now: False)

    asyncio.run(daemon_mod._tick_body(store, lock, state))

    # Single cycle despite rem_cycle_count=4.
    assert seen_cycles == [1]
    # Neither request is left pending.
    for request_key in ("user_sleep_request", "force_rem_request"):
        assert state[request_key]["pending"] is False
# ---------------------------------------------------------------------------
# Test 6: paused=True state persisted AND surfaced via load_state
# ---------------------------------------------------------------------------
def test_paused_skip_persists_to_disk(tick_env, monkeypatch):
    """A paused tick writes its skip reason and pause flag to disk.

    After one tick, the state reloaded via load_state must carry
    ``scheduler_paused``, ``last_tick_skipped_reason`` and an ISO-format
    ``last_tick_at``, so a restarted daemon would observe the same state.
    """
    from iai_mcp import daemon as daemon_mod
    from iai_mcp.daemon_state import load_state

    store, lock, *_ = tick_env
    paused_state = {"fsm_state": "WAKE", "scheduler_paused": True}

    asyncio.run(daemon_mod._tick_body(store, lock, paused_state))

    on_disk = load_state()
    assert on_disk["last_tick_skipped_reason"] == "paused"
    assert on_disk["scheduler_paused"] is True
    # fromisoformat raises if last_tick_at is not an ISO string.
    datetime.fromisoformat(on_disk["last_tick_at"])

View file

@ -0,0 +1,315 @@
"""Phase 07.9 W5 — cross-cut data-integrity integration soak.
Exercises the W1-W4 hardening fixes *together* under load shapes that no
per-wave unit test reaches. Each case maps 1:1 to the four CONTEXT.md
D-05 sub-requirements:
1. provenance overflow round-trip under sustained load (W1 / D-01)
2. capture drain partial-failure preserves evidence (W2 / D-02)
3. graph-cache encryption round-trip + plaintext absence (W3 / D-03)
4. anti-hits malformed edge does not crash recall (W4 / D-04)
All cases run against a real ``MemoryStore`` in tmp_path with a
deterministic passphrase fallback (no keyring required).
"""
from __future__ import annotations
import json
import logging
import os
import threading
import time
from datetime import datetime, timezone
from pathlib import Path
from uuid import UUID, uuid4
import pytest
# Deterministic passphrase so encryption paths work without a keyring
# backend on this construction host. This runs at import time, and
# setdefault() leaves any value already present in the environment
# untouched.
os.environ.setdefault("IAI_MCP_CRYPTO_PASSPHRASE", "test-soak-w5-passphrase")
@pytest.fixture(autouse=True)
def _isolated_keyring(monkeypatch: pytest.MonkeyPatch):
    """Swap keyring's password functions for an in-memory fake.

    get/set/delete_password are redirected to a dict keyed by the two
    positional arguments of each call, so no real keyring backend is
    touched. Lookups of unknown entries return None, which is what lets
    the passphrase fallback fire. Yields the backing dict.
    """
    import keyring as _keyring

    vault: dict[tuple[str, str], str] = {}

    def _get(s, u):
        return vault.get((s, u))

    def _set(s, u, p):
        vault[(s, u)] = p

    def _delete(s, u):
        return vault.pop((s, u), None)

    monkeypatch.setattr(_keyring, "get_password", _get)
    monkeypatch.setattr(_keyring, "set_password", _set)
    monkeypatch.setattr(_keyring, "delete_password", _delete)
    yield vault
# ============================================================================
# Case 1 — provenance overflow round-trip under sustained load (W1 / D-01)
# ============================================================================
def test_w5_provenance_overflow_sustained_load(tmp_path, monkeypatch):
    """W5 / case 1: drive 10 batches into a queue sized for 2 in-memory
    slots while the worker is throttled. Assert zero pairs lost; the spill
    dir transient (drains to empty after release + flush).

    The throttle is a ``threading.Event`` that ``slow_batch`` waits on (up
    to 15 s) before delegating to the real ``append_provenance_batch``.
    The event is set again in the ``finally`` clause so that a failed
    assertion inside the ``try`` body does not leave the worker parked in
    that wait while the queue is being stopped.
    """
    from iai_mcp.provenance_queue import ProvenanceWriteQueue
    from iai_mcp.store import MemoryStore
    from tests.test_store import _make as _make_record

    # Init store BEFORE redirecting HOME so MemoryStore uses the real
    # keyring resolver path (then falls through to the passphrase since
    # the keyring fail-backend is monkeypatched). Spill dir under HOME
    # is exactly what we want isolated to tmp.
    store = MemoryStore(path=tmp_path / "store")
    r = _make_record()
    store.insert(r)
    monkeypatch.setenv("HOME", str(tmp_path))

    # Single definition of the spill directory, used both inside the
    # try body and by the final emptiness assertion.
    overflow_dir = tmp_path / ".iai-mcp" / ".provenance-overflow"

    flushed: list = []
    release = threading.Event()
    real_batch = store.append_provenance_batch

    def slow_batch(pairs, records_cache=None):
        # Block until the test releases the worker, then record what was
        # flushed and hand over to the real implementation.
        release.wait(timeout=15.0)
        flushed.extend(pairs)
        return real_batch(pairs, records_cache=records_cache)

    store.append_provenance_batch = slow_batch  # type: ignore[method-assign]
    q = ProvenanceWriteQueue(
        store, coalesce_ms=10, max_queue_size=2, max_batch_pairs=1,
    )
    q.start()
    try:
        for i in range(10):
            q.enqueue([(r.id, {
                "ts": f"t{i}", "cue": f"sustained-{i}", "session_id": "soak",
            })])
        # Some spilled by now.
        time.sleep(0.15)
        spilled = list(overflow_dir.glob("*.jsonl"))
        assert len(spilled) >= 1, (
            f"expected ≥1 spilled file under sustained overload; got {spilled}"
        )
        # Release the worker — drains in-memory items first.
        release.set()
        # Production: the worker's idle-poll picks up the spill dir
        # every _WORKER_IDLE_POLL_S (5s) when _q is empty. For test
        # speed we drive the drain explicitly via the internal helper
        # — same code path the worker uses on its idle tick.
        deadline = time.time() + 15.0
        while time.time() < deadline:
            # First let the worker drain whatever's currently in _q.
            q.flush(timeout=2.0)
            # Then explicitly re-enqueue any spilled files. The worker
            # will pull them on the next get() in its outer loop.
            q._drain_overflow_dir()
            q.flush(timeout=2.0)
            if not list(overflow_dir.glob("*.jsonl")):
                break
            time.sleep(0.05)
    finally:
        # Idempotent on the success path; on a failure path it unblocks
        # slow_batch so teardown is not held up by the 15 s throttle.
        release.set()
        q.stop()

    cues = [p[1]["cue"] for p in flushed]
    assert sorted(cues) == [f"sustained-{i}" for i in range(10)], (
        f"MEM-05 violated: expected all 10 cues exactly once; got {sorted(cues)}"
    )
    assert list(overflow_dir.glob("*.jsonl")) == []
# ============================================================================
# Case 2 — capture drain partial-failure preserves evidence (W2 / D-02)
# ============================================================================
def test_w5_capture_drain_partial_failure_preserves_evidence(tmp_path, monkeypatch):
    """W5 / case 2: a deferred capture file whose middle event fails to
    insert must be kept as ``.failed-<ts>.jsonl`` rather than removed.

    The file holds a header line plus three events; ``MemoryStore.insert``
    is patched to raise for the event carrying the sentinel text. The
    test then checks the original file is gone, exactly one ``.failed-*``
    sibling exists, and the returned counters report 2 inserted, 1
    insert-failed, 0 files drained and 1 file failed.
    """
    from iai_mcp.capture import drain_deferred_captures
    from iai_mcp.store import MemoryStore

    monkeypatch.setenv("HOME", str(tmp_path))
    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path / ".iai-mcp" / "lance"))

    deferred = tmp_path / ".iai-mcp" / ".deferred-captures"
    deferred.mkdir(parents=True)
    fpath = deferred / "soak-mixed-1.jsonl"

    # One JSON document per line: header first, then the three events.
    jsonl_entries = [
        {
            "version": 1,
            "deferred_at": "2026-04-30T00:00:00Z",
            "session_id": "soak-2",
            "cwd": "/tmp",
        },
        {
            "cue": "good a", "text": "first valid event with ample length here",
            "tier": "episodic", "role": "user",
        },
        {
            "cue": "poison", "text": "INSERT_FAIL_SENTINEL_W5_SOAK middle event",
            "tier": "episodic", "role": "user",
        },
        {
            "cue": "good b", "text": "third valid event with sufficient text",
            "tier": "episodic", "role": "user",
        },
    ]
    fpath.write_text("".join(json.dumps(entry) + "\n" for entry in jsonl_entries))

    real_insert = MemoryStore.insert

    def insert_or_fail(self, rec):
        # Only the sentinel-bearing record is rejected; others pass through.
        if "INSERT_FAIL_SENTINEL_W5_SOAK" in rec.literal_surface:
            raise RuntimeError("simulated lance failure at soak")
        return real_insert(self, rec)

    monkeypatch.setattr(MemoryStore, "insert", insert_or_fail)

    store = MemoryStore()
    counts = drain_deferred_captures(store)

    assert not fpath.exists()
    failed = list(deferred.glob("soak-mixed-1.failed-*.jsonl"))
    assert len(failed) == 1, (
        f"expected 1 .failed-* file; got {failed} "
        f"(deferred contents: {list(deferred.iterdir())})"
    )
    expected_counts = {
        "events_inserted": 2,
        "events_skipped_insert_failed": 1,
        "files_drained": 0,
        "files_failed": 1,
    }
    for counter_name, wanted in expected_counts.items():
        assert counts[counter_name] == wanted, counts
# ============================================================================
# Case 3 — graph-cache encryption round-trip + plaintext absence (W3 / D-03)
# ============================================================================
def test_w5_graph_cache_encryption_no_plaintext_canary(tmp_path):
    """W5 / case 3: a canary string stored as a node surface must not be
    readable in the cache file's raw bytes, yet must come back unchanged.

    Saves one node through ``runtime_graph_cache.save``, checks the file
    bytes start with ``iai:enc:v1:`` and do not contain the canary, then
    reloads via ``try_load`` and compares the surface.
    """
    from iai_mcp import runtime_graph_cache
    from iai_mcp.community import CommunityAssignment
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path / "lancedb")
    store.root = tmp_path  # cache file under tmp_path

    node_id = uuid4()
    marker = "PLAINTEXT_CANARY_W5_SOAK_aaak_07_9"
    node_key = str(node_id)
    payload_in = {
        node_key: {
            "embedding": [0.1] * 384,
            "surface": marker,
            "centrality": 0.3,
            "tier": "episodic",
            "pinned": False,
            "tags": [],
            "language": "en",
        }
    }
    communities = CommunityAssignment(
        node_to_community={node_id: node_id},
        community_centroids={node_id: [0.1] * 384},
        modularity=0.4,
        backend="leiden",
        top_communities=[node_id],
        mid_regions={node_id: [node_id]},
    )
    hubs = [node_id]

    saved = runtime_graph_cache.save(
        store, communities, hubs,
        node_payload=payload_in, max_degree=2,
    )
    assert saved is True

    on_disk = (tmp_path / "runtime_graph_cache.json").read_bytes()
    assert marker.encode("utf-8") not in on_disk, (
        "plaintext canary leaked into the on-disk sidecar"
    )
    assert on_disk.startswith(b"iai:enc:v1:")

    loaded = runtime_graph_cache.try_load(store)
    assert loaded is not None
    _, _, payload_out, _ = loaded
    assert payload_out[node_key]["surface"] == marker
# ============================================================================
# Case 4 — anti-hits malformed edge does not crash recall (W4 / D-04)
# ============================================================================
def test_w5_recall_survives_malformed_anti_edge(tmp_path):
    """W5 / case 4: ``_find_anti_hits`` is given one valid and one
    malformed ``contradicts`` edge from the same source record.

    Expectations: the call returns exactly one anti-hit (the valid
    destination) and a message containing
    ``anti_hits_skip_malformed_edge`` reaches a handler attached to the
    ``iai_mcp.pipeline`` logger.
    """
    from iai_mcp.graph import MemoryGraph
    from iai_mcp.pipeline import _find_anti_hits
    from iai_mcp.store import MemoryStore
    from iai_mcp.types import EMBED_DIM, MemoryHit, MemoryRecord

    store = MemoryStore(path=tmp_path / "lancedb")
    rid_hit = uuid4()
    rid_anti = uuid4()
    now = datetime.now(timezone.utc)

    surfaces = {rid_hit: "primary", rid_anti: "anti"}
    for record_id, text in surfaces.items():
        record = MemoryRecord(
            id=record_id, tier="episodic", literal_surface=text,
            aaak_index="", embedding=[0.1] * EMBED_DIM,
            community_id=None, centrality=0.0, detail_level=2,
            pinned=False, stability=0.0, difficulty=0.0,
            last_reviewed=None, never_decay=False, never_merge=False,
            provenance=[], created_at=now, updated_at=now,
            tags=[], language="en",
        )
        store.insert(record)

    # Two contradicts edges out of rid_hit: a real UUID target and a
    # string that is not a UUID.
    edge_rows = [
        {"src": str(rid_hit), "dst": destination,
         "edge_type": "contradicts", "weight": 1.0,
         "updated_at": now}
        for destination in (str(rid_anti), "not-a-uuid-soak")
    ]
    store.db.open_table("edges").add(edge_rows)

    hit = MemoryHit(
        record_id=rid_hit, score=0.9, reason="soak",
        literal_surface="primary", adjacent_suggestions=[],
    )

    seen_messages: list = []

    class _Capture(logging.Handler):
        def emit(self, record):
            seen_messages.append(record.getMessage())

    sink = _Capture(level=logging.WARNING)
    pipeline_logger = logging.getLogger("iai_mcp.pipeline")
    pipeline_logger.addHandler(sink)
    try:
        anti = _find_anti_hits(
            [hit], store, MemoryGraph(), k=3, records_cache=None,
        )
    finally:
        pipeline_logger.removeHandler(sink)

    assert len(anti) == 1
    assert anti[0].record_id == rid_anti
    assert any("anti_hits_skip_malformed_edge" in m for m in seen_messages), (
        f"expected log line; got {seen_messages}"
    )

View file

@ -0,0 +1,108 @@
"""Tests for TOK-08 delta encoding (Plan 02-04 Task 2, D-28).
Hash each session-start component (L0, L1, L2, rich_club). Subsequent turns
send only changed components; unchanged ones are represented by their hash.
On hash miss, fall back to full payload.
"""
from __future__ import annotations
import pytest
def test_hash_component_deterministic():
    """Same input gives the same hash; a one-character change does not."""
    from iai_mcp.delta import hash_component

    baseline = hash_component("hello world")
    repeat = hash_component("hello world")
    altered = hash_component("hello world!")

    assert baseline == repeat
    assert baseline != altered
def test_hash_component_returns_hex_string():
    """The hash is a 16-character string that parses as hexadecimal."""
    from iai_mcp.delta import hash_component

    digest = hash_component("test")
    assert isinstance(digest, str)
    # sha256 truncated to 16 chars per plan
    assert len(digest) == 16
    # int() raises ValueError if any character is not a hex digit.
    int(digest, 16)
def test_build_delta_first_session_returns_full_payload():
    """With no prior hashes, every component appears in both the delta and
    the returned hash map."""
    from iai_mcp.delta import build_delta

    components = ("l0", "l1", "l2", "rich_club")
    payload = {
        "l0": "identity",
        "l1": "critical facts",
        "l2": ["community a", "community b"],
        "rich_club": "hubs",
    }
    delta, new_hashes = build_delta({}, payload)

    # First session: delta must contain every component.
    for name in components:
        assert name in delta
    # And hashes for every component.
    for name in components:
        assert name in new_hashes
def test_build_delta_unchanged_is_empty():
    """Feeding the hashes from a first call back in with the same payload
    yields an empty delta."""
    # Only build_delta is exercised here; the previously imported
    # hash_component was never referenced in this test.
    from iai_mcp.delta import build_delta

    payload = {
        "l0": "identity",
        "l1": "critical facts",
        "l2": ["community a"],
        "rich_club": "hubs",
    }
    _first, hashes = build_delta({}, payload)
    # Second call with same payload: delta should be empty.
    delta2, _hashes2 = build_delta(hashes, payload)
    assert delta2 == {}
def test_build_delta_partial_change():
    """Changing only ``l2`` puts ``l2`` in the delta and nothing else from
    the original four components."""
    from iai_mcp.delta import build_delta

    before = {
        "l0": "identity",
        "l1": "critical facts",
        "l2": ["community a"],
        "rich_club": "hubs",
    }
    _, known_hashes = build_delta({}, before)

    after = {**before, "l2": ["community a", "community b"]}
    delta, _ = build_delta(known_hashes, after)

    assert "l2" in delta
    for untouched in ("l0", "l1", "rich_club"):
        assert untouched not in delta
def test_apply_delta_reconstructs():
    """Applying the delta between two payloads to the first one gives the
    second one back."""
    from iai_mcp.delta import apply_delta, build_delta

    original = {"l0": "a", "l1": "b", "l2": ["x"], "rich_club": "c"}
    _, original_hashes = build_delta({}, original)

    # A second payload where only l0 changed
    updated = {"l0": "z", "l1": "b", "l2": ["x"], "rich_club": "c"}
    delta, _ = build_delta(original_hashes, updated)

    assert apply_delta(original, delta) == updated
def test_delta_on_hash_miss_returns_full_component():
    """Hashes that do not match the payload cause the component itself to
    be placed in the delta."""
    from iai_mcp.delta import build_delta

    stale_hashes = {"l0": "deadbeef00000000", "l1": "cafebabe00000000"}
    current = {"l0": "new", "l1": "facts", "l2": [], "rich_club": ""}

    delta, _ = build_delta(stale_hashes, current)

    assert "l0" in delta
    assert delta["l0"] == "new"
    assert "l1" in delta

453
tests/test_doctor.py Normal file
View file

@ -0,0 +1,453 @@
"""Phase 10.4 — regression tests for doctor rows (m) and (n).
Tests cover:
- (m) heartbeat scanner row with fresh wrappers + empty wrappers dir.
- (n) HID idle source row in the macOS-tools-available case + the
fallback case where ``ioreg`` is missing (cross-OS portability).
The CONTEXT 10.4 specification requires:
- Row (m): PASS if wrappers dir readable; display "n=X fresh, Y stale,
Z orphan".
- Row (n): PASS if ``available_signals`` includes ``"HIDIdleTime"``;
WARN otherwise; display includes HID idle seconds + pmset state.
All subprocess interactions in this file are mocked so the suite is
deterministic and runs on non-macOS hosts as well (real ioreg / pmset
calls would make the suite host-dependent).
"""
from __future__ import annotations
import json
import os
import subprocess
from datetime import datetime, timezone
from pathlib import Path
from unittest.mock import patch
import pytest
from iai_mcp.idle_detector import IdleStatus
# ---------------------------------------------------------------- fixtures
@pytest.fixture
def wrappers_dir(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path:
    """Point ``IAI_MCP_STORE`` at ``tmp_path`` and create ``wrappers/`` in it.

    Returns the newly created ``<tmp_path>/wrappers`` directory so a test
    can drop heartbeat files into it directly.
    """
    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    heartbeat_home = tmp_path / "wrappers"
    heartbeat_home.mkdir(parents=True)
    return heartbeat_home
def _write_fresh_heartbeat(wrappers_dir: Path, pid: int, uuid: str) -> Path:
"""Drop a heartbeat file with a current PID and now() timestamp.
Uses ``os.getpid()`` by default so ``_is_pid_alive`` returns True
deterministically caller can override with a known-dead PID.
"""
now = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
path = wrappers_dir / f"heartbeat-{pid}-{uuid}.json"
path.write_text(
json.dumps(
{
"pid": pid,
"uuid": uuid,
"started_at": now,
"last_refresh": now,
"wrapper_version": "1.0.0",
"schema_version": 1,
}
)
)
return path
# ---------------------------------------------------------------- row (m)
def test_doctor_row_m_heartbeat_scanner_with_fresh_wrappers(
    wrappers_dir: Path,
) -> None:
    """Two heartbeats written for this process's PID: row (m) is PASS and
    its detail reports 2 fresh, 0 stale, 0 orphan."""
    this_pid = os.getpid()
    for tag in ("uuid-aaa", "uuid-bbb"):
        _write_fresh_heartbeat(wrappers_dir, this_pid, tag)

    from iai_mcp.doctor import check_m_heartbeat_scanner

    outcome = check_m_heartbeat_scanner()
    assert outcome.status == "PASS"
    assert outcome.passed is True
    for fragment in ("n=2 fresh", "0 stale", "0 orphan"):
        assert fragment in outcome.detail
def test_doctor_row_m_heartbeat_scanner_empty(wrappers_dir: Path) -> None:
    """An existing but empty wrappers dir: row (m) is PASS with 'n=0 fresh'."""
    from iai_mcp.doctor import check_m_heartbeat_scanner

    outcome = check_m_heartbeat_scanner()

    assert outcome.status == "PASS"
    assert outcome.passed is True
    assert "n=0 fresh" in outcome.detail
def test_doctor_row_m_heartbeat_scanner_dir_absent(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """No ``wrappers/`` directory at all: row (m) is still PASS and its
    detail says 'not present yet'.

    The store root is pointed at an empty ``tmp_path`` and the wrappers
    directory is deliberately not created, so the check sees it missing.
    """
    # Intentionally no mkdir of wrappers/ — absence is the case under test.
    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))

    from iai_mcp.doctor import check_m_heartbeat_scanner

    outcome = check_m_heartbeat_scanner()

    assert outcome.status == "PASS"
    assert outcome.passed is True
    assert "not present yet" in outcome.detail
# ---------------------------------------------------------------- row (n)
def test_doctor_row_n_hid_idle_source_macos() -> None:
    """Row (n) is PASS when the detector reports ``HIDIdleTime`` available.

    ``IdleDetector.status`` is patched to return a fixed ``IdleStatus``
    listing both signals, so the check runs without querying the host.
    """
    canned = IdleStatus(
        hid_idle_sec=612,
        pmset_recent_sleep=False,
        available_signals=["HIDIdleTime", "pmset"],
    )
    status_patch = patch(
        "iai_mcp.idle_detector.IdleDetector.status",
        return_value=canned,
    )
    with status_patch:
        from iai_mcp.doctor import check_n_hid_idle_source

        outcome = check_n_hid_idle_source()

    assert outcome.status == "PASS"
    assert outcome.passed is True
    for fragment in ("HIDIdleTime: 612s", "pmset: clean", "HIDIdleTime"):
        assert fragment in outcome.detail
def test_doctor_row_n_hid_idle_source_missing() -> None:
    """Row (n) is WARN, with ``passed`` still True, when the detector
    reports no available signals.

    ``IdleDetector.status`` is patched to return an ``IdleStatus`` with an
    empty ``available_signals`` list and no HID idle value.
    """
    canned = IdleStatus(
        hid_idle_sec=None,
        pmset_recent_sleep=False,
        available_signals=[],
    )
    status_patch = patch(
        "iai_mcp.idle_detector.IdleDetector.status",
        return_value=canned,
    )
    with status_patch:
        from iai_mcp.doctor import check_n_hid_idle_source

        outcome = check_n_hid_idle_source()

    assert outcome.status == "WARN"
    # WARN must NOT flip the gate — passed stays True per CheckResult contract.
    assert outcome.passed is True
    expected_fragments = (
        "HIDIdleTime: unavailable",
        "available: none",
        "fall back to heartbeat-idle only",
    )
    for fragment in expected_fragments:
        assert fragment in outcome.detail
# ---------------------------------------------------------------- run_diagnosis wire-in
def test_run_diagnosis_includes_rows_m_and_n(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """``run_diagnosis()`` returns exactly one (m) row and one (n) row, with
    (m) listed before (n)."""
    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))

    from iai_mcp.doctor import run_diagnosis

    results = run_diagnosis()
    names = [row.name for row in results]

    m_rows = [row for row in results if "(m)" in row.name]
    n_rows = [row for row in results if "(n)" in row.name]
    assert len(m_rows) == 1, f"expected exactly one (m) row, got {names}"
    assert len(n_rows) == 1, f"expected exactly one (n) row, got {names}"

    # (m) must come before (n) in the checklist sequence.
    m_position = names.index(m_rows[0].name)
    n_position = names.index(n_rows[0].name)
    assert m_position < n_position
# ----------------- Plan 10.6-01 Task 1.3: rows (j), (k), (l) ------
@pytest.fixture
def lifecycle_state_root(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch,
) -> Path:
    """Export ``IAI_MCP_STORE`` as ``tmp_path`` and hand ``tmp_path`` back."""
    store_root = tmp_path
    monkeypatch.setenv("IAI_MCP_STORE", str(store_root))
    return store_root
def test_doctor_row_j_lifecycle_state_default_when_absent(
    lifecycle_state_root: Path,
) -> None:
    """With no ``lifecycle_state.json`` written, row (j) is PASS and its
    detail mentions WAKE and a ``shadow_run=`` field."""
    from iai_mcp.doctor import check_j_lifecycle_current_state

    outcome = check_j_lifecycle_current_state()

    assert outcome.status == "PASS"
    assert outcome.passed is True
    assert "WAKE" in outcome.detail
    # Only the presence of the field is checked, not its value.
    assert "shadow_run=" in outcome.detail
def test_doctor_row_j_lifecycle_state_reports_drowsy(
    lifecycle_state_root: Path,
) -> None:
    """A saved DROWSY state with ``shadow_run`` False is echoed by row (j)."""
    from iai_mcp.lifecycle_state import save_state

    entered_at = "2026-05-02T15:00:00+00:00"
    persisted = {
        "current_state": "DROWSY",
        "since_ts": entered_at,
        "last_activity_ts": entered_at,
        "wrapper_event_seq": 7,
        "sleep_cycle_progress": None,
        "quarantine": None,
        "shadow_run": False,
    }
    save_state(persisted, lifecycle_state_root / "lifecycle_state.json")

    from iai_mcp.doctor import check_j_lifecycle_current_state

    outcome = check_j_lifecycle_current_state()

    assert outcome.status == "PASS"
    assert "DROWSY" in outcome.detail
    assert "shadow_run=false" in outcome.detail
def test_doctor_row_k_lifecycle_history_24h_no_log(
    lifecycle_state_root: Path,
) -> None:
    """No ``logs/`` directory under the store root: row (k) is PASS and its
    detail contains 'no event log'."""
    from iai_mcp.doctor import check_k_lifecycle_history_24h

    outcome = check_k_lifecycle_history_24h()

    assert outcome.status == "PASS"
    assert "no event log" in outcome.detail
def test_doctor_row_k_lifecycle_history_24h_zero_transitions(
    lifecycle_state_root: Path,
) -> None:
    """An empty ``logs/`` directory: row (k) is PASS with '0 transitions'."""
    empty_log_dir = lifecycle_state_root / "logs"
    empty_log_dir.mkdir()

    from iai_mcp.doctor import check_k_lifecycle_history_24h

    outcome = check_k_lifecycle_history_24h()

    assert outcome.status == "PASS"
    assert "0 transitions" in outcome.detail
def test_doctor_row_k_lifecycle_history_24h_counts_transitions(
    lifecycle_state_root: Path,
) -> None:
    """Three ``state_transition`` events plus one other event are appended;
    row (k) must report '3 transitions' and name each destination state."""
    from iai_mcp.lifecycle_event_log import LifecycleEventLog

    log = LifecycleEventLog(log_dir=lifecycle_state_root / "logs")

    # Three transitions: WAKE->DROWSY, DROWSY->WAKE, DROWSY->SLEEP.
    transitions = (
        ("WAKE", "DROWSY", "idle_5min"),
        ("DROWSY", "WAKE", "heartbeat_refresh"),
        ("DROWSY", "SLEEP", "idle_30min"),
    )
    for origin, destination, trigger in transitions:
        log.append(
            {"event": "state_transition", "from": origin, "to": destination,
             "trigger": trigger}
        )
    # Non-transition event must NOT be counted.
    log.append({"event": "wrapper_event", "kind": "boot"})

    from iai_mcp.doctor import check_k_lifecycle_history_24h

    outcome = check_k_lifecycle_history_24h()

    assert outcome.status == "PASS"
    assert "3 transitions" in outcome.detail
    # Bucket summary names destinations.
    for bucket in ("DROWSY=", "WAKE=", "SLEEP="):
        assert bucket in outcome.detail
def test_doctor_row_l_quarantine_none_passes(
    lifecycle_state_root: Path,
) -> None:
    """Nothing saved under the store root: row (l) is PASS and its detail
    contains 'no quarantine'."""
    from iai_mcp.doctor import check_l_sleep_cycle_status

    outcome = check_l_sleep_cycle_status()

    assert outcome.status == "PASS"
    assert "no quarantine" in outcome.detail
def test_doctor_row_l_quarantine_active_short_warns(
    lifecycle_state_root: Path,
) -> None:
    """A quarantine that began 2 hours ago and ends 22 hours from now makes
    row (l) WARN while ``passed`` stays True."""
    from datetime import datetime as _dt, timedelta as _td, timezone as _tz

    from iai_mcp.lifecycle_state import save_state

    now = _dt.now(_tz.utc)
    now_iso = now.isoformat()
    quarantine = {
        "since_ts": (now - _td(hours=2)).isoformat(),
        "until_ts": (now + _td(hours=22)).isoformat(),
        "reason": "sleep step 3 (DREAM_DECAY) failed 3x",
    }
    save_state(
        {
            "current_state": "WAKE",
            "since_ts": now_iso,
            "last_activity_ts": now_iso,
            "wrapper_event_seq": 0,
            "sleep_cycle_progress": None,
            "quarantine": quarantine,
            "shadow_run": False,
        },
        lifecycle_state_root / "lifecycle_state.json",
    )

    from iai_mcp.doctor import check_l_sleep_cycle_status

    outcome = check_l_sleep_cycle_status()

    assert outcome.status == "WARN"
    assert outcome.passed is True  # WARN advisory only
    assert "quarantined" in outcome.detail
    assert "DREAM_DECAY" in outcome.detail
def test_doctor_row_l_quarantine_active_long_fails(
    lifecycle_state_root: Path,
) -> None:
    """A quarantine that began 14 hours ago and is still active makes row
    (l) FAIL, with ``passed`` False and 'reset-quarantine' in the detail."""
    from datetime import datetime as _dt, timedelta as _td, timezone as _tz

    from iai_mcp.lifecycle_state import save_state

    now = _dt.now(_tz.utc)
    now_iso = now.isoformat()
    quarantine = {
        "since_ts": (now - _td(hours=14)).isoformat(),  # 14h ago
        "until_ts": (now + _td(hours=10)).isoformat(),
        "reason": "sleep step 4 (OPTIMIZE_LANCE) failed 3x",
    }
    save_state(
        {
            "current_state": "WAKE",
            "since_ts": now_iso,
            "last_activity_ts": now_iso,
            "wrapper_event_seq": 0,
            "sleep_cycle_progress": None,
            "quarantine": quarantine,
            "shadow_run": False,
        },
        lifecycle_state_root / "lifecycle_state.json",
    )

    from iai_mcp.doctor import check_l_sleep_cycle_status

    outcome = check_l_sleep_cycle_status()

    assert outcome.status == "FAIL"
    assert outcome.passed is False  # FAIL flips the exit code
    assert "reset-quarantine" in outcome.detail
def test_doctor_row_l_quarantine_expired_passes(
    lifecycle_state_root: Path,
) -> None:
    """A quarantine whose ``until_ts`` lies one hour in the past makes row
    (l) PASS with 'expired' in the detail."""
    from datetime import datetime as _dt, timedelta as _td, timezone as _tz

    from iai_mcp.lifecycle_state import save_state

    now = _dt.now(_tz.utc)
    now_iso = now.isoformat()
    quarantine = {
        "since_ts": (now - _td(hours=25)).isoformat(),
        "until_ts": (now - _td(hours=1)).isoformat(),  # already expired
        "reason": "sleep step 5 (COMPACT_RECORDS) failed 3x",
    }
    save_state(
        {
            "current_state": "WAKE",
            "since_ts": now_iso,
            "last_activity_ts": now_iso,
            "wrapper_event_seq": 0,
            "sleep_cycle_progress": None,
            "quarantine": quarantine,
            "shadow_run": False,
        },
        lifecycle_state_root / "lifecycle_state.json",
    )

    from iai_mcp.doctor import check_l_sleep_cycle_status

    outcome = check_l_sleep_cycle_status()

    assert outcome.status == "PASS"
    assert "expired" in outcome.detail
def test_run_diagnosis_includes_rows_j_k_l_in_order(
    lifecycle_state_root: Path,
) -> None:
    """``run_diagnosis()`` returns 14 rows, and the first rows tagged (j),
    (k), (l) and (m) appear in that order."""
    from iai_mcp.doctor import run_diagnosis

    results = run_diagnosis()
    names = [row.name for row in results]

    # Expect 14 rows: a..i (9), j/k/l (3), m/n (2).
    assert len(results) == 14, f"expected 14 rows, got {len(results)}: {names}"

    def first_index(tag):
        # Position of the first row whose name carries the tag; next()
        # raises StopIteration if no row matches.
        return next(i for i, row in enumerate(results) if tag in row.name)

    j_idx = first_index("(j)")
    k_idx = first_index("(k)")
    l_idx = first_index("(l)")
    m_idx = first_index("(m)")

    # Ordered j < k < l < m so the lifecycle block is contiguous.
    assert j_idx < k_idx < l_idx < m_idx, (
        f"row order broken: j={j_idx} k={k_idx} l={l_idx} m={m_idx}"
    )

View file

@ -0,0 +1,361 @@
"""Plan 07-05 Wave 5 R9/A11 acceptance — `iai-mcp doctor --apply --yes`
recovers from `kill -9 <daemon_pid>`.
Flow:
1. Spawn a real `python -m iai_mcp.daemon` against an isolated tmp socket
(HIGH-4 LOCK pattern: IAI_DAEMON_SOCKET_PATH + IAI_MCP_STORE + HOME
env propagation isolates state file too).
2. Wait for socket bind + state file with daemon_pid populated.
3. SIGKILL the daemon.
4. Run `cmd_doctor(args)` with apply=True, yes=True.
5. Assert: rc=0, post-recovery checks all PASS, doctor_action events
written to the events ledger, total elapsed time within budget.
A11 budget: SPEC says 5 s recovery on warm cache. Test uses a 15 s safety
budget to absorb cold-cache bge-small load (~3-10 s) + LanceDB store open
(~1 s) + harness overhead — same precedent as cold-start tests.
"""
from __future__ import annotations
import argparse
import json
import os
import signal
import subprocess
import sys
import time
from pathlib import Path
import psutil
import pytest
# ---------------------------------------------------------------------------
# Fixture: full HIGH-4 LOCK isolation including HOME for state file
# ---------------------------------------------------------------------------
@pytest.fixture
def isolated_daemon_paths(tmp_path, monkeypatch):
    """HOME + socket + store env overrides isolate the daemon completely.

    Setting HOME=tmp_path makes both the test process and any spawned
    subprocess agree that ~/.iai-mcp/ resolves to tmp_path/.iai-mcp/.
    `daemon_state.STATE_PATH`, `cli.LOCK_PATH` and `cli.SOCKET_PATH` are
    also monkeypatched in-process so code running in this process uses the
    same files as the spawned daemon.

    The Hugging Face cache location is captured before HOME is replaced
    and exported as HF_HOME. An HF_HOME that is already set in the
    environment is kept as-is; only when it is unset (or empty) does the
    fixture fall back to ``~/.cache/huggingface`` under the real HOME.

    Yields (sock_path, state_path, store_dir, lock_path). On teardown it
    terminates daemons matching the socket path, removes the socket and
    its directory (best effort), and clears keyring's cached backend.
    """
    # Real ~/.iai-mcp lives outside tmp; create the parallel iai dir under tmp.
    iai_dir = tmp_path / ".iai-mcp"
    iai_dir.mkdir(parents=True, exist_ok=True)
    state_path = iai_dir / ".daemon-state.json"
    lock_path = iai_dir / ".lock"
    store_dir = iai_dir / "store"
    store_dir.mkdir(parents=True, exist_ok=True)

    # Socket lives under /tmp/iai-rec-<pid>-<n>/ (AF_UNIX 104-byte cap).
    sock_dir = Path(f"/tmp/iai-rec-{os.getpid()}-{id(tmp_path)}")
    sock_dir.mkdir(parents=True, exist_ok=True)
    sock_path = sock_dir / "d.sock"

    # CRITICAL: capture the user's real HF cache BEFORE we override HOME.
    # Otherwise the spawned daemon's prewarm step (sentence-transformers
    # bge-small load) sees an empty HF cache under tmp HOME and tries to
    # download the model from HuggingFace — a 60+ second hang.
    # An explicit HF_HOME wins over the default location: a user who has
    # relocated the cache keeps the model there, not under ~/.cache.
    real_hf_home = os.environ.get("HF_HOME") or str(
        Path.home() / ".cache" / "huggingface"
    )

    # HOME propagates to subprocesses via os.environ.copy() — daemon's
    # daemon_state module reads Path.home() at import, so subprocess sees
    # the tmp HOME and writes to tmp_path/.iai-mcp/.daemon-state.json.
    monkeypatch.setenv("HOME", str(tmp_path))
    monkeypatch.setenv("HF_HOME", real_hf_home)
    monkeypatch.setenv("IAI_DAEMON_SOCKET_PATH", str(sock_path))
    monkeypatch.setenv("IAI_MCP_STORE", str(store_dir))
    monkeypatch.setenv("IAI_DAEMON_IDLE_SHUTDOWN_SECS", "99999")

    # CRITICAL: force the keyring "fail" backend in the test process too,
    # so the doctor's `_respawn_daemon` audit-event write — which goes
    # through MemoryStore()._key() → crypto.get_or_create() → keyring —
    # triggers the D-GUARD passphrase fallback rather than hanging on
    # the macOS Security framework's interactive keychain prompt under
    # fresh HOME. The fixture's finally clause resets keyring's cached
    # backend so this isolation does NOT leak to subsequent tests.
    monkeypatch.setenv(
        "PYTHON_KEYRING_BACKEND", "keyring.backends.fail.Keyring"
    )
    monkeypatch.setenv("IAI_MCP_CRYPTO_PASSPHRASE", "test-recovery-passphrase")

    # Reset keyring's already-imported backend cache so PYTHON_KEYRING_BACKEND
    # takes effect in this process (keyring resolves backend at first
    # access and caches; without this nudge, the prior cache wins).
    # MemoryStore's per-instance _cached_key is fresh on every MemoryStore()
    # construction, so no module-level crypto cache reset is needed.
    import keyring.core

    keyring.core._keyring_backend = None

    # In-process: daemon_state.STATE_PATH was bound at import. Override it
    # so the doctor (running in this process) reads the same file the
    # spawned daemon writes to.
    from iai_mcp import cli, daemon_state

    monkeypatch.setattr(daemon_state, "STATE_PATH", state_path)
    monkeypatch.setattr(cli, "LOCK_PATH", lock_path)
    monkeypatch.setattr(cli, "SOCKET_PATH", sock_path)

    try:
        yield sock_path, state_path, store_dir, lock_path
    finally:
        # Aggressive cleanup: kill any test-spawned daemon by env match
        # (avoids touching the user's real production daemon).
        _kill_test_daemons(sock_path)
        try:
            if sock_path.exists():
                sock_path.unlink()
        except OSError:
            pass
        try:
            sock_dir.rmdir()
        except OSError:
            pass
        # Reset keyring backend so the fail-backend cache doesn't leak
        # into subsequent tests in the same pytest process. monkeypatch
        # restores the env var on its own teardown; here we only force
        # keyring to re-resolve on next access.
        keyring.core._keyring_backend = None
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _spawn_daemon(sock_path: Path, store_dir: Path, home: Path) -> subprocess.Popen:
    """Launch ``python -m iai_mcp.daemon`` as a child with an isolated env.

    The child's environment is a copy of this process's environment with
    HOME, the socket path, the store path and the idle-shutdown timeout
    overridden. PYTHON_KEYRING_BACKEND (the fail backend) and
    IAI_MCP_CRYPTO_PASSPHRASE are also written into the child env
    explicitly, so this helper does not depend on the caller having
    exported them first.

    stdout and stderr of the child are discarded. Returns the ``Popen``
    handle; the caller is responsible for the process's lifetime.
    """
    overrides = {
        "HOME": str(home),
        "IAI_DAEMON_SOCKET_PATH": str(sock_path),
        "IAI_MCP_STORE": str(store_dir),
        "IAI_DAEMON_IDLE_SHUTDOWN_SECS": "99999",
        # Force fail-backend → passphrase fallback in the daemon subprocess.
        "PYTHON_KEYRING_BACKEND": "keyring.backends.fail.Keyring",
        "IAI_MCP_CRYPTO_PASSPHRASE": "test-recovery-passphrase",
    }
    child_env = {**os.environ, **overrides}
    command = [sys.executable, "-m", "iai_mcp.daemon"]
    return subprocess.Popen(
        command,
        env=child_env,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
def _wait_for_socket_and_pid(
sock_path: Path, state_path: Path, expected_pid: int, timeout_sec: float = 30.0
) -> bool:
"""Poll until socket binds AND state file has daemon_pid == expected_pid."""
deadline = time.monotonic() + timeout_sec
while time.monotonic() < deadline:
if sock_path.exists() and state_path.exists():
try:
state = json.loads(state_path.read_text())
if state.get("daemon_pid") == expected_pid:
return True
except (OSError, json.JSONDecodeError):
pass
time.sleep(0.1)
return False
def _wait_for_socket_only(sock_path: Path, timeout_sec: float = 15.0) -> bool:
"""Poll until socket binds (used after respawn to detect new daemon)."""
deadline = time.monotonic() + timeout_sec
while time.monotonic() < deadline:
if sock_path.exists():
return True
time.sleep(0.1)
return False
def _kill_test_daemons(sock_path: Path) -> None:
    """Terminate daemon subprocesses that belong to this test, matched by env.

    A process is targeted only when its command line contains
    ``iai_mcp.daemon`` AND its environment's IAI_DAEMON_SOCKET_PATH equals
    ``sock_path``.  Matching on the socket path is what keeps the user's
    real production daemon (no override, or a different path) untouched.

    Each match gets SIGTERM and up to 3 s to exit; if it does not exit in
    time (or vanishes meanwhile) a SIGKILL is attempted.  Processes that
    disappear or deny access at any point are skipped silently.
    """
    wanted_socket = str(sock_path)
    for candidate in psutil.process_iter(["pid", "cmdline"]):
        try:
            argv = candidate.info.get("cmdline") or []
            if "iai_mcp.daemon" not in " ".join(argv):
                continue
            try:
                proc_env = candidate.environ()
            except (psutil.AccessDenied, psutil.NoSuchProcess):
                continue
            if proc_env.get("IAI_DAEMON_SOCKET_PATH") != wanted_socket:
                continue
            try:
                candidate.send_signal(signal.SIGTERM)
                candidate.wait(timeout=3)
            except (psutil.NoSuchProcess, psutil.TimeoutExpired):
                # Graceful stop failed or raced; escalate.
                try:
                    candidate.send_signal(signal.SIGKILL)
                except psutil.NoSuchProcess:
                    pass
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            continue
# ---------------------------------------------------------------------------
# Test 1: kill -9 → --apply --yes recovers within budget, all PASS, exit 0
# ---------------------------------------------------------------------------
def test_apply_yes_recovers_from_kill(isolated_daemon_paths):
    """R9/A11 acceptance: simulate kill -9 → cmd_doctor(apply=True, yes=True) →
    daemon respawns, socket reappears, all 6 checks PASS, exit 0; doctor_action
    events emitted to the events ledger.

    Flow: boot a real daemon subprocess, SIGKILL it, confirm checks (a) and
    (b) FAIL, run the doctor with apply+yes, then assert exit code 0, the
    15 s elapsed budget, a new ``daemon_pid`` in the state file, a clean
    re-diagnosis, and a ``respawn_daemon`` doctor_action event.
    """
    sock_path, state_path, store_dir, _ = isolated_daemon_paths
    # Boot daemon #1.
    proc = _spawn_daemon(sock_path, store_dir, home=Path(os.environ["HOME"]))
    try:
        assert _wait_for_socket_and_pid(
            sock_path, state_path, proc.pid, timeout_sec=30
        ), (
            f"daemon never bound socket + stamped daemon_pid={proc.pid} within 30s"
        )
        original_pid = proc.pid
        # Pre-condition: doctor (no flags) should report at least (a) and (b)
        # FAIL after the kill (other checks may also fail, but those two are
        # the minimum diagnostic surface per A11).
        proc.send_signal(signal.SIGKILL)
        proc.wait(timeout=5)
        time.sleep(0.5)  # let psutil reflect death
        # Imported lazily, inside the test body, like every other iai_mcp
        # import in this test.
        from iai_mcp.doctor import cmd_doctor, run_diagnosis
        pre_results = run_diagnosis()
        pre_fail_names = [r.name for r in pre_results if not r.passed]
        assert "(a) daemon process alive" in pre_fail_names, (
            f"after kill, check (a) should FAIL; got fails: {pre_fail_names}"
        )
        assert "(b) socket file fresh" in pre_fail_names, (
            f"after kill, check (b) should FAIL; got fails: {pre_fail_names}"
        )
        # Run the recovery and time it.
        t0 = time.monotonic()
        args = argparse.Namespace(apply=True, yes=True)
        rc = cmd_doctor(args)
        elapsed = time.monotonic() - t0
        assert rc == 0, (
            f"doctor recovery returned rc={rc}, elapsed={elapsed:.2f}s "
            "— expected exit 0 (all PASS after recovery)"
        )
        # 15s safety budget covers cold-cache bge-small + LanceDB open +
        # harness overhead; SPEC A11 5s budget is verified by Wave 6
        # acceptance against the production warm-cache daemon.
        assert elapsed < 15.0, (
            f"doctor recovery took {elapsed:.2f}s, exceeds 15s safety budget"
        )
        # Post-condition: state file has a NEW daemon_pid (respawn worked).
        # NOTE: relying on run_diagnosis returning all-PASS already guarantees
        # check_a found a live iai_mcp.daemon at the stamped PID; the
        # original_pid != new_pid sanity check is belt-and-suspenders.
        assert state_path.exists(), "respawned daemon never wrote state file"
        s2 = json.loads(state_path.read_text())
        new_pid = s2.get("daemon_pid")
        assert new_pid is not None, "respawned daemon did not stamp daemon_pid"
        assert new_pid != original_pid, (
            f"daemon was not actually respawned: same PID {new_pid} after recovery"
        )
        post_results = run_diagnosis()
        post_fails = [r.name for r in post_results if not r.passed]
        assert post_fails == [], f"post-recovery FAILs remain: {post_fails}"
        # Audit events: at least one doctor_action event for the respawn.
        from iai_mcp.events import query_events
        from iai_mcp.store import MemoryStore
        store = MemoryStore()
        recent = query_events(store, kind="doctor_action", limit=10)
        assert len(recent) >= 1, (
            "doctor_action events not written to ledger after --apply"
        )
        # At minimum the respawn_daemon action must be present.
        action_labels = {e["data"].get("action") for e in recent}
        assert "respawn_daemon" in action_labels, (
            f"respawn_daemon event missing; saw actions: {action_labels}"
        )
    finally:
        # Best-effort cleanup of the original (already dead) + any respawned daemon.
        # Only the original Popen handle is reaped here; a respawned daemon is
        # not a child of this handle.
        if proc.poll() is None:
            try:
                proc.send_signal(signal.SIGKILL)
                proc.wait(timeout=5)
            except (subprocess.TimeoutExpired, ProcessLookupError):
                pass
        # _kill_test_daemons is also called by the fixture's finally clause.
# ---------------------------------------------------------------------------
# Test 2: --apply WITHOUT --yes prompts for each destructive action;
# 'n' answer skips the action and the FAIL persists → rc=2.
# ---------------------------------------------------------------------------
def test_apply_no_yes_skips_destructive_action_on_n_response(
    isolated_daemon_paths, monkeypatch
):
    """R9 UX: ``--apply`` without ``--yes`` asks [y/N]; answering 'n' skips the fix.

    ``psutil.process_iter`` is replaced so it yields one fabricated
    ``iai_mcp.core`` process, and ``builtins.input`` is replaced so every
    prompt is answered with 'n'.  The test asserts that ``cmd_doctor``
    then returns 2 (FAILs remain after the apply pass).
    """
    sock_path, _, _, _ = isolated_daemon_paths
    import psutil

    class _OrphanStub:
        """Minimal stand-in exposing the ``info`` mapping of a psutil process."""

        def __init__(self, pid: int, cmdline: list[str]):
            self.info = {"pid": pid, "cmdline": cmdline}

    orphan = _OrphanStub(99_999, ["python", "-m", "iai_mcp.core"])

    def _only_the_orphan(*a, **kw):
        return [orphan]

    def _always_decline(*a, **kw):
        return "n"

    # Synthetic orphan: intended to make check (d) FAIL so that the
    # destructive kill action is proposed.
    monkeypatch.setattr(psutil, "process_iter", _only_the_orphan)
    # Auto-decline every input prompt.
    monkeypatch.setattr("builtins.input", _always_decline)

    from iai_mcp.doctor import cmd_doctor
    namespace = argparse.Namespace(apply=True, yes=False)
    rc = cmd_doctor(namespace)
    # The declined fix leaves the orphan FAIL in place, and no daemon runs
    # in the tmp env either, so the re-check still has FAILs → rc=2.
    assert rc == 2, (
        f"declining destructive action should leave FAILs unfixed → rc=2; got {rc}"
    )

View file

@ -0,0 +1,166 @@
"""Plan 07.14-03 [Wave2-Option-C] regression test for doctor row (i).
PASS: <=500 manifests. WARN: 501..2000. FAIL: >2000.
The check reads ``IAI_MCP_STORE/lancedb/records.lance/_versions/*.manifest``
(env-var first, ``~/.iai-mcp`` fallback). Tests redirect ``IAI_MCP_STORE``
at a tmp_path to avoid touching the user's real store.
Status mapping is asserted both via direct call and via ``run_diagnosis()``.
The wire-in test below uses name-based lookup rather than positional / count
assertions so future doctor-row additions (e.g. added rows m, n)
do not break this regression test.
"""
from __future__ import annotations
from pathlib import Path
import pytest
# ----------------------------------------------------------------------
# Fixtures
# ----------------------------------------------------------------------
@pytest.fixture
def fake_versions_dir(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path:
    """Point IAI_MCP_STORE at ``tmp_path`` and pre-create the ``_versions`` dir.

    Creates ``tmp_path/lancedb/records.lance/_versions`` (with parents) and
    returns it, so tests can seed manifest files straight into it.
    """
    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    manifest_root = tmp_path.joinpath("lancedb", "records.lance", "_versions")
    manifest_root.mkdir(parents=True)
    return manifest_root
def _seed(versions_dir: Path, count: int) -> None:
"""Create ``count`` distinct fake manifest files."""
for i in range(count):
(versions_dir / f"{i:020d}.manifest").write_bytes(b"x" * 10)
# ----------------------------------------------------------------------
# Direct check_i tests
# ----------------------------------------------------------------------
def test_pass_at_500(fake_versions_dir: Path) -> None:
    """Exactly 500 manifests is still PASS (upper PASS boundary, inclusive)."""
    manifest_count = 500
    _seed(fake_versions_dir, manifest_count)
    from iai_mcp.doctor import check_i_lance_versions_count
    row = check_i_lance_versions_count()
    assert row.status == "PASS"
    assert row.passed is True
    # The detail text is expected to mention the observed count.
    assert "500" in row.detail
def test_pass_at_low_count(fake_versions_dir: Path) -> None:
    """A small manifest count (100) reports PASS and echoes the count."""
    manifest_count = 100
    _seed(fake_versions_dir, manifest_count)
    from iai_mcp.doctor import check_i_lance_versions_count
    row = check_i_lance_versions_count()
    assert row.status == "PASS"
    assert row.passed is True
    assert "100" in row.detail
def test_warn_at_1500(fake_versions_dir: Path) -> None:
    """1500 manifests yields an advisory WARN that names ``compact-records``."""
    _seed(fake_versions_dir, 1500)
    from iai_mcp.doctor import check_i_lance_versions_count
    row = check_i_lance_versions_count()
    assert row.status == "WARN"
    # WARN is advisory only: ``passed`` stays True so the exit code is
    # not flipped.
    assert row.passed is True
    assert "compact-records" in row.detail
def test_warn_boundary_at_2000(fake_versions_dir: Path) -> None:
    """Exactly 2000 manifests is still WARN (upper WARN boundary, inclusive)."""
    _seed(fake_versions_dir, 2000)
    from iai_mcp.doctor import check_i_lance_versions_count
    row = check_i_lance_versions_count()
    assert row.status == "WARN"
    assert row.passed is True
def test_fail_at_2500(fake_versions_dir: Path) -> None:
    """2500 manifests is a FAIL whose detail carries the recovery hints."""
    _seed(fake_versions_dir, 2500)
    from iai_mcp.doctor import check_i_lance_versions_count
    row = check_i_lance_versions_count()
    assert row.status == "FAIL"
    assert row.passed is False
    # Both recovery keywords must appear in the detail text.
    for keyword in ("daemon stop", "compact-records"):
        assert keyword in row.detail
def test_fail_boundary_at_2001(fake_versions_dir: Path) -> None:
    """2001 manifests (one past the WARN ceiling) flips the row to FAIL."""
    _seed(fake_versions_dir, 2001)
    from iai_mcp.doctor import check_i_lance_versions_count
    row = check_i_lance_versions_count()
    assert row.status == "FAIL"
    assert row.passed is False
def test_pass_when_dir_missing(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
    """With no ``_versions`` directory under the store, the row is PASS.

    IAI_MCP_STORE points at an empty ``tmp_path`` (the fresh-install shape);
    the detail is expected to say the directory is "not present".
    """
    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    from iai_mcp.doctor import check_i_lance_versions_count
    row = check_i_lance_versions_count()
    assert row.status == "PASS"
    assert row.passed is True
    assert "not present" in row.detail
# ----------------------------------------------------------------------
# run_diagnosis wire-in: row (i) is present and PASS on a clean store.
# Tests use name-based lookup rather than positional indexing so future
# row additions (Phase 10.4 added m + n) do not regress this check.
# ----------------------------------------------------------------------
def test_run_diagnosis_includes_lance_versions_row(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """Plan 07.14-03 wire-in: ``run_diagnosis()`` returns exactly one (i) row.

    The row is located by name ("(i)" plus "lance", case-insensitive)
    rather than by position, so adding further rows does not break this.
    """
    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    from iai_mcp.doctor import run_diagnosis
    results = run_diagnosis()
    matching = []
    for row in results:
        if "(i)" in row.name and "lance" in row.name.lower():
            matching.append(row)
    assert len(matching) == 1, (
        f"expected exactly one (i) lance versions row in run_diagnosis(); "
        f"got {len(matching)} from {[r.name for r in results]}"
    )
def test_run_diagnosis_lance_row_pass_on_clean_state(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """On a fresh, empty store directory the (i) row of ``run_diagnosis()`` is PASS."""
    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    from iai_mcp.doctor import run_diagnosis
    lance_rows = []
    for row in run_diagnosis():
        if "(i)" in row.name and "lance" in row.name.lower():
            lance_rows.append(row)
    assert len(lance_rows) == 1
    only_row = lance_rows[0]
    assert only_row.status == "PASS"
    assert only_row.passed is True

View file

@ -0,0 +1,316 @@
"""Plan 07-05 Wave 5 R9 acceptance — doctor 6-row PASS/FAIL checklist.
Each individual failure scenario produces a FAIL on the matching check
and the doctor exits with the documented code (D7-13: 0=all pass,
1=any FAIL no --apply, 2=--apply but FAIL persists).
Checks (D7-11 ordering):
  (a) daemon process alive          — daemon_pid in .daemon-state.json
  (b) socket file fresh             — connect+status round-trip <250ms
  (c) lock file healthy             — fcntl probe doesn't error
  (d) no orphan iai_mcp.core procs  — psutil scan returns 0
  (e) daemon state file valid       — fsm_state in {WAKE, SLEEPING, DREAMING}
  (f) lancedb store readable        — MemoryStore() opens without error
Tests use monkeypatching to construct each failure scenario in isolation
without booting a real daemon (test_doctor_apply_recovery.py covers the
end-to-end recovery scenario with a real subprocess daemon).
"""
from __future__ import annotations
import argparse
import io
import json
import os
import sys
from contextlib import redirect_stdout
from pathlib import Path
import pytest
# ---------------------------------------------------------------------------
# Fixtures: tmp socket + state + lock + store paths
# ---------------------------------------------------------------------------
@pytest.fixture
def short_socket_paths(tmp_path, monkeypatch):
    """Yield (lock_path, sock_path, state_path) under tmp dirs.

    AF_UNIX on macOS caps socket paths at ~104 bytes; pytest's tmp_path can
    be too long under xdist. Use a short /tmp/iai-doc-<pid>-<random>/
    directory for the socket.  The directory is created with
    ``tempfile.mkdtemp`` so its name is unique and never collides with a
    leftover from an earlier test, and it is removed recursively at
    teardown (even if setup itself fails part-way).

    Monkeypatches:
      - IAI_DAEMON_SOCKET_PATH env (read by doctor._resolve_socket_path)
      - iai_mcp.daemon_state.STATE_PATH (read by check (a)/(e) load_state)
      - iai_mcp.cli.LOCK_PATH (read by check (c) ProcessLock)
      - IAI_MCP_STORE env (read by check (f) MemoryStore)
    """
    # Function-scope imports keep this fixture self-contained.
    import shutil
    import tempfile

    lock_path = tmp_path / ".lock"
    state_path = tmp_path / ".daemon-state.json"
    store_dir = tmp_path / "store"
    store_dir.mkdir(parents=True, exist_ok=True)

    # Unique short directory; mkdtemp never reuses an existing path, unlike
    # a name derived from id(tmp_path) combined with exist_ok=True.
    sock_dir = Path(tempfile.mkdtemp(prefix=f"iai-doc-{os.getpid()}-", dir="/tmp"))
    sock_path = sock_dir / "d.sock"
    try:
        from iai_mcp import cli, daemon_state
        monkeypatch.setenv("IAI_DAEMON_SOCKET_PATH", str(sock_path))
        monkeypatch.setenv("IAI_MCP_STORE", str(store_dir))
        monkeypatch.setattr(daemon_state, "STATE_PATH", state_path)
        monkeypatch.setattr(cli, "LOCK_PATH", lock_path)
        # Also patch cli.SOCKET_PATH as a defensive fallback — doctor's
        # _resolve_socket_path prefers the env var, but if env propagation is
        # ever removed this guarantees test isolation.
        monkeypatch.setattr(cli, "SOCKET_PATH", sock_path)
        yield lock_path, sock_path, state_path
    finally:
        # Best-effort recursive removal: also clears anything a test left
        # next to the socket, which a bare rmdir() would silently leave behind.
        shutil.rmtree(sock_dir, ignore_errors=True)
# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------
def test_clean_environment_yields_check_a_fail_exit_1(short_socket_paths, capsys):
    """Clean tmp env (no daemon, no state file) → ``cmd_doctor`` returns 1.

    The printed checklist must contain the header, the (a) row, and the
    word ABSENT (no daemon_pid recorded).  Other rows may or may not fail
    depending on the host, but any FAIL without ``--apply`` gives exit 1.
    """
    from iai_mcp.doctor import cmd_doctor
    rc = cmd_doctor(argparse.Namespace(apply=False, yes=False))
    stdout_text = capsys.readouterr().out
    assert rc == 1, f"expected 1 (FAIL no --apply), got {rc}"
    assert "IAI-MCP Doctor" in stdout_text
    assert "(a) daemon process alive" in stdout_text
    assert "ABSENT" in stdout_text, "check (a) should say ABSENT when no daemon_pid"
@pytest.mark.parametrize(
    "scenario,expected_fail_check",
    [
        ("no_daemon_pid", "(a) daemon process alive"),
        ("dead_pid_in_state", "(a) daemon process alive"),
        ("stale_socket_unconnectable", "(b) socket file fresh"),
        ("orphan_core_procs", "(d) no orphan iai_mcp.core procs"),
        ("corrupt_state_fsm", "(e) daemon state file valid"),
    ],
)
def test_individual_failure_modes(
    scenario, expected_fail_check, short_socket_paths, monkeypatch
):
    """R9: every failure scenario must FAIL its matching check.

    Additional (cascading) FAILs are tolerated; the only requirement is
    that ``expected_fail_check`` is among the failed row names.
    """
    _, sock_path, state_path = short_socket_paths

    # "no_daemon_pid" needs no setup: the fixture's default state (no state
    # file at all) is the scenario.
    if scenario == "dead_pid_in_state":
        # 2_000_000 is well under INT_MAX, so os.kill won't raise
        # OverflowError, and per the original author's note it sits above
        # the default PID ranges on macOS and Linux, so it is very unlikely
        # to be a live process.
        stale_state = {"daemon_pid": 2_000_000, "fsm_state": "WAKE"}
        state_path.write_text(json.dumps(stale_state))
    elif scenario == "stale_socket_unconnectable":
        # A regular file where the socket should be: connecting to it fails.
        sock_path.write_text("")
    elif scenario == "orphan_core_procs":
        # Fabricate the orphan via psutil.process_iter rather than spawning
        # a real ``python -m iai_mcp.core`` that would pollute the process
        # table for sibling tests.
        import psutil

        class _OrphanStub:
            def __init__(self, pid: int, cmdline: list[str]):
                self.info = {"pid": pid, "cmdline": cmdline}

        orphan = _OrphanStub(99_999, ["python", "-m", "iai_mcp.core"])
        monkeypatch.setattr(
            psutil, "process_iter", lambda *a, **kw: [orphan]
        )
    elif scenario == "corrupt_state_fsm":
        # An fsm_state value outside the valid set.
        state_path.write_text(json.dumps({"fsm_state": "INVALID_STATE_VALUE"}))

    from iai_mcp.doctor import run_diagnosis
    fail_names = [row.name for row in run_diagnosis() if not row.passed]
    assert expected_fail_check in fail_names, (
        f"Expected FAIL on '{expected_fail_check}' for scenario '{scenario}'; "
        f"got fails: {fail_names}"
    )
def test_print_checklist_format_six_rows(short_socket_paths, monkeypatch, capsys):
    """R9: a fully-green six-row checklist prints the header and six [PASS] tags.

    The rows are built by hand (all passing) and handed straight to
    ``print_checklist``, so only the formatter is exercised here.
    """
    from iai_mcp import doctor
    green_rows = (
        ("(a) daemon process alive", "PID 99999 (iai_mcp.daemon)"),
        ("(b) socket file fresh", "connected in 5 ms"),
        ("(c) lock file healthy", "acquirable"),
        ("(d) no orphan iai_mcp.core procs", "0 found"),
        ("(e) daemon state file valid", "fsm_state=WAKE"),
        ("(f) lancedb store readable", "opens without error"),
    )
    forced_results = [
        doctor.CheckResult(name, True, detail) for name, detail in green_rows
    ]
    doctor.print_checklist(forced_results)
    printed = capsys.readouterr().out
    assert "IAI-MCP Doctor" in printed
    assert printed.count("[PASS]") == 6
    assert printed.count("[FAIL]") == 0
def test_all_pass_returns_exit_0(short_socket_paths, monkeypatch, capsys):
    """D7-13 exit 0: all-PASS diagnosis makes ``cmd_doctor`` return 0.

    ``doctor.run_diagnosis`` is replaced with a stub returning six passing
    rows, avoiding the need for a real daemon subprocess.
    """
    from iai_mcp import doctor
    row_names = (
        "(a) daemon process alive",
        "(b) socket file fresh",
        "(c) lock file healthy",
        "(d) no orphan iai_mcp.core procs",
        "(e) daemon state file valid",
        "(f) lancedb store readable",
    )
    forced_pass = []
    for name in row_names:
        forced_pass.append(doctor.CheckResult(name, True, "synthetic pass"))
    monkeypatch.setattr(doctor, "run_diagnosis", lambda: forced_pass)
    rc = doctor.cmd_doctor(argparse.Namespace(apply=False, yes=False))
    printed = capsys.readouterr().out
    assert rc == 0
    assert "All checks passed" in printed
def test_apply_without_yes_warns_when_yes_alone(short_socket_paths, monkeypatch, capsys):
    """R9 UX: ``--yes`` without ``--apply`` warns on stderr but still diagnoses.

    The exit code simply mirrors the diagnosis outcome, so either 0 or 1
    is accepted (1 is the likely value since no daemon runs in the tmp env).
    """
    from iai_mcp import doctor
    namespace = argparse.Namespace(apply=False, yes=True)
    rc = doctor.cmd_doctor(namespace)
    streams = capsys.readouterr()
    # The warning is expected on stderr, not stdout.
    assert "--yes without --apply is meaningless" in streams.err
    assert rc in (0, 1)
def test_exit_code_2_when_apply_cannot_fix(short_socket_paths, monkeypatch, capsys):
    """D7-13: when ``--apply`` runs but a FAIL survives the re-check → exit 2.

    ``run_diagnosis`` is stubbed so that only check (e) fails, on every
    call (before and after the apply pass).  Per the original author's
    note there is no repair action mapped to check (e), so the FAIL
    persists and ``cmd_doctor`` must return 2 and print "STILL BROKEN".
    """
    _, _, state_path = short_socket_paths
    # Also write a bogus fsm_state to the real state file, mirroring the
    # stubbed result below.
    state_path.write_text(json.dumps({"fsm_state": "TOTALLY_BOGUS"}))

    from iai_mcp import doctor

    def _forced_fail_e_only():
        # Rebuilt on every call: identical rows before and after --apply.
        rows = [
            doctor.CheckResult(name, True, "synthetic")
            for name in (
                "(a) daemon process alive",
                "(b) socket file fresh",
                "(c) lock file healthy",
                "(d) no orphan iai_mcp.core procs",
            )
        ]
        rows.append(
            doctor.CheckResult(
                "(e) daemon state file valid",
                False,
                "fsm_state='TOTALLY_BOGUS' not in [...]",
            )
        )
        rows.append(doctor.CheckResult("(f) lancedb store readable", True, "synthetic"))
        return rows

    monkeypatch.setattr(doctor, "run_diagnosis", _forced_fail_e_only)
    rc = doctor.cmd_doctor(argparse.Namespace(apply=True, yes=True))
    printed = capsys.readouterr().out
    assert rc == 2, f"expected 2 (--apply tried but FAIL persists), got {rc}"
    assert "STILL BROKEN" in printed
    assert "(e) daemon state file valid" in printed
def test_check_b_returns_fail_when_socket_missing(short_socket_paths):
    """Check (b) FAILs with a "does not exist" detail when no socket file is there.

    This is the missing-file case, as opposed to a socket file that exists
    but cannot be connected to.
    """
    socket_file = short_socket_paths[1]
    # Defensive: make sure the socket really is absent.
    if socket_file.exists():
        socket_file.unlink()
    from iai_mcp.doctor import check_b_socket_fresh
    row = check_b_socket_fresh()
    assert row.passed is False
    assert "does not exist" in row.detail
def test_check_e_passes_when_state_file_absent(short_socket_paths):
    """Check (e) PASSES when there is no state file at all.

    A daemon that never booted is not an error at this layer; the detail
    is expected to say "no state file".
    """
    state_file = short_socket_paths[2]
    if state_file.exists():
        state_file.unlink()
    from iai_mcp.doctor import check_e_state_file_valid
    row = check_e_state_file_valid()
    assert row.passed is True
    assert "no state file" in row.detail

View file

@ -0,0 +1,316 @@
"""Phase 07.10 W3 / Plan 05: doctor `check_h_crypto_file_state` + top-of-output hint.
Locks the executable spec for the 8th doctor check row + the migration
remediation hint that prints at the very top of doctor's output when the
file-missing-but-Keychain-entry-exists state is detected (Phase 07.10 D-12).
Detection matrix:
| file present + valid | keyring entry | doctor output |
| yes | any | PASS |
| no | yes | WARN + top-of-output hint pointing at `iai-mcp crypto migrate-to-file` |
| no | no/error | PASS (clean fresh-install state) |
| yes (malformed) | any | FAIL: prints the file's CryptoKeyError message |
These tests run independently of the existing `test_doctor_checklist.py`
fixtures (no daemon socket, no lock file): they only exercise
`check_h_crypto_file_state` directly + the top-of-output hint helper.
"""
from __future__ import annotations
import io
import os
import secrets
from contextlib import redirect_stdout
from pathlib import Path
from unittest.mock import patch
import pytest
# ---------------------------------------------------------------- check_h_crypto_file_state
def test_check_h_pass_when_file_present_and_valid(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """D-12 case 1 — a 32-byte key file with mode 0o600 → PASS.

    IAI_MCP_STORE is pointed at ``tmp_path`` so the check looks for
    ``tmp_path/.crypto.key``; the detail must mention ``.crypto.key``.
    """
    from iai_mcp.doctor import check_h_crypto_file_state
    key_file = tmp_path / ".crypto.key"
    key_file.write_bytes(secrets.token_bytes(32))
    key_file.chmod(0o600)
    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    result = check_h_crypto_file_state()
    assert result.status == "PASS", f"unexpected status={result.status} detail={result.detail}"
    assert result.passed is True
    assert ".crypto.key" in result.detail
def test_check_h_warn_when_file_missing_and_keyring_has_key(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """D-12 case 2 — no key file, but the keyring returns a value → WARN.

    ``keyring.get_password`` is stubbed so the user's real Keychain is
    never probed.  The WARN detail must mention ``migrate-to-file`` and
    ``passed`` must stay True (WARN does not flip the exit code).
    """
    from iai_mcp.doctor import check_h_crypto_file_state
    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    # Precondition: nothing at tmp_path/.crypto.key.
    assert not (tmp_path / ".crypto.key").exists()

    import keyring as _keyring
    # Stand-in for a stored Keychain value (base64-looking placeholder).
    stored_value = "Zm9vYmFyZm9vYmFyZm9vYmFyZm9vYmFyZm9vYmFyZm9vYmE="

    def _pretend_entry_exists(service: str, username: str) -> str | None:
        return stored_value

    monkeypatch.setattr(_keyring, "get_password", _pretend_entry_exists)
    result = check_h_crypto_file_state()
    assert result.status == "WARN", f"unexpected status={result.status} detail={result.detail}"
    assert "migrate-to-file" in result.detail.lower()
    # WARN must NOT report failure — it does not flip exit code to 1.
    assert result.passed is True
def test_check_h_pass_when_file_missing_and_no_keyring(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """D-12 case 3 — no key file AND keyring returns None → PASS (fresh install).

    The detail must give the user something actionable: it has to mention
    either "init" or "passphrase" (case-insensitive).
    """
    from iai_mcp.doctor import check_h_crypto_file_state
    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    assert not (tmp_path / ".crypto.key").exists()

    import keyring as _keyring

    def _no_entry(service: str, username: str) -> str | None:
        # Simulates "no Keychain entry".
        return None

    monkeypatch.setattr(_keyring, "get_password", _no_entry)
    result = check_h_crypto_file_state()
    assert result.status == "PASS", f"unexpected status={result.status} detail={result.detail}"
    assert result.passed is True
    lowered = result.detail.lower()
    assert "init" in lowered or "passphrase" in lowered
def test_check_h_pass_when_keyring_backend_unavailable(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """D-12 case 3b — no key file AND keyring raises NoKeyringError → PASS.

    A host without any keyring backend must be treated like "no Keychain
    entry": neither a failure nor a warning.
    """
    from iai_mcp.doctor import check_h_crypto_file_state
    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    assert not (tmp_path / ".crypto.key").exists()

    import keyring as _keyring
    import keyring.errors as _keyring_errors

    def _backend_missing(service: str, username: str) -> str | None:
        raise _keyring_errors.NoKeyringError("no backend available (test-stub)")

    monkeypatch.setattr(_keyring, "get_password", _backend_missing)
    result = check_h_crypto_file_state()
    assert result.status == "PASS", f"unexpected status={result.status} detail={result.detail}"
    assert result.passed is True
def test_check_h_fail_when_file_malformed(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """D-12 case 4 — a key file of the wrong length (31 bytes) → FAIL.

    The detail must mention "wrong length" or "malformed" (case-insensitive).
    """
    from iai_mcp.doctor import check_h_crypto_file_state
    short_key = tmp_path / ".crypto.key"
    # One byte short of the expected 32.
    short_key.write_bytes(b"\x00" * 31)
    short_key.chmod(0o600)
    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    result = check_h_crypto_file_state()
    assert result.status == "FAIL", f"unexpected status={result.status} detail={result.detail}"
    assert result.passed is False
    lowered = result.detail.lower()
    assert "wrong length" in lowered or "malformed" in lowered
# ---------------------------------------------------------------- top-of-output hint helper
def test_format_top_of_output_hint_emits_line_when_check_h_warns() -> None:
    """D-12 — a WARN row for check (h) makes the helper return a ``> hint:`` line.

    The hint must start with ``> hint:`` and name ``migrate-to-file`` so
    the user sees the fix before the row-by-row checklist.
    """
    from iai_mcp.doctor import CheckResult, _format_top_of_output_hint
    passing_row = CheckResult(
        "(a) daemon process alive", True, "PID 12345 (iai_mcp.daemon)", status="PASS"
    )
    warning_row = CheckResult(
        "(h) crypto key file state",
        True,
        "crypto key file missing at /tmp/x/.crypto.key, but a Keychain entry was found.\n"
        " Run `iai-mcp crypto migrate-to-file` from a Terminal to migrate the key.",
        status="WARN",
    )
    hint = _format_top_of_output_hint([passing_row, warning_row])
    assert hint is not None, "WARN row for check_h must produce a hint"
    assert hint.startswith("> hint:"), f"hint must be prefixed with `> hint:`, got: {hint!r}"
    assert "migrate-to-file" in hint, f"hint must name migrate-to-file, got: {hint!r}"
def test_format_top_of_output_hint_returns_none_when_no_warn() -> None:
    """When every row is PASS the hint helper returns None."""
    from iai_mcp.doctor import CheckResult, _format_top_of_output_hint
    all_green = [
        CheckResult(name, True, detail, status="PASS")
        for name, detail in (
            ("(a) daemon process alive", "PID 12345 (iai_mcp.daemon)"),
            ("(h) crypto key file state", "key file present"),
        )
    ]
    assert _format_top_of_output_hint(all_green) is None
# ---------------------------------------------------------------- run_diagnosis includes check_h
def test_run_diagnosis_includes_check_h(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
    """D-12 wire-in -- ``run_diagnosis()`` contains exactly one (h) and one (i) row.

    Rows are found by name, not position, so later row additions do not
    break this contract.  A valid 32-byte key file is placed under
    ``tmp_path`` (with IAI_MCP_STORE pointing there) so that check (h) does
    not need the user's real keyring or filesystem.
    """
    from iai_mcp.doctor import run_diagnosis
    key_file = tmp_path / ".crypto.key"
    key_file.write_bytes(secrets.token_bytes(32))
    key_file.chmod(0o600)
    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))

    # Other checks may FAIL here (no daemon running); only the presence of
    # the (h) and (i) rows is asserted.
    results = run_diagnosis()

    def _rows_named(tag: str, keyword: str) -> list:
        return [r for r in results if tag in r.name and keyword in r.name.lower()]

    h_rows = _rows_named("(h)", "crypto")
    assert len(h_rows) == 1, (
        f"expected exactly one (h) crypto row in run_diagnosis(); "
        f"got {len(h_rows)} from {[r.name for r in results]}"
    )
    i_rows = _rows_named("(i)", "lance")
    assert len(i_rows) == 1, (
        f"expected exactly one (i) lance versions row in run_diagnosis(); "
        f"got {len(i_rows)} from {[r.name for r in results]}"
    )
# ---------------------------------------------------------------- cmd_doctor wire-in (advisor-driven)
def test_cmd_doctor_prints_hint_at_top_when_check_h_warns(
    monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture
) -> None:
    """D-12 wire-in pin — the ``> hint:`` line is printed ABOVE the checklist header.

    Helper-level tests only prove that the hint string is built correctly;
    this test pins that ``cmd_doctor`` actually prints it, and prints it
    first.  ``doctor.run_diagnosis`` is stubbed with eight synthetic rows
    (seven PASS plus one WARN for check (h)), stdout is captured, and the
    position of ``> hint:`` is compared with that of the "IAI-MCP Doctor"
    header.  A WARN-only result must still give exit code 0.
    """
    import argparse
    from iai_mcp import doctor as _doctor

    green_names = (
        "(a) daemon process alive",
        "(b) socket file fresh",
        "(c) lock file healthy",
        "(d) no orphan iai_mcp.core procs",
        "(e) daemon state file valid",
        "(f) lancedb store readable",
        "(g) no dup binders",
    )
    synthetic = [
        _doctor.CheckResult(name, True, "synthetic", status="PASS")
        for name in green_names
    ]
    synthetic.append(
        _doctor.CheckResult(
            "(h) crypto key file state",
            True,
            (
                "crypto key file missing at /tmp/.crypto.key, but a Keychain entry was found.\n"
                " Run `iai-mcp crypto migrate-to-file` from a Terminal to migrate the key."
            ),
            status="WARN",
        )
    )
    monkeypatch.setattr(_doctor, "run_diagnosis", lambda: synthetic)

    rc = _doctor.cmd_doctor(argparse.Namespace(apply=False, yes=False))
    captured = capsys.readouterr().out
    hint_idx = captured.find("> hint:")
    header_idx = captured.find("IAI-MCP Doctor")
    assert hint_idx >= 0, f"expected `> hint:` line in stdout, got:\n{captured!r}"
    assert header_idx >= 0, f"expected checklist header in stdout, got:\n{captured!r}"
    assert hint_idx < header_idx, (
        f"hint (idx {hint_idx}) must appear BEFORE checklist header (idx {header_idx})\n"
        f"stdout was:\n{captured}"
    )
    # The hint must name the actionable command.
    assert "migrate-to-file" in captured[: header_idx], (
        f"hint must name `migrate-to-file` ABOVE the checklist header; "
        f"top-of-output region was: {captured[:header_idx]!r}"
    )
    # Exit code: WARN does NOT flip to 1 (advisory only); rc must be 0.
    assert rc == 0, f"WARN rows must not change exit code; got rc={rc}"
# ---------------------------------------------------------------- CheckResult back-compat
def test_check_result_three_arg_constructor_still_works() -> None:
    """Back-compat guard for the legacy 3-positional-arg CheckResult form.

    `status` was added to CheckResult later; callers that still pass only
    (name, passed, detail) must keep working, and the defaulted status must
    follow `passed`: "PASS" when True, "FAIL" when False.
    """
    from iai_mcp.doctor import CheckResult

    legacy_ok = CheckResult("(x) example", True, "ok")
    assert legacy_ok.passed is True
    assert legacy_ok.detail == "ok"
    # The default has to be one of the two legacy values, and specifically
    # the one derived from passed=True.
    assert legacy_ok.status in ("PASS", "FAIL")
    assert legacy_ok.status == "PASS"

    legacy_bad = CheckResult("(y) example", False, "broken")
    assert legacy_bad.status == "FAIL"

View file

@ -0,0 +1,622 @@
"""Phase 7.1 R6 / D7.1-05 — doctor.py multi-binder detection + repair.
Test matrix (8 tests):
A. _extract_binder_pids parses lsof -F pn output → set[int]
B. _extract_binder_pids skips PIDs bound to UNRELATED sockets
C. _extract_binder_pids handles empty input → empty set
D. check_g_no_dup_binders skips when socket file absent (PASS-with-skip)
E. check_g_no_dup_binders PASSes with single binder (multiprocessing worker)
F. check_g_no_dup_binders FAILs with two binders (regression-trap centerpiece)
G. _kill_dup_binders keeps oldest, kills the rest (real subprocess daemons)
H. iai-mcp doctor --apply --yes recovers from dup-binder scenario (e2e)
A-D: pure unit tests, no daemon, fast (<1s combined).
E-F: in-process multiprocessing workers — distinct PIDs, lsof-visible.
G-H: real iai_mcp.daemon subprocesses — required because _kill_dup_binders
filters by 'iai_mcp.daemon' substring in psutil cmdline (wrong-PID-kill
mitigation). Isolated by the HIGH-4 LOCK env propagation pattern from
test_doctor_apply_recovery.py:isolated_daemon_paths.
Skip on non-POSIX (AF_UNIX requirement).
"""
from __future__ import annotations
import argparse
import multiprocessing as mp
import os
import platform
import signal
import socket
import subprocess
import sys
import time
from pathlib import Path
import psutil
import pytest
pytestmark = pytest.mark.skipif(
platform.system() == "Windows",
reason="POSIX AF_UNIX required (lsof -U + multiprocessing socket binders)",
)
# ---------------------------------------------------------------------------
# Section 1 — pure unit tests for _extract_binder_pids (A, B, C)
# ---------------------------------------------------------------------------
def test_extract_binder_pids_parses_lsof_output():
    """A: a hand-written `lsof -F pn` dump yields the expected PID set.

    In this format a `p<pid>` line opens a process record and the `n<name>`
    lines that follow belong to that PID until the next `p<pid>` line.
    Two of the three records name the target socket; the third names an
    unrelated one and must be left out.
    """
    from iai_mcp.doctor import _extract_binder_pids

    target = Path("/tmp/iai-test/d.sock")
    # One (pid-line, name-line) pair per process record.
    records = (
        ("p12345", f"n{target}"),
        ("p67890", f"n{target}"),
        ("p99999", "n/tmp/other-app/socket"),
    )
    lsof_output = "\n".join(line for record in records for line in record)

    found = _extract_binder_pids(lsof_output, target)
    assert found == {12345, 67890}, f"expected {{12345, 67890}}, got {found}"
def test_extract_binder_pids_skips_unrelated_sockets():
    """B: only PIDs whose records name the target path are returned.

    The dump mixes four processes: two hold unrelated sockets and two hold
    the target. PID 4004 also lists a second, unrelated name after the
    target, which must not remove it from the result.
    """
    from iai_mcp.doctor import _extract_binder_pids

    target = Path("/tmp/iai-test/d.sock")
    ours = f"n{target}"
    lsof_lines = [
        "p1001",
        "n/var/run/some-other-daemon.sock",
        "p2002",
        ours,
        "p3003",
        "n/tmp/X11-unix/X0",
        "p4004",
        ours,
        # A second name entry for PID 4004 (a process can hold several fds).
        "n/some/extra/name/for/p4004",
    ]

    found = _extract_binder_pids("\n".join(lsof_lines), target)
    assert found == {2002, 4004}, f"expected {{2002, 4004}}, got {found}"
def test_extract_binder_pids_handles_empty_output():
    """C: degenerate lsof output produces an empty set, never an error.

    Inputs, in order: the empty string; blank lines only; and a malformed
    dump made of a PID line followed by a non-name line and then a bare
    `p` with no digits.
    """
    from iai_mcp.doctor import _extract_binder_pids

    target = Path("/tmp/anywhere.sock")
    for degenerate in ("", "\n\n\n", "p123\nXgarbage\np\n"):
        assert _extract_binder_pids(degenerate, target) == set()
# ---------------------------------------------------------------------------
# Section 2 — check_g_no_dup_binders (D, E, F) using monkeypatched socket path
# ---------------------------------------------------------------------------
@pytest.fixture
def short_socket_path(tmp_path, monkeypatch):
    """Yield a short AF_UNIX socket path under /tmp and clean it up afterwards.

    The path lives directly under /tmp rather than under ``tmp_path`` to
    stay inside the AF_UNIX path-length cap (104 bytes on macOS). The
    fixture exports it through ``IAI_DAEMON_SOCKET_PATH``, the override
    that ``doctor._resolve_socket_path`` is documented to consult.

    The directory is created with ``tempfile.mkdtemp`` so its name is
    unique and unpredictable and it is freshly created with owner-only
    permissions. A name built from the PID and ``id(tmp_path)`` combined
    with ``exist_ok=True`` could silently reuse a stale or foreign
    directory in world-writable /tmp.

    ``tmp_path`` is kept in the signature for compatibility; it is no
    longer used to derive the directory name.

    Yields:
        Path of the (not yet existing) socket file ``<dir>/d.sock``.
    """
    import tempfile

    sock_dir = Path(tempfile.mkdtemp(prefix="iai-mb-", dir="/tmp"))
    sock_path = sock_dir / "d.sock"
    monkeypatch.setenv("IAI_DAEMON_SOCKET_PATH", str(sock_path))
    try:
        yield sock_path
    finally:
        # Best-effort teardown: a leftover socket file or a non-empty
        # directory must never fail the test that used the fixture.
        try:
            if sock_path.exists():
                sock_path.unlink()
        except OSError:
            pass
        try:
            sock_dir.rmdir()
        except OSError:
            pass
def test_check_g_no_socket_skips(short_socket_path, monkeypatch):
    """D: with no socket file on disk, check (g) passes with a skip detail.

    The fixture only exports the socket path through the environment; it
    never creates the file. The check must therefore report PASS and
    mention "no socket file" instead of raising a false alarm on a clean
    machine.
    """
    from iai_mcp.doctor import check_g_no_dup_binders

    # Precondition: the env var points at a path that does not exist.
    assert not short_socket_path.exists()

    outcome = check_g_no_dup_binders()
    assert outcome.passed is True
    assert "no socket file" in outcome.detail
# --- Multiprocessing worker for Tests E and F (distinct PIDs) ---------------
def _bind_socket_worker(sock_path_str: str, ready_event: mp.Event, exit_event: mp.Event) -> None:
    """Child-process entry point: bind and listen on an AF_UNIX socket, then wait.

    Sequence: bind a stream socket at ``sock_path_str``, start listening,
    set ``ready_event`` so the parent knows the bind succeeded, then block
    on ``exit_event`` for at most 30 seconds. The socket is always closed
    on the way out; the socket file itself is not removed here.

    Tests E and F run this in ``multiprocessing`` children so each binder
    has its own PID without paying the boot cost of a real daemon. For the
    two-binder scenario the parent unlinks the path between spawns so the
    second ``bind()`` creates a fresh inode under the same name.
    """
    listener = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    try:
        listener.bind(sock_path_str)
        listener.listen(5)
        ready_event.set()
        # Hold the socket open until the parent asks us to stop (or 30s pass).
        exit_event.wait(timeout=30)
    finally:
        try:
            listener.close()
        except OSError:
            pass
def test_check_g_single_binder_passes(short_socket_path):
    """E: exactly one process bound to the socket, so check (g) reports PASS.

    The binder runs in a separate ``multiprocessing`` process so that its
    PID differs from the pytest process and lsof has a binder to list.
    The detail string must mention "1 binder".
    """
    from iai_mcp.doctor import check_g_no_dup_binders

    # 'spawn' rather than 'fork', even on Darwin: lancedb is not fork-safe
    # and is imported transitively in the parent, so a clean interpreter
    # for the worker avoids the fork warning.
    spawn_ctx = mp.get_context("spawn")
    bound = spawn_ctx.Event()
    release = spawn_ctx.Event()
    binder = spawn_ctx.Process(
        target=_bind_socket_worker,
        args=(str(short_socket_path), bound, release),
    )
    binder.start()
    try:
        assert bound.wait(timeout=10), "binder worker never signaled ready"
        # Short pause so lsof observes the freshly bound socket.
        time.sleep(0.2)

        outcome = check_g_no_dup_binders()
        assert outcome.passed is True, (
            f"single-binder scenario should PASS; got detail={outcome.detail!r}"
        )
        assert "1 binder" in outcome.detail, f"unexpected detail: {outcome.detail!r}"
    finally:
        # Ask the worker to exit; escalate to terminate() if it lingers.
        release.set()
        binder.join(timeout=5)
        if binder.is_alive():
            binder.terminate()
            binder.join(timeout=2)
def test_check_g_two_binders_fails(short_socket_path):
    """F: two processes bound to the same socket path, so check (g) reports FAIL.

    This is the regression trap for the singleton invariant. Worker 1
    binds the path; the test then unlinks the path so worker 2's bind()
    succeeds instead of failing with EADDRINUSE. Worker 1 keeps its
    original (now unlinked) inode alive through its open fd while worker 2
    owns the new inode under the same name, so lsof lists both PIDs for
    the path.

    Asserted:
      * parsing raw lsof output directly yields both worker PIDs;
      * check_g_no_dup_binders() fails;
      * its detail string contains both PIDs.
    """
    from iai_mcp.doctor import _extract_binder_pids, check_g_no_dup_binders

    # 'spawn' rather than 'fork', even on Darwin: lancedb is not fork-safe
    # and is imported transitively in the parent process.
    spawn_ctx = mp.get_context("spawn")
    sock_arg = str(short_socket_path)

    # First binder.
    first_ready = spawn_ctx.Event()
    first_exit = spawn_ctx.Event()
    first = spawn_ctx.Process(
        target=_bind_socket_worker,
        args=(sock_arg, first_ready, first_exit),
    )
    first.start()

    # The second binder is created inside the try block so cleanup still
    # runs if anything before its start fails.
    second_ready = spawn_ctx.Event()
    second_exit = spawn_ctx.Event()
    second = None
    try:
        assert first_ready.wait(timeout=10), "worker 1 never signaled ready"

        # Remove the name so the second bind() gets a fresh inode.
        try:
            short_socket_path.unlink()
        except OSError:
            pass

        second = spawn_ctx.Process(
            target=_bind_socket_worker,
            args=(sock_arg, second_ready, second_exit),
        )
        second.start()
        assert second_ready.wait(timeout=10), "worker 2 never signaled ready"
        time.sleep(0.3)  # give lsof a moment to see both sockets

        # Independent confirmation through the parser on raw lsof output.
        lsof_proc = subprocess.run(
            ["lsof", "-U", "-F", "pn"],
            capture_output=True,
            text=True,
            timeout=5,
            check=False,
        )
        seen = _extract_binder_pids(lsof_proc.stdout, short_socket_path)
        assert {first.pid, second.pid}.issubset(seen), (
            f"lsof should report both worker PIDs as binders; got {seen} "
            f"(workers: {first.pid}, {second.pid})"
        )

        # The assertion this test exists for.
        outcome = check_g_no_dup_binders()
        assert outcome.passed is False, (
            f"two-binder scenario should FAIL; got detail={outcome.detail!r}"
        )
        for worker_pid in (first.pid, second.pid):
            assert str(worker_pid) in outcome.detail, (
                f"detail missing PID {worker_pid}: {outcome.detail!r}"
            )
    finally:
        first_exit.set()
        if second is not None:
            second_exit.set()
        for worker in (first, second):
            if worker is None:
                continue
            worker.join(timeout=5)
            if worker.is_alive():
                worker.terminate()
                worker.join(timeout=2)
# ---------------------------------------------------------------------------
# Section 3 — _kill_dup_binders + e2e doctor --apply (G, H)
# ---------------------------------------------------------------------------
@pytest.fixture
def isolated_daemon_paths(tmp_path, monkeypatch):
    """Isolate HOME, socket, store and crypto settings for real-daemon tests.

    Follows the pattern of test_doctor_apply_recovery.py:isolated_daemon_paths
    (HIGH-4 LOCK precedent, Plan 07-04). Tests G/H need real
    ``iai_mcp.daemon`` subprocesses because ``_kill_dup_binders`` only
    targets processes whose cmdline contains 'iai_mcp.daemon'.

    What is set up:
      * ``tmp_path/.iai-mcp`` with a ``store`` subdirectory;
      * a short socket directory under /tmp (AF_UNIX path-length cap),
        created with ``tempfile.mkdtemp`` so the name is unique and
        unpredictable. A name built from the PID and ``id(tmp_path)`` with
        ``exist_ok=True`` could silently reuse a stale or foreign directory;
      * env vars: HOME, HF_HOME (kept at the real cache location, which is
        captured before HOME is redirected), IAI_DAEMON_SOCKET_PATH,
        IAI_MCP_STORE, IAI_DAEMON_IDLE_SHUTDOWN_SECS, the failing keyring
        backend and a fixed crypto passphrase;
      * ``daemon_state.STATE_PATH``, ``cli.LOCK_PATH`` and ``cli.SOCKET_PATH``
        are patched to the isolated locations.

    Yields:
        ``(sock_path, state_path, store_dir, lock_path)``.

    Teardown terminates any test daemons bound to this socket path, removes
    the socket file and directory (best effort) and resets keyring's cached
    backend.
    """
    import tempfile

    iai_dir = tmp_path / ".iai-mcp"
    iai_dir.mkdir(parents=True, exist_ok=True)
    state_path = iai_dir / ".daemon-state.json"
    lock_path = iai_dir / ".lock"
    store_dir = iai_dir / "store"
    store_dir.mkdir(parents=True, exist_ok=True)

    sock_dir = Path(tempfile.mkdtemp(prefix="iai-mb2-", dir="/tmp"))
    sock_path = sock_dir / "d.sock"

    # Resolve the real HF cache before HOME is redirected to tmp_path.
    real_hf_home = Path.home() / ".cache" / "huggingface"
    monkeypatch.setenv("HOME", str(tmp_path))
    monkeypatch.setenv("HF_HOME", str(real_hf_home))
    monkeypatch.setenv("IAI_DAEMON_SOCKET_PATH", str(sock_path))
    monkeypatch.setenv("IAI_MCP_STORE", str(store_dir))
    monkeypatch.setenv("IAI_DAEMON_IDLE_SHUTDOWN_SECS", "99999")
    monkeypatch.setenv(
        "PYTHON_KEYRING_BACKEND", "keyring.backends.fail.Keyring"
    )
    monkeypatch.setenv("IAI_MCP_CRYPTO_PASSPHRASE", "test-mb-passphrase")

    # Drop keyring's cached backend so the env override above takes effect.
    import keyring.core

    keyring.core._keyring_backend = None

    from iai_mcp import cli, daemon_state

    monkeypatch.setattr(daemon_state, "STATE_PATH", state_path)
    monkeypatch.setattr(cli, "LOCK_PATH", lock_path)
    monkeypatch.setattr(cli, "SOCKET_PATH", sock_path)
    try:
        yield sock_path, state_path, store_dir, lock_path
    finally:
        _kill_test_daemons(sock_path)
        # Best-effort removal; cleanup must never fail the test itself.
        try:
            if sock_path.exists():
                sock_path.unlink()
        except OSError:
            pass
        try:
            sock_dir.rmdir()
        except OSError:
            pass
        # Do not leak the fail-backend cache into later tests.
        keyring.core._keyring_backend = None
def _spawn_daemon(sock_path: Path, store_dir: Path, home: Path) -> subprocess.Popen:
    """Start `python -m iai_mcp.daemon` as a child with the test's isolation env.

    The child inherits the current environment, overlaid with HOME, the
    socket path, the store directory, a very long idle-shutdown window,
    the failing keyring backend and the fixed test passphrase. Its stdout
    and stderr are discarded.

    Returns:
        The ``subprocess.Popen`` handle of the started daemon.
    """
    child_env = {
        **os.environ,
        "HOME": str(home),
        "IAI_DAEMON_SOCKET_PATH": str(sock_path),
        "IAI_MCP_STORE": str(store_dir),
        "IAI_DAEMON_IDLE_SHUTDOWN_SECS": "99999",
        "PYTHON_KEYRING_BACKEND": "keyring.backends.fail.Keyring",
        "IAI_MCP_CRYPTO_PASSPHRASE": "test-mb-passphrase",
    }
    command = [sys.executable, "-m", "iai_mcp.daemon"]
    return subprocess.Popen(
        command,
        env=child_env,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
def _wait_for_socket(sock_path: Path, timeout_sec: float = 30.0) -> bool:
    """Poll until ``sock_path`` exists or ``timeout_sec`` elapses.

    The path is always checked at least once, so a zero or negative
    timeout means "check now, do not wait" rather than reporting an
    existing socket as missing. A final check also happens after the last
    sleep, so a socket that appears right at the deadline is still seen.

    Args:
        sock_path: Filesystem path expected to appear.
        timeout_sec: Maximum time to keep polling, in seconds.

    Returns:
        True as soon as the path exists, False once the deadline has
        passed without it appearing.
    """
    poll_interval = 0.1
    deadline = time.monotonic() + timeout_sec
    while True:
        if sock_path.exists():
            return True
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            return False
        # Never sleep past the deadline.
        time.sleep(min(poll_interval, remaining))
def _kill_test_daemons(sock_path: Path) -> None:
    """Terminate only the daemons that belong to this test's socket path.

    A process is selected when its command line contains 'iai_mcp.daemon'
    AND its environment has ``IAI_DAEMON_SOCKET_PATH`` equal to
    ``sock_path``. Matching on the environment keeps the user's real
    daemon out of reach. Selected processes get SIGTERM and up to 3
    seconds to exit; if that times out, or the process has already gone,
    a SIGKILL is attempted. Processes that vanish or cannot be inspected
    are skipped silently.
    """
    wanted = str(sock_path)
    for candidate in psutil.process_iter(["pid", "cmdline"]):
        try:
            cmdline = " ".join(candidate.info.get("cmdline") or [])
            if "iai_mcp.daemon" not in cmdline:
                continue

            try:
                proc_env = candidate.environ()
            except (psutil.AccessDenied, psutil.NoSuchProcess):
                continue
            if proc_env.get("IAI_DAEMON_SOCKET_PATH") != wanted:
                continue

            # Polite stop first, hard kill as a fallback.
            try:
                candidate.send_signal(signal.SIGTERM)
                candidate.wait(timeout=3)
            except (psutil.NoSuchProcess, psutil.TimeoutExpired):
                try:
                    candidate.send_signal(signal.SIGKILL)
                except psutil.NoSuchProcess:
                    pass
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            continue
def _spawn_dup_daemons(
    sock_path: Path, store_dir: Path, home: Path
) -> tuple[subprocess.Popen, subprocess.Popen]:
    """Start two real daemons intended to be bound to the same socket path.

    Race-window simulation: start daemon #1 and wait for its socket file,
    unlink that file so daemon #2's bind() does not hit EADDRINUSE, then
    start daemon #2 and wait for the socket file again. Daemon #1 still
    holds the original (unlinked) inode through its listening fd, so lsof
    is expected to list both PIDs for the path.

    Returns:
        ``(first_daemon, second_daemon)`` Popen handles.

    Raises:
        AssertionError: if either daemon fails to create the socket within
            30 seconds. Daemons started so far are killed first.
    """

    def _force_kill(proc: subprocess.Popen) -> None:
        # Best-effort kill; the process may already be gone.
        try:
            proc.kill()
        except ProcessLookupError:
            pass

    first = _spawn_daemon(sock_path, store_dir, home)
    if not _wait_for_socket(sock_path, timeout_sec=30):
        _force_kill(first)
        raise AssertionError("daemon #1 never bound socket within 30s")

    # Free the name so the second bind() creates a new inode.
    try:
        sock_path.unlink()
    except OSError:
        pass

    second = _spawn_daemon(sock_path, store_dir, home)
    if not _wait_for_socket(sock_path, timeout_sec=30):
        _force_kill(second)
        _force_kill(first)
        raise AssertionError("daemon #2 never bound socket within 30s")

    # Let lsof catch up with both binders before the caller inspects them.
    time.sleep(0.5)
    return first, second
@pytest.mark.skip(
    reason=(
        "Phase 10.6 Plan 10.6-01 Task 1.5: single-machine "
        "LifecycleLock prevents two daemons from both binding the same "
        "IAI_MCP_STORE. Daemon #2 raises LifecycleLockConflict and exits "
        "1 before bind. The dup-binder integration scenario is now "
        "impossible by design. The unit tests in this file "
        "(test_extract_binder_pids_*, test_check_g_*) still cover "
        "check_g's detection logic without spawning two real daemons."
    )
)
def test_kill_dup_binders_keeps_oldest(isolated_daemon_paths):
    """G: with two real daemons, _kill_dup_binders keeps the older one.

    Currently skipped (see the skip reason). When it runs, the flow is:
    confirm through lsof and check (g) that both daemons count as binders,
    call ``_kill_dup_binders()``, then verify that check (g) passes again,
    that the first-spawned daemon is still alive and that the
    second-spawned one exits within 5 seconds.
    """
    from iai_mcp.doctor import (
        _extract_binder_pids,
        _kill_dup_binders,
        check_g_no_dup_binders,
    )

    sock_path, _, store_dir, _ = isolated_daemon_paths
    home = Path(os.environ["HOME"])
    older, younger = _spawn_dup_daemons(sock_path, store_dir, home)
    try:
        # Precondition 1: lsof lists both daemons for our socket path.
        lsof_proc = subprocess.run(
            ["lsof", "-U", "-F", "pn"],
            capture_output=True,
            text=True,
            timeout=5,
            check=False,
        )
        binders = _extract_binder_pids(lsof_proc.stdout, sock_path)
        assert {older.pid, younger.pid}.issubset(binders), (
            f"expected both daemon PIDs in binders; got {binders} "
            f"(daemons: {older.pid}, {younger.pid})"
        )

        # Precondition 2: check (g) flags the duplicate.
        before = check_g_no_dup_binders()
        assert before.passed is False, (
            f"pre-condition: dup-binder scenario should FAIL check_g; "
            f"got {before.detail!r}"
        )

        # Repair: the first-spawned daemon is expected to be kept and the
        # second-spawned one killed.
        ok, msg, ms = _kill_dup_binders()
        assert ok is True, f"_kill_dup_binders returned ok=False: {msg}"
        assert "kept PID" in msg, f"msg missing 'kept PID': {msg!r}"
        assert "killed" in msg, f"msg missing 'killed': {msg!r}"
        assert ms < 10_000, f"_kill_dup_binders took {ms}ms (>10s); too slow"

        # After the repair the check must be green again.
        after = check_g_no_dup_binders()
        assert after.passed is True, (
            f"post-kill check_g should PASS; got {after.detail!r}"
        )

        assert older.poll() is None, "expected oldest daemon (p1) to survive"

        # Allow up to 5s for the kill signal to land and the child to be reaped.
        deadline = time.monotonic() + 5.0
        while time.monotonic() < deadline and younger.poll() is None:
            time.sleep(0.1)
        assert younger.poll() is not None, "expected younger daemon (p2) to be dead"
    finally:
        for daemon in (older, younger):
            if daemon.poll() is not None:
                continue
            try:
                daemon.send_signal(signal.SIGKILL)
                daemon.wait(timeout=3)
            except (subprocess.TimeoutExpired, ProcessLookupError):
                pass
@pytest.mark.skip(
    reason=(
        "Phase 10.6 Plan 10.6-01 Task 1.5: single-machine "
        "LifecycleLock prevents two daemons from both binding the same "
        "IAI_MCP_STORE. Daemon #2 raises LifecycleLockConflict and exits "
        "1 before bind. End-to-end recovery from dup-binders cannot run "
        "because the dup-binders state is now impossible to construct."
    )
)
def test_doctor_apply_yes_recovers_from_dup_binders(isolated_daemon_paths):
    """H: end-to-end, `doctor --apply --yes` clears a dup-binder situation.

    Currently skipped (see the skip reason). When it runs: two daemons are
    bound to the same socket, ``cmd_doctor(apply=True, yes=True)`` is
    invoked, and afterwards check (g) must pass and lsof must show at most
    one binder for the socket path.

    The exit code is asserted loosely. The daemon state file records
    whichever daemon wrote last, so after the repair check (a) can still
    fail when the survivor is not the recorded PID. The test therefore
    accepts rc 0 (everything green) or rc 2 (dup-binders fixed, state-file
    desync left); rc 1 would mean the repair never ran.
    """
    from iai_mcp.doctor import (
        _extract_binder_pids,
        check_g_no_dup_binders,
        cmd_doctor,
    )

    sock_path, _, store_dir, _ = isolated_daemon_paths
    home = Path(os.environ["HOME"])
    first, second = _spawn_dup_daemons(sock_path, store_dir, home)
    try:
        # Sanity check: the duplicate must be detectable before the repair.
        pre = check_g_no_dup_binders()
        assert pre.passed is False, f"pre: dup-binder should FAIL; got {pre.detail!r}"

        rc = cmd_doctor(argparse.Namespace(apply=True, yes=True))

        # Primary observable: the duplicate is gone.
        after = check_g_no_dup_binders()
        assert after.passed is True, (
            f"post-recovery: check_g should PASS; got {after.detail!r}"
        )

        # 0 = full recovery; 2 = repair worked but an unrelated check still
        # fails; anything else means --apply did not do its job.
        assert rc in (0, 2), (
            f"cmd_doctor rc={rc} unexpected; allowed 0 (full recovery) or 2 "
            f"(dup-binders fixed but state-file desync persists)."
        )

        # Independent confirmation from raw lsof output.
        lsof_proc = subprocess.run(
            ["lsof", "-U", "-F", "pn"],
            capture_output=True,
            text=True,
            timeout=5,
            check=False,
        )
        binders = _extract_binder_pids(lsof_proc.stdout, sock_path)
        assert len(binders) <= 1, (
            f"after recovery, expected ≤1 binder for {sock_path}; got {binders}"
        )
    finally:
        for daemon in (first, second):
            if daemon.poll() is not None:
                continue
            try:
                daemon.send_signal(signal.SIGKILL)
                daemon.wait(timeout=3)
            except (subprocess.TimeoutExpired, ProcessLookupError):
                pass

View file

@ -0,0 +1,636 @@
"""Phase 7.1 Plan 06 / R3 closure — `drain_deferred_captures(store)` daemon-side.
Plan 07.1-05 shipped the WRITE side (`iai-mcp capture-transcript --no-spawn`
writes JSONL files to ``~/.iai-mcp/.deferred-captures/`` when the daemon
socket is unreachable). This plan ships the READ side: a drain function that
the daemon runs at startup AND on every WAKE-from-SLEEP transition, so
deferred events get ingested into the episodic tier within seconds of the
daemon coming back up.
End-to-end story this module verifies:
user closes 3 sessions while daemon is sleeping
→ 3 Stop hooks fire `iai-mcp capture-transcript --no-spawn`
→ 3 JSONL deferral files appear under ~/.iai-mcp/.deferred-captures/
→ next MCP call socket-activates the daemon (or wake from idle)
→ drain runs → all 3 transcripts land in the brain
→ ZERO events lost; ZERO new daemons spawned
NOTE on idle-shutdown (per CONTEXT.md D7-05 inheritance): if the daemon
idle-exits cleanly while many hook deferrals accumulate, the deferred-
captures directory keeps growing until the NEXT non-hook MCP call
socket-activates the daemon. This is by design — eliminating the spawn
vector is the whole point. The drain happens whenever the daemon next runs.
Test layout:
A: round-trip — write 3 events → drain → file deleted, store has records
B: malformed event line → file renamed to .failed-<ts>, counts.files_failed=1
C: forward-compat — version=99 header → file left in place + log entry
D: missing dir → drain returns zero counts, no error
E: empty file → drain unlinks it, counts unchanged
F: multiple files → all 3 processed in glob-sort order, all deleted
G: integration — daemon startup with malformed file pre-staged → daemon
starts, malformed file is .failed-<ts>, daemon doesn't crash
Tests A–F are pure-Python unit tests of the drain function (in-process
MemoryStore, monkeypatch HOME/keyring). Test G is the integration check: it
spawns a real `python -m iai_mcp.daemon` subprocess under env-isolation
(mirroring `test_doctor_apply_recovery.py:isolated_daemon_paths`) with a
malformed JSONL pre-seeded, and asserts that the daemon binds the socket
without crashing AND that the malformed file is renamed to .failed-<ts>.
"""
from __future__ import annotations
import json
import os
import platform
import signal
import subprocess
import sys
import time
from pathlib import Path
import psutil
import pytest
REPO = Path(__file__).resolve().parent.parent
# POSIX-only: AF_UNIX socket + subprocess + Path-based glob semantics.
pytestmark = pytest.mark.skipif(
platform.system() == "Windows",
reason="POSIX subprocess + AF_UNIX socket; HOME isolation pattern",
)
# ---------------------------------------------------------------------------
# Fixture: HOME + keyring isolation for in-process tests (A–F)
# ---------------------------------------------------------------------------
@pytest.fixture
def iai_home(tmp_path, monkeypatch):
    """Point HOME, the store and the crypto settings at ``tmp_path``.

    Environment applied for the duration of the test:
      * ``HOME`` is set to ``tmp_path``, so anything resolved through
        ``Path.home()`` (the deferred-captures and logs directories used
        by the drain) stays away from the user's real ~/.iai-mcp/;
      * ``PYTHON_KEYRING_BACKEND`` selects the failing backend and
        ``IAI_MCP_CRYPTO_PASSPHRASE`` supplies a fixed passphrase, so key
        lookup takes the passphrase route and no interactive keychain
        prompt can fire;
      * ``IAI_MCP_STORE`` points below ``tmp_path`` so every test starts
        with a fresh store and rows cannot leak between tests.

    keyring caches its backend on first use, so the cache is cleared both
    before the test (to honour the override) and after it (so the failing
    backend does not leak into later tests).

    Yields:
        ``tmp_path``, which is also what ``Path.home()`` resolves to.
    """
    overrides = {
        "HOME": str(tmp_path),
        "PYTHON_KEYRING_BACKEND": "keyring.backends.fail.Keyring",
        "IAI_MCP_CRYPTO_PASSPHRASE": "test-drain-passphrase",
        "IAI_MCP_STORE": str(tmp_path / ".iai-mcp" / "lancedb"),
    }
    for var_name, var_value in overrides.items():
        monkeypatch.setenv(var_name, var_value)

    import keyring.core

    keyring.core._keyring_backend = None
    yield tmp_path
    keyring.core._keyring_backend = None
# ---------------------------------------------------------------------------
# Helpers — JSONL fixture builders (D7.1-04 v1 format)
# ---------------------------------------------------------------------------
def _write_deferred_jsonl(
    deferred_dir: Path,
    session_id: str,
    events: list[dict],
    *,
    version: int = 1,
    ts_suffix: int | None = None,
) -> Path:
    """Write a deferred-capture JSONL fixture file and return its path.

    Layout: line 1 is a header object (version, deferred_at, session_id,
    cwd); each following line is one event, JSON-encoded. The file is
    named ``<session_id>-<suffix>.jsonl``, where the suffix is
    ``ts_suffix`` or, when that is None, the current Unix time in whole
    seconds. ``deferred_dir`` is created if it does not exist.

    Args:
        deferred_dir: Directory that receives the file.
        session_id: Session identifier used in the header and file name.
        events: Event dicts written after the header, in order.
        version: Value of the header's ``version`` field.
        ts_suffix: Explicit file-name suffix; defaults to ``int(time.time())``.

    Returns:
        Path of the written file.
    """
    deferred_dir.mkdir(parents=True, exist_ok=True)

    if ts_suffix is None:
        stamp = int(time.time())
    else:
        stamp = ts_suffix
    target_file = deferred_dir / f"{session_id}-{stamp}.jsonl"

    header = {
        "version": version,
        "deferred_at": "2026-04-26T00:00:00Z",
        "session_id": session_id,
        "cwd": "/tmp",
    }
    # Header first, then the events, each terminated by a newline.
    payload = "".join(json.dumps(obj) + "\n" for obj in [header, *events])
    target_file.write_text(payload)
    return target_file
def _make_event(text: str, role: str = "user") -> dict:
    """Build one deferred-capture event dict for the fixtures.

    The cue is "test cue: " followed by the first 24 characters of
    ``text``; tier and timestamp are fixed constants.
    """
    cue_preview = text[:24]
    event = dict(
        text=text,
        cue=f"test cue: {cue_preview}",
        tier="episodic",
        role=role,
        ts="2026-04-26T00:00:00Z",
    )
    return event
def _open_isolated_store():
    """Return a fresh ``MemoryStore`` built under the active test environment.

    The import is deliberately deferred to call time: the store module is
    only imported once the ``iai_home`` fixture has applied its
    environment overrides.
    """
    from iai_mcp import store as _store_module

    return _store_module.MemoryStore()
# ---------------------------------------------------------------------------
# Test A — round-trip: write JSONL → drain → file deleted, store has records
# ---------------------------------------------------------------------------
def test_drain_consumes_jsonl_and_deletes_file(iai_home):
    """A: happy path, a v1 file with three events is drained and removed.

    Expected counts: one file drained, none failed, three events inserted,
    none skipped. The deferral file must be gone afterwards and the
    ``records`` table must hold at least three rows.
    """
    from iai_mcp.capture import drain_deferred_captures

    deferred_dir = iai_home / ".iai-mcp" / ".deferred-captures"
    events = [
        _make_event("Alice said: drain test event one — must be at least 12 chars"),
        _make_event("assistant reply with sufficient length to pass MIN_CAPTURE", role="assistant"),
        _make_event("third event for the round-trip drain count assertion"),
    ]
    deferral = _write_deferred_jsonl(deferred_dir, "session-A", events)
    assert deferral.exists()

    store = _open_isolated_store()
    counts = drain_deferred_captures(store)

    # The counts schema is split four ways, one key per status.
    expected = {
        "files_drained": 1,
        "files_failed": 0,
        "events_inserted": 3,
        "events_skipped_insert_failed": 0,
    }
    for key, want in expected.items():
        assert counts[key] == want, counts
    assert not deferral.exists(), "deferred file must be unlinked after drain"

    # A row count is the cheapest proof that the drain really inserted.
    n_rows = store.db.open_table("records").count_rows()
    assert n_rows >= 3, f"expected ≥3 records inserted, got {n_rows}"
# ---------------------------------------------------------------------------
# Test B — malformed event line → file renamed to .failed-<ts>, count tallied
# ---------------------------------------------------------------------------
def test_drain_handles_malformed_event_line(iai_home):
    """B: one undecodable event line fails the whole file, not the drain.

    The file is written by hand so that a non-JSON line sits between two
    valid events. Expected outcome: ``files_failed`` is 1,
    ``files_drained`` is 0, the original file no longer exists and exactly
    one ``session-B-12345.failed-*.jsonl`` sibling has appeared.
    """
    from iai_mcp.capture import drain_deferred_captures

    deferred_dir = iai_home / ".iai-mcp" / ".deferred-captures"
    deferred_dir.mkdir(parents=True, exist_ok=True)

    header_line = json.dumps({
        "version": 1,
        "deferred_at": "2026-04-26T00:00:00Z",
        "session_id": "session-B",
        "cwd": "/tmp",
    })
    file_lines = [
        header_line,
        json.dumps(_make_event("first valid event with adequate length")),
        "this line is not valid JSON {{{ broken",
        json.dumps(_make_event("never reached because file-level error")),
    ]
    broken_file = deferred_dir / "session-B-12345.jsonl"
    broken_file.write_text("".join(line + "\n" for line in file_lines))
    assert broken_file.exists()

    store = _open_isolated_store()
    counts = drain_deferred_captures(store)
    assert counts["files_failed"] == 1, counts
    assert counts["files_drained"] == 0, counts

    # The offender is renamed away rather than left in place or deleted.
    assert not broken_file.exists(), "original must be renamed away on per-file error"
    failed = list(deferred_dir.glob("session-B-12345.failed-*.jsonl"))
    assert len(failed) == 1, f"expected exactly 1 .failed-* file, got {failed}"
# ---------------------------------------------------------------------------
# Test C — forward-compat: version > 1 → file left intact, log entry written
# ---------------------------------------------------------------------------
def test_drain_skips_future_version(iai_home):
    """C: a header with version=99 is left untouched for a newer daemon.

    Expected outcome: all four counters stay at zero, the file still
    exists, no ``.failed-*`` file is created, and a
    ``deferred-drain-*.log`` file mentions "skip", the session id and
    "version=99".
    """
    from iai_mcp.capture import drain_deferred_captures

    deferred_dir = iai_home / ".iai-mcp" / ".deferred-captures"
    future_file = _write_deferred_jsonl(
        deferred_dir,
        "session-C",
        [_make_event("event from a future format version that we cannot parse")],
        version=99,
    )

    store = _open_isolated_store()
    counts = drain_deferred_captures(store)

    # The counts schema is split four ways, one key per status.
    for key in (
        "files_drained",
        "files_failed",
        "events_inserted",
        "events_skipped_insert_failed",
    ):
        assert counts[key] == 0, counts
    assert future_file.exists(), "version>1 file must remain for a future daemon to handle"
    # Skipped is not failed: there must be no quarantine file either.
    assert not list(deferred_dir.glob("*.failed-*.jsonl"))

    # The skip has to leave a trace in the drain log.
    log_dir = iai_home / ".iai-mcp" / "logs"
    log_files = list(log_dir.glob("deferred-drain-*.log"))
    assert log_files, "drain must create a log file when it skips a future version"
    log_content = log_files[0].read_text()
    for fragment in ("skip", "session-C", "version=99"):
        assert fragment in log_content
# ---------------------------------------------------------------------------
# Test D — no deferred dir → drain returns zero counts, no error
# ---------------------------------------------------------------------------
def test_drain_no_deferred_dir(iai_home):
    """Cold boot: draining with no ``.deferred-captures/`` directory is a no-op.

    The drain must return all-zero counts without raising and must not create
    the directory itself.
    """
    from iai_mcp.capture import drain_deferred_captures

    captures_dir = iai_home / ".iai-mcp" / ".deferred-captures"
    assert not captures_dir.exists()

    counts = drain_deferred_captures(_open_isolated_store())

    # W2: the counts schema is split four ways per status.
    for counter in (
        "files_drained",
        "files_failed",
        "events_inserted",
        "events_skipped_insert_failed",
    ):
        assert counts[counter] == 0, counts

    # Only the writer side is allowed to create the deferred directory.
    assert not captures_dir.exists(), "drain should not create .deferred-captures/"
# ---------------------------------------------------------------------------
# Test E — empty (0-byte) file → drain unlinks it, counts unchanged
# ---------------------------------------------------------------------------
def test_drain_empty_jsonl(iai_home):
    """A 0-byte deferral file is removed without touching any counter.

    Such a file can be left behind by a writer that crashed before any line
    landed; the drain is expected to unlink it and report zero work.
    """
    from iai_mcp.capture import drain_deferred_captures

    captures_dir = iai_home / ".iai-mcp" / ".deferred-captures"
    captures_dir.mkdir(parents=True, exist_ok=True)
    empty_file = captures_dir / "session-E-empty.jsonl"
    empty_file.write_text("")  # 0 bytes
    assert empty_file.exists()

    counts = drain_deferred_captures(_open_isolated_store())

    # W2: the counts schema is split four ways per status.
    for counter in (
        "files_drained",
        "files_failed",
        "events_inserted",
        "events_skipped_insert_failed",
    ):
        assert counts[counter] == 0, counts
    assert not empty_file.exists(), "0-byte file must be unlinked"
# ---------------------------------------------------------------------------
# Test F — multiple files processed in glob-sort order, all deleted
# ---------------------------------------------------------------------------
def test_drain_multiple_files_processed_in_order(iai_home):
    """Three deferral files are drained in a single pass and all removed.

    Each file holds one event; the aggregate counts must report three drained
    files and three inserted events with no failures.
    """
    from iai_mcp.capture import drain_deferred_captures

    captures_dir = iai_home / ".iai-mcp" / ".deferred-captures"

    # NOTE: 07.11-01 Rule 1 deviation -- before Plan 07.11-01 three
    # lexically-near cues all looked unique because the dedup branch in
    # capture_turn was unreachable dead code (Bugs A/B/C). After the dedup
    # fix, bge-small-en-v1.5 places "test cue: event from file 0/1/2" above
    # the 0.95 cosine threshold, so the second and third capture get
    # de-duplicated -> events_inserted=1, events_reinforced=2.
    # Each event therefore gets a SEMANTICALLY divergent topic so cosine
    # genuinely separates them (same divergence pattern as
    # tests/test_capture_dedup_contract.py::test_capture_turn_inserts_on_low_cos).
    distinct_texts = [
        "apples are red and grow on trees in orchards across the world",
        "quantum chromodynamics describes the strong nuclear force precisely",
        "hummingbirds beat their wings about eighty times per second in flight",
    ]
    written = [
        _write_deferred_jsonl(
            captures_dir, f"session-F-{idx}", [_make_event(text)], ts_suffix=stamp,
        )
        for idx, (text, stamp) in enumerate(zip(distinct_texts, [1000, 2000, 3000]))
    ]
    assert all(path.exists() for path in written)

    counts = drain_deferred_captures(_open_isolated_store())

    # W2: the counts schema is split four ways per status.
    assert counts["files_drained"] == 3, counts
    assert counts["events_inserted"] == 3, counts
    assert counts["events_skipped_insert_failed"] == 0, counts
    assert counts["files_failed"] == 0, counts
    for p in written:
        assert not p.exists(), f"{p} must be unlinked after drain"
# ---------------------------------------------------------------------------
# Test H — W2 / per-event insert failure preserves the file
# ---------------------------------------------------------------------------
def test_drain_partial_insert_failure_preserves_file(iai_home, monkeypatch):
    """W2: one failed insert quarantines the whole file instead of deleting it.

    When ANY event in a file comes back as an insert failure (the store's
    insert raised), the drain must rename the file to ``.failed-<ts>.jsonl``
    rather than unlink it. Pre-07.9 the file was deleted and the events were
    permanently lost.
    """
    from iai_mcp.capture import drain_deferred_captures
    from iai_mcp.store import MemoryStore

    sentinel = "INSERT_FAIL_SENTINEL_07_9"
    captures_dir = iai_home / ".iai-mcp" / ".deferred-captures"

    # Three events: good, poison-sentinel (its insert will raise), good.
    source_file = _write_deferred_jsonl(
        captures_dir,
        "session-H",
        [
            _make_event("first good event with adequate length here"),
            _make_event("INSERT_FAIL_SENTINEL_07_9 — this event triggers a failure"),
            _make_event("third good event after the failing one in the middle"),
        ],
        ts_suffix=42,
    )
    assert source_file.exists()

    # Wrap MemoryStore.insert so records whose literal_surface carries the
    # sentinel raise; this drives capture_turn into its insert-failed
    # return path (capture.py:169-171).
    original_insert = MemoryStore.insert

    def insert_or_fail(self, rec):
        if sentinel in rec.literal_surface:
            raise RuntimeError("simulated lance write failure")
        return original_insert(self, rec)

    monkeypatch.setattr(MemoryStore, "insert", insert_or_fail)

    counts = drain_deferred_captures(_open_isolated_store())

    # The file is NOT unlinked: it is renamed to .failed-<ts>.jsonl so the
    # evidence is preserved.
    assert not source_file.exists(), "original file must be renamed when any insert fails"
    quarantined = list(captures_dir.glob("session-H-42.failed-*.jsonl"))
    assert len(quarantined) == 1, (
        f"expected 1 .failed-* file; got {quarantined} "
        f"(deferred_dir contents: {list(captures_dir.iterdir())})"
    )

    # Counts split four ways: 2 inserted (the good ones), 1 insert-failed
    # (the sentinel), and the file is marked failed rather than drained.
    assert counts["events_inserted"] == 2, counts
    assert counts["events_skipped_insert_failed"] == 1, counts
    assert counts["events_skipped_intentional"] == 0, counts
    assert counts["files_drained"] == 0, counts
    assert counts["files_failed"] == 1, counts

    # The drain log must carry the insert-failed marker and the session name.
    drain_logs = list((iai_home / ".iai-mcp" / "logs").glob("deferred-drain-*.log"))
    assert drain_logs, "log file must record the insert-failed event"
    logged = drain_logs[0].read_text()
    assert "insert-failed" in logged
    assert "session-H" in logged
# ---------------------------------------------------------------------------
# Test I — W2 / intentional skips do NOT fail the file
# ---------------------------------------------------------------------------
def test_drain_intentional_skip_does_not_fail_file(iai_home):
    """W2: a too-short event is an intentional skip, not an insert failure.

    The file is unlinked as usual, ``files_failed`` stays 0, and
    ``events_skipped_intentional`` is incremented for the short event.
    """
    from iai_mcp.capture import drain_deferred_captures

    captures_dir = iai_home / ".iai-mcp" / ".deferred-captures"
    # Too-short event: expected to come back as skipped with reason "too short".
    tiny_event = {
        "cue": "x", "text": "tiny", "tier": "episodic", "role": "user",
        "ts": "2026-04-26T00:00:00Z",
    }
    source_file = _write_deferred_jsonl(
        captures_dir,
        "session-I",
        [
            _make_event("ok this is a long enough event for the min-length gate"),
            tiny_event,
        ],
        ts_suffix=43,
    )
    assert source_file.exists()

    counts = drain_deferred_captures(_open_isolated_store())

    # File unlinked: intentional skips DO NOT mark a file as failed.
    assert not source_file.exists()
    assert list(captures_dir.glob("*.failed-*.jsonl")) == []
    assert counts["files_drained"] == 1, counts
    assert counts["files_failed"] == 0, counts
    assert counts["events_inserted"] == 1, counts
    assert counts["events_skipped_intentional"] == 1, counts
    assert counts["events_skipped_insert_failed"] == 0, counts
# ---------------------------------------------------------------------------
# Test G — integration: daemon startup with malformed file → daemon stays up,
# file is renamed to .failed-<ts>
# ---------------------------------------------------------------------------
# Mirror test_doctor_apply_recovery.py:isolated_daemon_paths so the spawned
# daemon writes its state + LanceDB + logs under tmp_path. Crucially this
# also propagates HF_HOME so the daemon's prewarm step (bge-small load)
# reuses the user's already-cached model and prewarm completes in <1s
# instead of trying to download from HuggingFace under an empty tmp HOME.
# Captured at import time, i.e. before any test redirects $HOME. On POSIX,
# Path.home() follows $HOME, so resolving it inside _spawn_daemon after a
# monkeypatch would point at an empty cache under the tmp home and defeat
# the "reuse the already-downloaded model" intent.
_USER_HF_HOME = os.environ.get("HF_HOME") or str(
    Path.home() / ".cache" / "huggingface"
)


def _spawn_daemon(sock_path: Path, store_dir: Path, home: Path) -> subprocess.Popen:
    """Spawn `python -m iai_mcp.daemon` with full env-isolation.

    Args:
        sock_path: value exported as ``IAI_DAEMON_SOCKET_PATH``.
        store_dir: value exported as ``IAI_MCP_STORE``.
        home: value exported as ``HOME`` for the child process.

    Returns:
        The running ``subprocess.Popen`` handle; stdout and stderr are
        discarded.
    """
    env = os.environ.copy()
    env["HOME"] = str(home)
    env["IAI_DAEMON_SOCKET_PATH"] = str(sock_path)
    env["IAI_MCP_STORE"] = str(store_dir)
    env["IAI_DAEMON_IDLE_SHUTDOWN_SECS"] = "99999"
    # Reuse the user's HF cache so bge-small doesn't redownload (pattern from
    # test_doctor_apply_recovery.py:69-89). The location was resolved at
    # import time so a caller that has already patched $HOME does not
    # redirect it to an empty directory.
    env["HF_HOME"] = _USER_HF_HOME
    # Force keyring fail-backend → passphrase fallback in the daemon
    # subprocess (otherwise macOS Security framework prompts interactively).
    env["PYTHON_KEYRING_BACKEND"] = "keyring.backends.fail.Keyring"
    env["IAI_MCP_CRYPTO_PASSPHRASE"] = "test-drain-integration-pass"
    return subprocess.Popen(
        [sys.executable, "-m", "iai_mcp.daemon"],
        env=env,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
def _wait_for_socket(sock_path: Path, timeout_sec: float = 30.0) -> bool:
    """Poll every 0.1s until *sock_path* exists or *timeout_sec* has elapsed.

    Returns True as soon as the path exists, False once the deadline passes.
    """
    give_up_at = time.monotonic() + timeout_sec
    while True:
        # The deadline is checked first, so a non-positive timeout never
        # touches the filesystem.
        if time.monotonic() >= give_up_at:
            return False
        if sock_path.exists():
            return True
        time.sleep(0.1)
def _kill_daemon_by_socket(sock_path: Path) -> None:
    """Terminate daemon processes whose environment points at *sock_path*.

    Processes are matched on both the ``iai_mcp.daemon`` command line and the
    ``IAI_DAEMON_SOCKET_PATH`` environment value, so a daemon bound to any
    other socket (for example the user's real one) is never signalled.
    """
    wanted = str(sock_path)
    for candidate in psutil.process_iter(["pid", "cmdline"]):
        try:
            cmdline = " ".join(candidate.info.get("cmdline") or [])
            if "iai_mcp.daemon" not in cmdline:
                continue
            try:
                proc_env = candidate.environ()
            except (psutil.AccessDenied, psutil.NoSuchProcess):
                continue
            if proc_env.get("IAI_DAEMON_SOCKET_PATH") != wanted:
                continue
            try:
                # Polite shutdown first; escalate only if it does not exit.
                candidate.send_signal(signal.SIGTERM)
                candidate.wait(timeout=3)
            except (psutil.NoSuchProcess, psutil.TimeoutExpired):
                try:
                    candidate.send_signal(signal.SIGKILL)
                except psutil.NoSuchProcess:
                    pass
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            continue
def test_daemon_main_drain_does_not_crash_on_bad_file(tmp_path, monkeypatch):
    """Pre-seed a malformed JSONL under .deferred-captures/, then spawn the daemon.

    The daemon must (a) bind its socket and stay alive, and (b) rename the bad
    file to ``.failed-<ts>.jsonl``. This confirms that the startup drain's
    per-file error handling shields daemon main from malformed input.
    """
    # Resolve the HF cache location BEFORE $HOME is redirected: on POSIX
    # Path.home() follows $HOME, so computing it afterwards would yield a
    # path under tmp_path instead of the already-populated cache.
    hf_home = os.environ.get("HF_HOME") or str(
        Path.home() / ".cache" / "huggingface"
    )

    # Same env scaffolding as _spawn_daemon, applied in-process too so any
    # pre-seed Path.home() lookups resolve to tmp_path.
    monkeypatch.setenv("HOME", str(tmp_path))
    monkeypatch.setenv("HF_HOME", hf_home)
    monkeypatch.setenv("PYTHON_KEYRING_BACKEND", "keyring.backends.fail.Keyring")
    monkeypatch.setenv("IAI_MCP_CRYPTO_PASSPHRASE", "test-drain-integration-pass")

    iai_dir = tmp_path / ".iai-mcp"
    iai_dir.mkdir(parents=True, exist_ok=True)
    store_dir = iai_dir / "lancedb"
    store_dir.mkdir(parents=True, exist_ok=True)
    deferred_dir = iai_dir / ".deferred-captures"
    deferred_dir.mkdir(parents=True, exist_ok=True)

    # Pre-seed a malformed file BEFORE the daemon spawns: a valid header line
    # followed by a line that is not JSON.
    bad = deferred_dir / "session-G-99999.jsonl"
    bad.write_text(
        json.dumps({"version": 1, "session_id": "session-G",
                    "deferred_at": "2026-04-26T00:00:00Z", "cwd": "/tmp"}) + "\n"
        + "totally not JSON ===invalid===\n"
    )
    assert bad.exists()

    # Short socket path (macOS AF_UNIX 104-byte cap).
    sock_dir = Path(f"/tmp/iai-drn-{os.getpid()}-{id(tmp_path)}")
    sock_dir.mkdir(parents=True, exist_ok=True)
    sock_path = sock_dir / "d.sock"

    proc = None
    try:
        proc = _spawn_daemon(
            sock_path, store_dir, home=Path(os.environ["HOME"])
        )
        assert _wait_for_socket(sock_path, timeout_sec=30), (
            f"daemon never bound socket within 30s; pid={proc.pid} "
            f"poll_status={proc.poll()}"
        )
        # Brief settle for startup-drain to run (asyncio.to_thread
        # immediately after daemon_started write_event).
        time.sleep(2.0)

        # Daemon process MUST still be alive (drain didn't crash it).
        assert proc.poll() is None, (
            f"daemon exited unexpectedly with code {proc.returncode}; "
            f"startup-drain probably propagated an exception"
        )
        # Bad file MUST be renamed to .failed-<ts>.jsonl.
        assert not bad.exists(), (
            "malformed file should have been renamed away by drain"
        )
        failed = list(deferred_dir.glob("session-G-99999.failed-*.jsonl"))
        assert len(failed) == 1, (
            f"expected exactly 1 .failed-* file, got {failed}"
        )
    finally:
        if proc is not None and proc.poll() is None:
            proc.send_signal(signal.SIGTERM)
            try:
                proc.wait(timeout=10)
            except subprocess.TimeoutExpired:
                proc.send_signal(signal.SIGKILL)
                proc.wait(timeout=3)
        _kill_daemon_by_socket(sock_path)
        try:
            if sock_path.exists():
                sock_path.unlink()
        except OSError:
            pass
        try:
            sock_dir.rmdir()
        except OSError:
            pass
        # Reset keyring cache. The import is guarded because an ImportError
        # raised inside `finally` would replace (and hide) the assertion
        # failure that actually matters.
        try:
            import keyring.core
        except ImportError:
            pass
        else:
            keyring.core._keyring_backend = None

373
tests/test_dream.py Normal file
View file

@ -0,0 +1,373 @@
"""Tests for iai_mcp.dream -- Task 1.
Covers 9 behaviours from the plan:
1. run_rem_cycle calls sleep.run_heavy_consolidation with SleepConfig(llm_enabled=False)
and has_api_key=False.
2. run_rem_cycle calls schema.induce_schemas_tier1 with llm_enabled=False (Tier-0).
3. Non-last cycle does NOT invoke insight.generate_overnight_insight even if
claude_enabled=True.
4. Last cycle WITH claude_enabled=True invokes insight.generate_overnight_insight
and surfaces text into result.
5. Last cycle with claude_enabled=False does NOT invoke insight.
6. rem_cycle_started + rem_cycle_completed events emitted.
7. 15min cap enforced via asyncio.timeout; emits rem_cycle_timeout and returns
timed_out=True.
8. Exception inside run_heavy_consolidation is caught; rem_cycle_error event
emitted; function returns a partial result dict (daemon never dies).
9. literal preservation -- no daemon-side code path mutates
MemoryRecord.literal_surface during a cycle (static assertion on dream.py).
"""
from __future__ import annotations
import asyncio
import re
import time
from pathlib import Path
import pytest
# ---------------------------------------------------------------------------
# helpers: lightweight store stub + event capture
# ---------------------------------------------------------------------------
class _EventLog:
    """Records every write_event-style call in memory for later assertions."""

    def __init__(self) -> None:
        # One (kind, payload-copy, severity) tuple per captured call.
        self.events: list[tuple[str, dict, str | None]] = []

    def capture(self, store, kind, data, *, severity=None, **kwargs):
        """Stand-in for write_event: store a shallow copy of *data*, return None."""
        entry = (kind, dict(data), severity)
        self.events.append(entry)
        return None

    def kinds(self) -> list[str]:
        """Return the captured event kinds in call order."""
        return [entry[0] for entry in self.events]
def _fresh_store(tmp_path, monkeypatch):
    """Build a MemoryStore rooted under *tmp_path* with a 384-dim embed setting.

    The store location and embedding dimension are supplied via the
    ``IAI_MCP_STORE`` and ``IAI_MCP_EMBED_DIM`` environment variables.
    """
    for var, value in (
        ("IAI_MCP_STORE", str(tmp_path / "iai")),
        ("IAI_MCP_EMBED_DIM", "384"),
    ):
        monkeypatch.setenv(var, value)
    from iai_mcp.store import MemoryStore

    return MemoryStore()
def _install_stubs(
    monkeypatch,
    *,
    heavy_return=None,
    heavy_raises=None,
    heavy_sleep_sec: float | None = None,
    candidates_return=None,
    insight_return=None,
    event_log: _EventLog | None = None,
):
    """Replace the external callables that dream.run_rem_cycle invokes.

    Patches ``run_heavy_consolidation``, ``induce_schemas_tier1``,
    ``BudgetLedger`` and ``RateLimitLedger`` on ``iai_mcp.dream``, plus
    ``iai_mcp.insight.generate_overnight_insight``. When *event_log* is given,
    ``iai_mcp.dream.write_event`` is redirected to its ``capture`` method.

    Returns the ``(heavy_calls, schema_calls, insight_calls)`` recorder lists.
    """
    heavy_calls: list[tuple] = []
    schema_calls: list[tuple] = []
    insight_calls: list[tuple] = []

    def fake_heavy(store, session_id, cfg, budget, rate, has_api_key):
        heavy_calls.append((session_id, cfg, has_api_key))
        # Optional blocking delay, used by the timeout test.
        if heavy_sleep_sec is not None:
            time.sleep(heavy_sleep_sec)
        if heavy_raises is not None:
            raise heavy_raises
        if heavy_return is not None:
            return heavy_return
        return {
            "mode": "heavy", "tier": "tier0",
            "summaries_created": 3, "schemas_induced": 1,
            "decay_result": {"decayed": 0, "pruned": 0},
            "schema_candidates": [],
        }

    def fake_induce(store, budget, rate, llm_enabled):
        schema_calls.append((llm_enabled,))
        if candidates_return is not None:
            return candidates_return
        return []

    async def fake_insight(store, session_id):
        insight_calls.append((session_id,))
        if insight_return is not None:
            return insight_return
        return {"ok": True, "text": "test insight"}

    # Ledger constructors are replaced so a bare store object is enough.
    class _InertLedger:
        def __init__(self, *a, **kw):
            pass

    monkeypatch.setattr("iai_mcp.dream.run_heavy_consolidation", fake_heavy)
    monkeypatch.setattr("iai_mcp.dream.induce_schemas_tier1", fake_induce)
    monkeypatch.setattr("iai_mcp.insight.generate_overnight_insight", fake_insight)
    if event_log is not None:
        monkeypatch.setattr("iai_mcp.dream.write_event", event_log.capture)
    monkeypatch.setattr("iai_mcp.dream.BudgetLedger", _InertLedger)
    monkeypatch.setattr("iai_mcp.dream.RateLimitLedger", _InertLedger)

    return heavy_calls, schema_calls, insight_calls
# ---------------------------------------------------------------------------
# Test 1: heavy consolidation called with llm_enabled=False + has_api_key=False
# ---------------------------------------------------------------------------
def test_rem_cycle_invokes_heavy(tmp_path, monkeypatch):
    """Heavy consolidation is called once with llm_enabled=False and has_api_key=False."""
    from iai_mcp import dream

    recorded_heavy, _schema, _insight = _install_stubs(
        monkeypatch, event_log=_EventLog(),
    )
    placeholder = object()  # dream.py never touches store directly; stubs handle it.

    async def _one_cycle():
        return await dream.run_rem_cycle(
            placeholder, 1, 4, "sess-X",
            is_last=False, claude_enabled=False,
        )

    outcome = asyncio.run(_one_cycle())

    assert len(recorded_heavy) == 1, "run_heavy_consolidation not called"
    seen_session, seen_cfg, seen_api_flag = recorded_heavy[0]
    assert seen_session == "sess-X"
    assert seen_api_flag is False, "daemon must pass has_api_key=False"
    assert getattr(seen_cfg, "llm_enabled", None) is False, "llm_enabled must be False"
    # The default heavy stub reports summaries_created=3.
    assert outcome["summaries_created"] == 3
    assert outcome["timed_out"] is False
# ---------------------------------------------------------------------------
# Test 2: Tier-0 schema induction (llm_enabled=False)
# ---------------------------------------------------------------------------
def test_rem_cycle_invokes_tier0_induction(tmp_path, monkeypatch):
    """Schema induction runs once with llm_enabled=False; the result counts candidates."""
    from iai_mcp import dream

    two_candidates = [{"pattern": "foo"}, {"pattern": "bar"}]
    _heavy, recorded_schema, _insight = _install_stubs(
        monkeypatch, event_log=_EventLog(),
        candidates_return=two_candidates,
    )
    placeholder = object()

    async def _one_cycle():
        return await dream.run_rem_cycle(
            placeholder, 2, 4, "sess-Y",
            is_last=False, claude_enabled=False,
        )

    outcome = asyncio.run(_one_cycle())

    assert len(recorded_schema) == 1, "induce_schemas_tier1 not called"
    (llm_flag,) = recorded_schema[0]
    assert llm_flag is False, "Tier-0 path requires llm_enabled=False"
    assert outcome["schema_candidates"] == 2
# ---------------------------------------------------------------------------
# Test 3: non-last cycle with claude_enabled=True does NOT invoke insight
# ---------------------------------------------------------------------------
def test_non_last_cycle_does_not_invoke_insight(tmp_path, monkeypatch):
    """A non-last cycle never calls the insight generator, even with claude_enabled=True."""
    from iai_mcp import dream

    _heavy, _schema, recorded_insight = _install_stubs(
        monkeypatch, event_log=_EventLog(),
    )
    placeholder = object()

    async def _one_cycle():
        return await dream.run_rem_cycle(
            placeholder, 2, 4, "sess-Y",
            is_last=False, claude_enabled=True,
        )

    outcome = asyncio.run(_one_cycle())

    assert recorded_insight == [], "insight called on non-last cycle (D-08 violation)"
    assert outcome["claude_call_used"] is False
# ---------------------------------------------------------------------------
# Test 4: last cycle with claude_enabled=True invokes insight and surfaces text
# ---------------------------------------------------------------------------
def test_last_cycle_triggers_insight(tmp_path, monkeypatch):
    """The last cycle with claude_enabled=True calls insight once and surfaces its text."""
    from iai_mcp import dream

    canned_insight = {"ok": True, "text": "unified insight about patterns"}
    _heavy, _schema, recorded_insight = _install_stubs(
        monkeypatch, event_log=_EventLog(),
        insight_return=canned_insight,
    )
    placeholder = object()

    async def _one_cycle():
        return await dream.run_rem_cycle(
            placeholder, 4, 4, "sess-Z",
            is_last=True, claude_enabled=True,
        )

    outcome = asyncio.run(_one_cycle())

    assert len(recorded_insight) == 1, "last cycle must invoke insight"
    assert recorded_insight[0] == ("sess-Z",)
    assert outcome["claude_call_used"] is True
    assert outcome["main_insight_text"] == "unified insight about patterns"
# ---------------------------------------------------------------------------
# Test 5: last cycle with claude_enabled=False does NOT invoke insight
# ---------------------------------------------------------------------------
def test_last_cycle_respects_host_disabled(tmp_path, monkeypatch):
    """With claude_enabled=False the last cycle skips insight and reports no text."""
    from iai_mcp import dream

    _heavy, _schema, recorded_insight = _install_stubs(
        monkeypatch, event_log=_EventLog(),
    )
    placeholder = object()

    async def _one_cycle():
        return await dream.run_rem_cycle(
            placeholder, 4, 4, "sess-W",
            is_last=True, claude_enabled=False,
        )

    outcome = asyncio.run(_one_cycle())

    assert recorded_insight == [], "claude_enabled=False must gate insight call"
    assert outcome["claude_call_used"] is False
    assert outcome["main_insight_text"] is None
# ---------------------------------------------------------------------------
# Test 6: rem_cycle_started + rem_cycle_completed events emitted
# ---------------------------------------------------------------------------
def test_cycle_start_and_completed_events(tmp_path, monkeypatch):
    """A cycle emits rem_cycle_started before rem_cycle_completed, with an {n, of} payload."""
    from iai_mcp import dream

    recorder = _EventLog()
    _install_stubs(monkeypatch, event_log=recorder)
    placeholder = object()

    async def _one_cycle():
        return await dream.run_rem_cycle(
            placeholder, 1, 4, "sess-E",
            is_last=False, claude_enabled=False,
        )

    asyncio.run(_one_cycle())

    emitted = recorder.kinds()
    assert "rem_cycle_started" in emitted
    assert "rem_cycle_completed" in emitted
    assert emitted.index("rem_cycle_started") < emitted.index("rem_cycle_completed")

    # rem_cycle_started payload shape
    started_payload = next(
        payload
        for kind, payload, _severity in recorder.events
        if kind == "rem_cycle_started"
    )
    assert started_payload == {"n": 1, "of": 4}
# ---------------------------------------------------------------------------
# Test 7: 15min cap enforced; timeout emits rem_cycle_timeout, timed_out=True
# ---------------------------------------------------------------------------
def test_rem_cycle_respects_15min_cap(tmp_path, monkeypatch):
    """Exceeding the cycle cap yields timed_out=True plus timeout and completed events."""
    from iai_mcp import dream

    # Shrink the cap to 0.1s and make the heavy stub sleep 0.3s, so the
    # timeout fires quickly.
    monkeypatch.setattr(dream, "REM_CYCLE_MAX_SEC", 0.1)
    recorder = _EventLog()
    _install_stubs(
        monkeypatch, event_log=recorder,
        heavy_sleep_sec=0.3,
    )
    placeholder = object()

    async def _one_cycle():
        return await dream.run_rem_cycle(
            placeholder, 3, 4, "sess-T",
            is_last=False, claude_enabled=False,
        )

    outcome = asyncio.run(_one_cycle())

    assert outcome["timed_out"] is True
    kinds = recorder.kinds()
    assert "rem_cycle_timeout" in kinds, f"missing rem_cycle_timeout; kinds={kinds}"
    # A timed-out cycle still finishes with rem_cycle_completed (non-crashing).
    assert "rem_cycle_completed" in kinds
# ---------------------------------------------------------------------------
# Test 8: exception inside heavy-consolidation is caught, error event emitted
# ---------------------------------------------------------------------------
def test_rem_cycle_exception_does_not_crash_daemon(tmp_path, monkeypatch):
    """An exception from heavy consolidation is reported as an event, not raised."""
    from iai_mcp import dream

    recorder = _EventLog()
    _install_stubs(
        monkeypatch, event_log=recorder,
        heavy_raises=RuntimeError("boom from heavy"),
    )
    placeholder = object()

    async def _one_cycle():
        # Must NOT raise -- the daemon's outer loop relies on this invariant.
        return await dream.run_rem_cycle(
            placeholder, 1, 4, "sess-X",
            is_last=False, claude_enabled=False,
        )

    outcome = asyncio.run(_one_cycle())

    kinds = recorder.kinds()
    assert "rem_cycle_error" in kinds, (
        f"rem_cycle_error must be emitted on exception; got {kinds}"
    )
    error_payload = next(
        payload
        for kind, payload, _severity in recorder.events
        if kind == "rem_cycle_error"
    )
    assert "boom from heavy" in error_payload["error"]
    # A partial result is still returned (no exception propagates).
    assert "cycle" in outcome
    assert outcome["cycle"] == 1
# ---------------------------------------------------------------------------
# Test 9: literal preservation -- dream.py does not mutate literal_surface
# ---------------------------------------------------------------------------
def test_dream_does_not_mutate_literal_surface():
    """C5 static guard: dream.py must contain zero writes to ``.literal_surface``.

    Read access (including ``==`` / ``!=`` comparisons) is fine; plain and
    augmented assignment to the attribute is forbidden. The check is a
    textual scan of ``src/iai_mcp/dream.py`` located relative to this file.
    """
    dream_src = (
        Path(__file__).resolve().parent.parent
        / "src" / "iai_mcp" / "dream.py"
    ).read_text(encoding="utf-8")
    # `=(?!=)` keeps `==` comparisons from counting as writes, and the
    # optional operator prefix also catches augmented assignment (`+=` etc.).
    pattern = re.compile(
        r"\.literal_surface\s*(?:\*\*|//|<<|>>|[-+*/%@&|^])?=(?!=)"
    )
    assert not pattern.search(dream_src), (
        "C5 violation: dream.py assigns to literal_surface"
    )

59
tests/test_embed.py Normal file
View file

@ -0,0 +1,59 @@
"""Tests for iai_mcp.embed -- bge-small-en-v1.5 path (legacy model).
Plan 02-01 made bge-m3 the default. The 3-model registry still exposes
bge-small-en-v1.5 (384d, English-only) for English-only deployments. These
tests exercise the Phase-1 model explicitly via `Embedder(model_key=...)` so
they remain valid regression gates.
Multilingual behaviour is covered by tests/test_embed_multilingual.py.
"""
from __future__ import annotations
import pytest
from iai_mcp.embed import Embedder
def test_embed_returns_384_dim_vector() -> None:
    """bge-small-en-v1.5 embeds a string into 384 Python floats."""
    vector = Embedder(model_key="bge-small-en-v1.5").embed("hello world")
    assert len(vector) == 384
    assert all(isinstance(component, float) for component in vector)
def test_embed_is_deterministic() -> None:
    """Embedding the same text twice on one Embedder gives equal vectors."""
    embedder = Embedder(model_key="bge-small-en-v1.5")
    first = embedder.embed("exact same text")
    second = embedder.embed("exact same text")
    assert first == second
def test_embed_batch_preserves_order_and_dim() -> None:
    """embed_batch returns one 384d vector per input, matching single-call output."""
    embedder = Embedder(model_key="bge-small-en-v1.5")
    batch = embedder.embed_batch(["one", "two", "three"])
    assert len(batch) == 3
    assert all(len(vector) == 384 for vector in batch)
    # The batched path must agree with a sequential call for the same text.
    assert batch[0] == embedder.embed("one")
def test_embed_empty_string_still_returns_384d() -> None:
    """An empty input string still produces a 384-element vector."""
    vector = Embedder(model_key="bge-small-en-v1.5").embed("")
    assert len(vector) == 384
def test_embedder_dim_matches_output() -> None:
    """The advertised DIM (384) equals the length of an actual embedding."""
    embedder = Embedder(model_key="bge-small-en-v1.5")
    assert embedder.DIM == 384
    vector = embedder.embed("anything")
    assert len(vector) == embedder.DIM
def test_bge_small_en_still_registered_for_legacy() -> None:
    """D-02a: bge-small-en-v1.5 stays in the registry for English-only deployments."""
    from iai_mcp.embed import MODEL_REGISTRY

    legacy_key = "bge-small-en-v1.5"
    assert legacy_key in MODEL_REGISTRY
    assert MODEL_REGISTRY[legacy_key]["dim"] == 384

View file

@ -0,0 +1,151 @@
"""Tests for the multilingual embedder path in the 3-model registry.
Plan 05-08 (2026-04-20) flipped the DEFAULT to bge-small-en-v1.5 (384d
English-only). bge-m3 remains selectable via env var or explicit
``Embedder(model_key="bge-m3")``; these tests pin the key explicitly
so the multilingual coverage keeps running under the new default.
These tests import SentenceTransformer and pull the bge-m3 weights once on
first run (HuggingFace cache is re-used thereafter). If bge-m3 is already
cached by any previous dev session the test runs in seconds.
"""
from __future__ import annotations
import os
import numpy as np
import pytest
# ------------------------------------------------------------- bge-m3 opt-in
def test_bge_m3_opt_in_produces_1024d() -> None:
    """An explicit ``model_key="bge-m3"`` still selects the 1024d multilingual
    model after Plan 05-08's default revert."""
    from iai_mcp.embed import Embedder

    embedder = Embedder(model_key="bge-m3")
    assert embedder.model_key == "bge-m3"
    assert embedder.model_name == "BAAI/bge-m3"
    assert embedder.DIM == 1024
def test_bge_m3_embeds_english() -> None:
    """bge-m3 embeds English text into a unit-length 1024d vector."""
    from iai_mcp.embed import Embedder

    vector = Embedder(model_key="bge-m3").embed("Hello, how are you?")
    assert len(vector) == 1024
    # The vector is expected to be normalised (|v| == 1 within tolerance).
    magnitude = float(np.linalg.norm(np.asarray(vector)))
    assert abs(magnitude - 1.0) < 1e-4
def test_bge_m3_embeds_russian() -> None:
    """bge-m3 embeds Russian text into a unit-length 1024d vector."""
    from iai_mcp.embed import Embedder

    vector = Embedder(model_key="bge-m3").embed("Привет, как дела?")
    assert len(vector) == 1024
    magnitude = float(np.linalg.norm(np.asarray(vector)))
    assert abs(magnitude - 1.0) < 1e-4
def test_bge_m3_embeds_japanese() -> None:
    """bge-m3 embeds Japanese text into a unit-length 1024d vector."""
    from iai_mcp.embed import Embedder

    vector = Embedder(model_key="bge-m3").embed("こんにちは、今日は元気ですか?")
    assert len(vector) == 1024
    magnitude = float(np.linalg.norm(np.asarray(vector)))
    assert abs(magnitude - 1.0) < 1e-4
def test_bge_m3_cross_language_similarity() -> None:
    """English "hello" and Russian "привет" embed with cosine similarity > 0.5.

    The model key is pinned explicitly because Plan 05-08's default is now the
    English-only bge-small.
    """
    from iai_mcp.embed import Embedder

    embedder = Embedder(model_key="bge-m3")
    en = np.asarray(embedder.embed("hello"))
    ru = np.asarray(embedder.embed("привет"))
    norm_product = np.linalg.norm(en) * np.linalg.norm(ru)
    cos = float(en @ ru / norm_product)
    assert cos > 0.5, f"cross-language cosine too low: {cos}"
# ----------------------------------------------------------- env-var selection
def test_embed_model_selectable_via_env(monkeypatch) -> None:
    """IAI_MCP_EMBED_MODEL selects from the 3-model registry.

    The embed module is reloaded so that it re-reads the env var. The reload
    is undone in a ``finally`` block so that a failing assertion cannot leave
    the module in its env-overridden state for the remaining tests.
    """
    import importlib

    import iai_mcp.embed as embed_mod

    monkeypatch.setenv("IAI_MCP_EMBED_MODEL", "bge-small-en-v1.5")
    # Clear the process-level cache so the re-import exposes the env default.
    importlib.reload(embed_mod)
    try:
        e = embed_mod.Embedder()
        assert e.model_key == "bge-small-en-v1.5"
        assert e.DIM == 384
    finally:
        # Restore the default for the remaining tests, pass or fail.
        monkeypatch.delenv("IAI_MCP_EMBED_MODEL", raising=False)
        importlib.reload(embed_mod)
def test_embed_model_explicit_key_overrides_env(monkeypatch) -> None:
    """An explicit ``model_key`` argument takes precedence over IAI_MCP_EMBED_MODEL."""
    from iai_mcp.embed import Embedder

    monkeypatch.setenv("IAI_MCP_EMBED_MODEL", "bge-m3")
    embedder = Embedder(model_key="bge-small-en-v1.5")
    # The explicit key wins over the env value.
    assert embedder.model_key == "bge-small-en-v1.5"
    assert embedder.DIM == 384
def test_embed_model_dimension_registered() -> None:
    """The registry reports the expected ``dim`` for each of the three models."""
    from iai_mcp.embed import MODEL_REGISTRY

    for model_key, expected_dim in (
        ("bge-m3", 1024),
        ("multilingual-e5-small", 384),
        ("bge-small-en-v1.5", 384),
    ):
        assert MODEL_REGISTRY[model_key]["dim"] == expected_dim
def test_embed_model_rejects_unknown_key() -> None:
    """Constructing an Embedder with an unregistered key raises ValueError."""
    from iai_mcp.embed import Embedder

    bogus_key = "this-model-does-not-exist"
    with pytest.raises(ValueError):
        Embedder(model_key=bogus_key)
def test_embed_model_rejects_unknown_env(monkeypatch) -> None:
    """An unregistered IAI_MCP_EMBED_MODEL value makes the zero-arg Embedder raise ValueError."""
    from iai_mcp.embed import Embedder

    monkeypatch.setenv("IAI_MCP_EMBED_MODEL", "garbage")
    with pytest.raises(ValueError):
        Embedder()
# ------------------------------------------------------- batch + determinism
def test_embed_batch_preserves_order_and_dim() -> None:
    """bge-m3 embed_batch returns one 1024d vector per input, including the empty string."""
    from iai_mcp.embed import Embedder

    batch = Embedder(model_key="bge-m3").embed_batch(["one", "два", ""])
    assert len(batch) == 3
    assert all(len(vector) == 1024 for vector in batch)
def test_embed_deterministic_same_input() -> None:
    """The default Embedder returns equal vectors for repeated identical input."""
    from iai_mcp.embed import Embedder

    embedder = Embedder()
    first = embedder.embed("deterministic test")
    second = embedder.embed("deterministic test")
    assert first == second

View file

@ -0,0 +1,73 @@
"""Phase 9.1 — Registry invariant tests for the all-MiniLM-L6-v2 additive entry.
Locks the additive-only registry expansion and source-freeze-modulo-registry
rules from the internal architecture spec. Verifies that:
- the new MODEL_REGISTRY entry exists with the correct HF id and dimension,
- DEFAULT_MODEL_KEY remains bge-small-en-v1.5 (the English-Only Brain lock
  holds),
- the 3 pre-existing entries are byte-identical to v3,
- the new entry is functionally usable (loads, produces normalized 384d vectors),
- production zero-arg Embedder() still resolves to the default.
"""
from __future__ import annotations
from iai_mcp.embed import DEFAULT_MODEL_KEY, MODEL_REGISTRY, Embedder
def test_registry_has_minilm_entry() -> None:
    """The additive all-MiniLM-L6-v2 entry exists with the expected HF id and dim."""
    minilm_key = "all-MiniLM-L6-v2"
    assert minilm_key in MODEL_REGISTRY
    entry = MODEL_REGISTRY[minilm_key]
    assert entry["hf"] == "sentence-transformers/all-MiniLM-L6-v2"
    assert entry["dim"] == 384
def test_default_model_key_unchanged() -> None:
    """D-02 + English-Only Brain lock: the default key is still bge-small-en-v1.5."""
    expected_default = "bge-small-en-v1.5"
    assert DEFAULT_MODEL_KEY == expected_default
def test_registry_has_exactly_four_entries() -> None:
    """The registry holds the three original keys plus exactly one additive key."""
    registered = set(MODEL_REGISTRY.keys())
    assert registered == {
        "bge-m3",
        "multilingual-e5-small",
        "bge-small-en-v1.5",
        "all-MiniLM-L6-v2",
    }
def test_existing_entries_byte_identical_to_v3() -> None:
    """Each of the three pre-existing registry entries keeps its exact spec."""
    frozen_specs = {
        "bge-m3": {"hf": "BAAI/bge-m3", "dim": 1024},
        "multilingual-e5-small": {
            "hf": "intfloat/multilingual-e5-small",
            "dim": 384,
        },
        "bge-small-en-v1.5": {
            "hf": "BAAI/bge-small-en-v1.5",
            "dim": 384,
        },
    }
    # Checked in the same order as the registry was originally asserted.
    for key, spec in frozen_specs.items():
        assert MODEL_REGISTRY[key] == spec
def test_minilm_embedder_loads_and_produces_normalized_384d() -> None:
    """Functional check: the all-MiniLM-L6-v2 embedder loads and emits unit-length 384d lists."""
    key = "all-MiniLM-L6-v2"
    emb = Embedder(model_key=key)

    assert emb.model_key == key
    assert emb.DIM == 384
    assert emb.model_name == "sentence-transformers/all-MiniLM-L6-v2"

    vec = emb.embed("hello world")
    assert isinstance(vec, list)
    assert len(vec) == 384

    # Unit length check: the L2 norm must be within 1e-3 of 1.0.
    squared_sum = sum(component * component for component in vec)
    l2 = squared_sum ** 0.5
    assert abs(l2 - 1.0) < 1e-3, f"vector not normalized: L2={l2}"
def test_default_embedder_still_resolves_to_bge_small() -> None:
    """A zero-argument ``Embedder()`` still selects bge-small-en-v1.5 at 384 dims."""
    default_embedder = Embedder()
    assert default_embedder.model_key == "bge-small-en-v1.5"
    assert default_embedder.DIM == 384
    assert default_embedder.model_name == "BAAI/bge-small-en-v1.5"

View file

@ -0,0 +1,176 @@
"""Tests for enforce_language_tagged (Plan 02-01, constitutional).
Phase 1's enforce_english_raw gated storage to English-only. This plan amends that to
native-language storage: every record carries a language tag; the guard
function only raises if the tag is missing or auto-detection is low confidence.
enforce_english_raw is retained as a backward-compat shim for callers.
"""
from __future__ import annotations
from datetime import datetime, timezone
from uuid import uuid4
import pytest
from iai_mcp.types import EMBED_DIM, MemoryRecord
def _rec(text: str, language: str = "", tags: list[str] | None = None) -> MemoryRecord:
    """Construct an episodic MemoryRecord whose language tag the caller controls.

    An empty ``language`` cannot be passed straight to the constructor (the
    original author notes it would fail ``__post_init__``), so the record is
    built with the placeholder ``"XX"`` and the attribute is blanked
    afterwards. That lets tests exercise the "missing language" guard path.
    """
    constructor_lang = language if language else "XX"
    record_tags = list(tags) if tags else []

    record = MemoryRecord(
        id=uuid4(),
        tier="episodic",
        literal_surface=text,
        aaak_index="",
        embedding=[0.1] * EMBED_DIM,
        community_id=None,
        centrality=0.0,
        detail_level=2,
        pinned=False,
        stability=0.0,
        difficulty=0.0,
        last_reviewed=None,
        never_decay=False,
        never_merge=False,
        provenance=[],
        created_at=datetime.now(timezone.utc),
        updated_at=datetime.now(timezone.utc),
        tags=record_tags,
        language=constructor_lang,
    )

    if not language:
        # Simulate an un-tagged record now that construction has succeeded.
        record.language = ""
    return record
# ---------------------------------------------------- enforce_language_tagged
def test_enforce_language_tagged_accepts_english_with_tag():
    """English text tagged ``en`` passes the guard without raising."""
    from iai_mcp.aaak import enforce_language_tagged

    record = _rec("hello world", language="en")
    enforce_language_tagged(record)  # passing means no exception
def test_enforce_language_tagged_accepts_russian_with_tag():
    """Cyrillic text tagged ``ru`` passes the guard without raising."""
    from iai_mcp.aaak import enforce_language_tagged

    record = _rec("привет мир", language="ru")
    enforce_language_tagged(record)
def test_enforce_language_tagged_accepts_japanese_with_tag():
    """Japanese text tagged ``ja`` passes the guard without raising."""
    from iai_mcp.aaak import enforce_language_tagged

    record = _rec("こんにちは", language="ja")
    enforce_language_tagged(record)
def test_enforce_language_tagged_accepts_arabic_with_tag():
    """Arabic text tagged ``ar`` passes the guard without raising."""
    from iai_mcp.aaak import enforce_language_tagged

    record = _rec("مرحبا بالعالم", language="ar")
    enforce_language_tagged(record)
def test_enforce_language_tagged_rejects_missing_language_no_detect():
    """An empty ``record.language`` with detection off raises ValueError."""
    from iai_mcp.aaak import enforce_language_tagged

    untagged = _rec("some text", language="")  # helper blanks the tag
    with pytest.raises(ValueError) as exc:
        enforce_language_tagged(untagged)

    # The error message is expected to mention the constitutional rule.
    message = str(exc.value).lower()
    assert "constitutional" in message
def test_enforce_language_tagged_auto_detect_sets_language():
    """With ``detect=True`` an untagged English record ends up tagged ``en``."""
    from iai_mcp.aaak import enforce_language_tagged

    sentence = "This is a reasonable English sentence with enough words for detection."
    record = _rec(sentence, language="")
    enforce_language_tagged(record, detect=True)
    assert record.language == "en"
def test_enforce_language_tagged_auto_detect_russian():
    """With ``detect=True`` an untagged Russian record ends up tagged ``ru``."""
    from iai_mcp.aaak import enforce_language_tagged

    sentence = "Это осмысленное предложение на русском языке с достаточным количеством слов."
    record = _rec(sentence, language="")
    enforce_language_tagged(record, detect=True)
    assert record.language == "ru"
def test_enforce_language_tagged_empty_text_gets_default_en():
    """Empty text with ``detect=True`` ends up with the ``en`` tag."""
    from iai_mcp.aaak import enforce_language_tagged

    blank = _rec("", language="")
    enforce_language_tagged(blank, detect=True)
    assert blank.language == "en"
# ------------------------------------------------ enforce_english_raw shim
def test_enforce_english_raw_still_importable():
    """Backward compatibility: the Phase-1 guard can still be imported and called."""
    from iai_mcp.aaak import enforce_english_raw as legacy_guard

    assert callable(legacy_guard)
def test_enforce_english_raw_with_language_tag_still_phase1_semantics():
    """The shim keeps Phase-1 semantics even when ``language='ru'`` is set.

    Cyrillic ``literal_surface`` without a ``raw:<lang>`` tag must still
    raise. Callers wanting native-language storage should use
    ``enforce_language_tagged`` instead of this shim.
    """
    from iai_mcp.aaak import enforce_english_raw

    record = _rec("привет мир", language="ru")
    with pytest.raises(ValueError):
        enforce_english_raw(record)
def test_enforce_english_raw_still_blocks_untagged_cyrillic():
    """Phase-1 behaviour holds for records whose language is empty."""
    from iai_mcp.aaak import enforce_english_raw

    untagged = _rec("привет мир", language="")
    with pytest.raises(ValueError) as exc:
        enforce_english_raw(untagged)

    message = str(exc.value).lower()
    assert "constitutional" in message
def test_enforce_english_raw_accepts_cyrillic_with_raw_tag():
    """The Phase-1 ``raw:<lang>`` tag exemption still works through the shim."""
    from iai_mcp.aaak import enforce_english_raw

    exempt = _rec("привет мир", language="", tags=["raw:ru"])
    enforce_english_raw(exempt)
def test_enforce_english_raw_accepts_pure_english():
    """Plain English text passes the legacy guard even without a language tag."""
    from iai_mcp.aaak import enforce_english_raw

    english = _rec("hello world", language="")
    enforce_english_raw(english)

View file

@ -0,0 +1,161 @@
"""Plan 05-08 — revert the Phase-2 deviation and restore the
PROJECT.md original embedder default: ``bge-small-en-v1.5`` (384d
English-only). bge-m3 (1024d multilingual) remains opt-in via the
``IAI_MCP_EMBED_MODEL`` env var or the ``model_key`` kwarg on Embedder.
Phase 9.1 (2026-04-29): MODEL_REGISTRY grew by ONE additive entry
for ``all-MiniLM-L6-v2`` (legacy alternative embedder; bench-only ablation).
DEFAULT_MODEL_KEY remains ``bge-small-en-v1.5``; production callers
unaffected. The "registry retains all original entries" contract here is
relaxed to "registry retains all original entries + at most 1 additive
entry per the source-freeze-modulo-registry invariant".
Covered contracts (9 tests):
1. DEFAULT_MODEL_KEY is "bge-small-en-v1.5"
2. Embedder() with no args builds the 384d bge-small embedder
3. DEFAULT_EMBED_DIM (and legacy EMBED_DIM alias) is 384
4. MODEL_REGISTRY retains the original 3 entries; D-02
allows the additive all-MiniLM-L6-v2 entry without breaking the
English-Only Brain lock
5. IAI_MCP_EMBED_MODEL=bge-m3 env var still selects bge-m3
6. embedder_for_store on a 1024d store returns bge-m3 (back-compat)
7. embedder_for_store on a 384d store returns bge-small-en-v1.5
8. PROJECT.md line 125 still mentions bge-small-en-v1.5 (constraint)
9. importing the package does NOT auto-download bge-m3 weights
"""
from __future__ import annotations
from pathlib import Path
from types import SimpleNamespace
from unittest import mock
import pytest
@pytest.fixture(autouse=True)
def _clear_env(monkeypatch: pytest.MonkeyPatch):
    """Remove any IAI_MCP_EMBED_MODEL override before each test runs."""
    # raising=False: the variable being absent already is not an error.
    monkeypatch.delenv("IAI_MCP_EMBED_MODEL", raising=False)
    yield
# --------------------------------------------------------------------------- tests
def test_default_model_key_is_bge_small():
    """The module-level default key is bge-small-en-v1.5."""
    from iai_mcp.embed import DEFAULT_MODEL_KEY as default_key

    assert default_key == "bge-small-en-v1.5"
def test_embedder_defaults_to_384d_small():
    """Class-level Embedder defaults point at the 384d bge-small model."""
    from iai_mcp.embed import Embedder

    assert Embedder.DEFAULT_MODEL_KEY == "bge-small-en-v1.5"
    # Both the explicit default and the DIM attribute are 384.
    assert Embedder.DEFAULT_DIM == 384
    assert Embedder.DIM == 384
def test_types_embed_dim_defaults_to_384():
    """DEFAULT_EMBED_DIM and the EMBED_DIM name in ``iai_mcp.types`` are both 384."""
    from iai_mcp.types import DEFAULT_EMBED_DIM, EMBED_DIM

    assert DEFAULT_EMBED_DIM == 384
    assert EMBED_DIM == 384
def test_model_registry_retains_original_three_entries():
    """The three original registry entries must stay present and unmodified.

    Additive entries (currently all-MiniLM-L6-v2) are permitted, but
    bge-m3 / multilingual-e5-small / bge-small-en-v1.5 with their canonical
    dims are non-negotiable.
    """
    from iai_mcp.embed import MODEL_REGISTRY

    original_keys = ("bge-m3", "multilingual-e5-small", "bge-small-en-v1.5")

    # Presence first, so a missing key fails as a membership assertion.
    for key in original_keys:
        assert key in MODEL_REGISTRY

    # Then exact spec equality for each original entry.
    assert MODEL_REGISTRY["bge-m3"] == {"hf": "BAAI/bge-m3", "dim": 1024}
    assert MODEL_REGISTRY["bge-small-en-v1.5"] == {
        "hf": "BAAI/bge-small-en-v1.5",
        "dim": 384,
    }
    assert MODEL_REGISTRY["multilingual-e5-small"] == {
        "hf": "intfloat/multilingual-e5-small",
        "dim": 384,
    }

    # Explicit guard against pruning: extra keys are fine, missing ones are not.
    assert set(original_keys).issubset(set(MODEL_REGISTRY))
def test_env_var_still_selects_bge_m3(monkeypatch):
    """Setting IAI_MCP_EMBED_MODEL=bge-m3 makes key resolution return bge-m3."""
    monkeypatch.setenv("IAI_MCP_EMBED_MODEL", "bge-m3")
    from iai_mcp.embed import _resolve_model_key

    resolved = _resolve_model_key()
    assert resolved == "bge-m3"
def test_embedder_for_store_picks_bge_m3_for_1024d_store():
    """Back-compat: a store reporting ``embed_dim=1024`` is paired with bge-m3."""
    from iai_mcp.embed import embedder_for_store

    legacy_store = SimpleNamespace(embed_dim=1024)
    # Stub out the model loader so no real weights are needed.
    with mock.patch("iai_mcp.embed._get_model") as loader:
        loader.return_value = mock.MagicMock()
        chosen = embedder_for_store(legacy_store)
        assert chosen.model_key == "bge-m3"
        assert chosen.DIM == 1024
def test_embedder_for_store_picks_bge_small_for_384d_store():
    """A store reporting ``embed_dim=384`` is paired with bge-small-en-v1.5."""
    from iai_mcp.embed import embedder_for_store

    small_store = SimpleNamespace(embed_dim=384)
    # Stub out the model loader so no real weights are needed.
    with mock.patch("iai_mcp.embed._get_model") as loader:
        loader.return_value = mock.MagicMock()
        chosen = embedder_for_store(small_store)
        assert chosen.model_key == "bge-small-en-v1.5"
        assert chosen.DIM == 384
def test_project_md_still_pins_bge_small_constraint():
    """Guard the spec: PROJECT.md must keep naming bge-small-en-v1.5 at 384d.

    PROJECT.md was the source of truth all along; this plan merely reverts
    the Phase-2 deviation. Asserting the file content protects against the
    spec being flipped silently. The test is skipped when ``.planning`` is
    not present in the checkout.
    """
    project_md = Path(__file__).resolve().parents[1] / ".planning" / "PROJECT.md"
    if not project_md.exists():
        pytest.skip(".planning is gitignored; PROJECT.md not present in this checkout")

    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    content = project_md.read_text(encoding="utf-8")
    assert "bge-small-en-v1.5" in content
    # "384d embeddings" contains "384d", so the single substring check
    # covers both phrasings the original accepted.
    assert "384d" in content
def test_package_import_does_not_auto_download_models():
    """Importing ``iai_mcp`` must not construct a SentenceTransformer.

    Weights should be pulled lazily on first Embedder() instantiation, not at
    import time; otherwise a fresh install spends minutes pulling bge-m3
    before the user has even decided which model they want.
    """
    import sys

    # patch.dict snapshots sys.modules and restores it on exit, so evicting
    # the cached package below does not leak into other tests.
    with mock.patch.dict(sys.modules):
        # Evict cached iai_mcp modules so the imports below really re-execute.
        cached = [name for name in list(sys.modules) if name.startswith("iai_mcp")]
        for name in cached:
            sys.modules.pop(name, None)

        # Make any SentenceTransformer construction fail loudly during import.
        from sentence_transformers import SentenceTransformer

        tripwire = mock.patch.object(
            SentenceTransformer,
            "__init__",
            side_effect=AssertionError("model instantiated at import time"),
        )
        with tripwire:
            import iai_mcp.embed  # noqa: F401
            import iai_mcp.types  # noqa: F401

187
tests/test_events.py Normal file
View file

@ -0,0 +1,187 @@
"""Tests for the events LanceDB table + events.py module (Plan 02-01, D-STORAGE).
Covers:
- events table created on MemoryStore instantiation
- write_event / query_events round-trip
- kind/severity/since filters
- ordering (newest first)
- limit default + explicit
"""
from __future__ import annotations
import json
from datetime import datetime, timedelta, timezone
from uuid import UUID, uuid4
import pytest
# ----------------------------------------------------------- table creation
def test_events_table_created_on_store_init(tmp_path):
    """Constructing a MemoryStore makes the events table appear in its table names."""
    from iai_mcp.store import EVENTS_TABLE, MemoryStore

    store = MemoryStore(path=tmp_path)
    table_names = store._table_names()
    assert EVENTS_TABLE in table_names
def test_budget_ledger_table_created(tmp_path):
    """Constructing a MemoryStore makes the budget table appear in its table names."""
    from iai_mcp.store import BUDGET_TABLE, MemoryStore

    store = MemoryStore(path=tmp_path)
    table_names = store._table_names()
    assert BUDGET_TABLE in table_names
def test_ratelimit_ledger_table_created(tmp_path):
    """Constructing a MemoryStore makes the rate-limit table appear in its table names."""
    from iai_mcp.store import MemoryStore, RATELIMIT_TABLE

    store = MemoryStore(path=tmp_path)
    table_names = store._table_names()
    assert RATELIMIT_TABLE in table_names
# ------------------------------------------------------ write_event / query
def test_events_write_and_query_roundtrip(tmp_path):
    """An event written with kind/data/session_id comes back intact from a query."""
    from iai_mcp.events import query_events, write_event
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    event_id = write_event(store, kind="test", data={"x": 1}, session_id="s1")
    assert isinstance(event_id, UUID)

    results = query_events(store, kind="test")
    assert len(results) == 1

    only = results[0]
    assert only["kind"] == "test"
    assert only["data"]["x"] == 1
    assert only["session_id"] == "s1"
def test_events_write_returns_uuid(tmp_path):
    """``write_event`` returns a UUID instance."""
    from iai_mcp.events import write_event
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    returned = write_event(store, kind="k", data={})
    assert isinstance(returned, UUID)
def test_events_query_filter_kind(tmp_path):
    """Filtering by ``kind`` returns only matching events; no filter returns all."""
    from iai_mcp.events import query_events, write_event
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    for kind in ("a", "b", "c"):
        write_event(store, kind=kind, data={})

    assert len(query_events(store, kind="a")) == 1
    assert len(query_events(store, kind="b")) == 1
    assert len(query_events(store)) == 3
def test_events_query_filter_since(tmp_path, monkeypatch):
    """``since`` bounds the result set: a future cutoff hides everything, a past one hides nothing."""
    from iai_mcp.events import query_events, write_event
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)

    # Time is not frozen here, so both events are written "now" and the
    # filter is probed from either side of that moment instead.
    write_event(store, kind="t", data={"old": True})
    write_event(store, kind="t", data={"new": True})

    # Cutoff one hour ahead: nothing qualifies.
    one_hour_ahead = datetime.now(timezone.utc) + timedelta(hours=1)
    assert query_events(store, kind="t", since=one_hour_ahead) == []

    # Cutoff one hour back: both events qualify.
    one_hour_back = datetime.now(timezone.utc) - timedelta(hours=1)
    assert len(query_events(store, kind="t", since=one_hour_back)) == 2
def test_events_query_filter_severity(tmp_path):
    """Filtering by ``severity`` returns exactly the one event written at that level."""
    from iai_mcp.events import query_events, write_event
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    for level in ("info", "warning", "critical"):
        write_event(store, kind="k", data={}, severity=level)

    assert len(query_events(store, severity="critical")) == 1
    assert len(query_events(store, severity="warning")) == 1
    assert len(query_events(store, severity="info")) == 1
def test_events_query_limit_default_100(tmp_path):
    """With 150 events stored, the default query returns 100 and ``limit=50`` returns 50."""
    from iai_mcp.events import query_events, write_event
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    for index in range(150):
        write_event(store, kind="bulk", data={"i": index})

    # No limit given: the default applies.
    default_page = query_events(store, kind="bulk")
    assert len(default_page) == 100

    # Caller-supplied limit.
    small_page = query_events(store, kind="bulk", limit=50)
    assert len(small_page) == 50
def test_events_query_ordering_newest_first(tmp_path):
    """Query results come back newest first (descending timestamp order)."""
    import time

    from iai_mcp.events import query_events, write_event
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)

    # Short sleeps between writes keep the three timestamps distinct.
    write_event(store, kind="ord", data={"i": 0})
    time.sleep(0.01)
    write_event(store, kind="ord", data={"i": 1})
    time.sleep(0.01)
    write_event(store, kind="ord", data={"i": 2})

    results = query_events(store, kind="ord")
    sequence = [row["data"]["i"] for row in results]
    # The last event written (i=2) must lead.
    assert sequence == [2, 1, 0]
def test_events_source_ids_roundtrip(tmp_path):
    """UUIDs passed as ``source_ids`` come back from a query as their string forms."""
    from iai_mcp.events import query_events, write_event
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    source_uuids = [uuid4(), uuid4()]
    write_event(store, kind="s", data={}, source_ids=source_uuids)

    results = query_events(store, kind="s")
    assert len(results) == 1

    returned = results[0]["source_ids"]
    expected = {str(source_uuid) for source_uuid in source_uuids}
    # Compared as sets: ordering is not part of this check.
    assert set(returned) == expected
def test_events_domain_roundtrip(tmp_path):
    """The ``domain`` value given to ``write_event`` is returned by ``query_events``."""
    from iai_mcp.events import query_events, write_event
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    write_event(store, kind="k", data={}, domain="coding")

    results = query_events(store, kind="k")
    assert len(results) == 1
    assert results[0]["domain"] == "coding"
def test_events_empty_store_returns_empty(tmp_path):
    """Querying a fresh store returns an empty list, with or without a kind filter."""
    from iai_mcp.events import query_events
    from iai_mcp.store import MemoryStore

    fresh_store = MemoryStore(path=tmp_path)
    assert query_events(fresh_store) == []
    assert query_events(fresh_store, kind="nothing") == []

View file

@ -0,0 +1,116 @@
"""Phase 07.2-02 R3 unit tests for prune_first_turn_pending pure helper.
Distinct from tests/test_daemon_state.py::test_prune_* which covers the
24h-default `prune_stale_first_turn`. This file covers the new 1h-default
`prune_first_turn_pending` (tuple return + dropped session_ids list).
"""
from __future__ import annotations
from datetime import datetime, timedelta, timezone
from iai_mcp.daemon_state import (
FIRST_TURN_PENDING_TTL_SEC_DEFAULT,
prune_first_turn_pending,
)
NOW = datetime(2026, 4, 27, 12, 0, tzinfo=timezone.utc)
def test_default_ttl_is_3600_seconds() -> None:
    """The default first-turn-pending TTL is 3600 seconds (one hour)."""
    one_hour_sec = 3600.0
    assert FIRST_TURN_PENDING_TTL_SEC_DEFAULT == one_hour_sec
def test_keeps_fresh_evicts_stale_returns_dropped_ids() -> None:
    """Mixed input: entries younger than the TTL survive, older ones are dropped and reported."""
    half_hour_old = (NOW - timedelta(seconds=1800)).isoformat()  # inside the 1h TTL
    two_hours_old = (NOW - timedelta(seconds=7200)).isoformat()  # outside the 1h TTL
    pending = {
        "sess-fresh": half_hour_old,
        "sess-stale": two_hours_old,
    }

    new_state, dropped = prune_first_turn_pending(
        {"first_turn_pending": pending}, now=NOW, ttl_sec=3600.0
    )

    assert new_state["first_turn_pending"] == {"sess-fresh": half_hour_old}
    assert dropped == ["sess-stale"]
def test_legacy_bool_entries_evict_with_no_timestamp() -> None:
    """Non-string values (legacy bools, None) are treated as stale and dropped."""
    legacy_pending = {"sess-1": True, "sess-2": False, "sess-3": None}

    new_state, dropped = prune_first_turn_pending(
        {"first_turn_pending": legacy_pending}, now=NOW
    )

    assert new_state["first_turn_pending"] == {}
    # Sorted so the check does not depend on the order of the dropped list.
    assert sorted(dropped) == ["sess-1", "sess-2", "sess-3"]
def test_malformed_iso_string_evicts() -> None:
    """A corrupt ISO timestamp is evicted rather than crashing the prune."""
    one_minute_old = (NOW - timedelta(seconds=60)).isoformat()
    pending = {
        "sess-bad": "not-an-iso-string-2026-99-99",
        "sess-good": one_minute_old,
    }

    new_state, dropped = prune_first_turn_pending(
        {"first_turn_pending": pending}, now=NOW
    )

    assert "sess-bad" in dropped
    assert "sess-good" in new_state["first_turn_pending"]
def test_naive_timestamps_treated_as_utc() -> None:
    """A tz-less ISO string two hours before NOW is evicted under a 1h TTL."""
    two_hours_ago = NOW - timedelta(seconds=7200)
    # Strip tzinfo so the serialized string carries no offset.
    naive_stale = two_hours_ago.replace(tzinfo=None).isoformat()

    new_state, dropped = prune_first_turn_pending(
        {"first_turn_pending": {"sess-naive": naive_stale}},
        now=NOW,
        ttl_sec=3600.0,
    )

    assert dropped == ["sess-naive"]
    assert new_state["first_turn_pending"] == {}
def test_empty_or_missing_pending_returns_no_drops() -> None:
    """Nothing is dropped when ``first_turn_pending`` is missing, empty, or None."""
    # Case 1: the key is missing altogether. Either returned shape is
    # accepted (key added as an empty dict, or state left untouched); the
    # property that matters is that nothing is reported as dropped.
    state_missing, dropped_missing = prune_first_turn_pending({}, now=NOW)
    assert state_missing == {"first_turn_pending": {}} or state_missing == {}
    assert dropped_missing == []

    # Case 2: the key is present with an empty dict.
    state_empty, dropped_empty = prune_first_turn_pending(
        {"first_turn_pending": {}},
        now=NOW,
    )
    assert dropped_empty == []
    assert state_empty["first_turn_pending"] == {}

    # Case 3: the key is present but holds None.
    _state_none, dropped_none = prune_first_turn_pending(
        {"first_turn_pending": None},
        now=NOW,
    )
    assert dropped_none == []
def test_does_not_mutate_state_outside_first_turn_pending() -> None:
    """Keys other than ``first_turn_pending`` keep their values in the returned state."""
    unrelated = {"unrelated_key": "unrelated_value", "fsm_state": "WAKE"}
    stale_ts = (NOW - timedelta(hours=2)).isoformat()
    state = {**unrelated, "first_turn_pending": {"sess-stale": stale_ts}}

    new_state, _ = prune_first_turn_pending(state, now=NOW)

    for key, expected in unrelated.items():
        assert new_state.get(key) == expected

View file

@ -0,0 +1,146 @@
"""Phase 07.2-04 R3 / A3 integration test — startup + per-tick TTL drain wired into daemon.
Strategy: Plan 04 Task 1 threads an explicit `now=datetime.now(timezone.utc)`
kwarg from BOTH wire-in call sites into `prune_first_turn_pending`. This
means the helper is fully testable by passing a fixed `NOW` directly
no datetime monkeypatching dance.
Three checks:
1. Direct helper invocation with mixed stale/fresh state proves the
eviction contract (5 stale evict, 5 fresh keep, dropped IDs returned).
2. Smoke import confirms the names daemon.py imports are reachable.
3. Source-grep on daemon.py confirms both wire-in sites pass the explicit
`now=` kwarg (Task 1's structural contract).
Project async-test idiom (mandatory): sync `def test_*`. No
`@pytest.mark.asyncio`. The helper itself is sync, so all tests here
are plain sync `def test_*` with no `asyncio.run` needed.
"""
from __future__ import annotations
import re
from datetime import datetime, timedelta, timezone
from pathlib import Path
NOW = datetime(2026, 4, 27, 12, 0, tzinfo=timezone.utc)
def _make_mixed_state() -> dict:
    """Build a state dict holding 5 stale and 5 fresh ``first_turn_pending`` entries.

    Stale entries are stamped 2 h before ``NOW`` and fresh ones 30 s before
    ``NOW``. Every timestamp is relative to the module-level ``NOW``, so the
    result does not depend on the wall clock.
    """
    stale_ts = (NOW - timedelta(hours=2)).isoformat()
    fresh_ts = (NOW - timedelta(seconds=30)).isoformat()

    pending: dict = {}
    # Stale keys go in first, then fresh, matching the original merge order.
    for i in range(5):
        pending[f"sess-stale-{i}"] = stale_ts
    for i in range(5):
        pending[f"sess-fresh-{i}"] = fresh_ts

    return {
        "fsm_state": "WAKE",
        "first_turn_pending": pending,
    }
def test_prune_helper_drops_5_stale_keeps_5_fresh_with_fixed_now():
    """Helper contract with a fixed NOW: 5 stale ids are dropped and 5 fresh entries remain.

    The call below mirrors the shape the module docstring describes for the
    daemon's wire-in sites (``state, now=...``), so signature drift shows up
    here too.
    """
    from iai_mcp.daemon_state import (
        FIRST_TURN_PENDING_TTL_SEC_DEFAULT,
        prune_first_turn_pending,
    )

    new_state, dropped = prune_first_turn_pending(_make_mixed_state(), now=NOW)

    # All five stale ids are reported, in any order.
    expected_dropped = sorted(f"sess-stale-{i}" for i in range(5))
    assert sorted(dropped) == expected_dropped, (
        f"Expected exactly 5 stale session_ids dropped; got {dropped}"
    )

    # Only the five fresh ids are left behind.
    kept = new_state["first_turn_pending"]
    assert len(kept) == 5
    for k in kept:
        assert k.startswith("sess-fresh-"), f"unexpected key kept: {k}"

    # Sanity check on the exported TTL constant (1 h).
    assert FIRST_TURN_PENDING_TTL_SEC_DEFAULT == 3600.0
def test_prune_helper_no_drop_when_only_fresh_entries():
    """Control case: with only fresh entries nothing is dropped and all 5 remain."""
    from iai_mcp.daemon_state import prune_first_turn_pending

    fresh_ts = (NOW - timedelta(seconds=30)).isoformat()
    fresh_only = {f"sess-fresh-{i}": fresh_ts for i in range(5)}
    state = {
        "fsm_state": "WAKE",
        "first_turn_pending": fresh_only,
    }

    new_state, dropped = prune_first_turn_pending(state, now=NOW)

    assert dropped == [], f"Expected zero drops on all-fresh state; got {dropped}"
    assert len(new_state["first_turn_pending"]) == 5
def test_first_turn_pending_drain_helper_imported_in_daemon_main():
    """Smoke test: the helper and its TTL constant import cleanly from ``daemon_state``.

    A typo or wrong module in that import would fail here immediately.
    """
    from iai_mcp.daemon_state import (
        FIRST_TURN_PENDING_TTL_SEC_DEFAULT as default_ttl,
        prune_first_turn_pending as prune_helper,
    )

    assert default_ttl == 3600.0
    assert callable(prune_helper)
def test_daemon_wire_in_passes_explicit_now_kwarg_at_both_sites():
    """Structural check: daemon.py threads an explicit ``now=`` at both wire-in sites.

    Reads the daemon source text and requires at least two calls of the form
    ``prune_first_turn_pending(state, now=datetime.now(timezone.utc)``, plus
    a ``phase`` payload entry for both "startup" and "tick". Without this,
    the other tests only prove the helper works, not that it is wired in.
    """
    daemon_src = Path(__file__).resolve().parent.parent / "src" / "iai_mcp" / "daemon.py"
    # Decode explicitly as UTF-8 so the check does not depend on the
    # platform's locale encoding.
    text = daemon_src.read_text(encoding="utf-8")

    # Tolerant of whitespace and line breaks between the call's tokens.
    pat = re.compile(
        r"prune_first_turn_pending\s*\(\s*state\s*,\s*now\s*=\s*datetime\.now\(\s*timezone\.utc\s*\)",
        re.MULTILINE,
    )
    matches = pat.findall(text)
    assert len(matches) >= 2, (
        "Expected >= 2 wire-in sites with explicit `now=datetime.now(timezone.utc)` "
        f"kwarg in daemon.py; found {len(matches)}. Plan 04 Task 1 contract:"
        " both startup-prune (in main()) and tick-prune (in _tick_body Step 0.5)"
        " must thread `now=` explicitly."
    )

    # Both event-emit phases must appear, in either quoting style.
    assert '"phase": "startup"' in text or "'phase': 'startup'" in text, (
        "Startup-side event emit missing `phase: startup` in payload."
    )
    assert '"phase": "tick"' in text or "'phase': 'tick'" in text, (
        "Tick-side event emit missing `phase: tick` in payload."
    )

View file

@ -0,0 +1,192 @@
"""Phase 5 RED-state test scaffold. Tasks 2-5 turn these GREEN.
Covers TOK-12 / D5-03: first-turn auto-recall hook in core.dispatch that fires
exactly once per session and injects a scoped recall into the response.
"""
from __future__ import annotations
from datetime import datetime, timezone
from uuid import uuid4
import pytest
from iai_mcp import core
from iai_mcp.store import MemoryStore
from iai_mcp.types import EMBED_DIM, MemoryRecord
def _seed_one_record(store: MemoryStore, text: str = "reference content") -> None:
    """Insert one semantic, English-tagged record carrying ``text`` into ``store``."""
    stamp = datetime.now(timezone.utc)  # shared by created_at and updated_at
    record = MemoryRecord(
        id=uuid4(),
        tier="semantic",
        literal_surface=text,
        aaak_index="",
        embedding=[0.1] * EMBED_DIM,
        community_id=None,
        centrality=0.5,
        detail_level=3,
        pinned=False,
        stability=0.0,
        difficulty=0.0,
        last_reviewed=None,
        never_decay=False,
        never_merge=False,
        provenance=[],
        created_at=stamp,
        updated_at=stamp,
        tags=[],
        language="en",
    )
    store.insert(record)
def test_first_turn_fires_exactly_once(tmp_path, monkeypatch):
    """The first dispatch for a session carries ``first_turn_recall``; the second does not."""
    # In-memory stand-in for the daemon state file: s1 starts out pending.
    pending = {"s1": True}

    def _load_state():
        # Hand out a copy so callers cannot mutate the backing dict directly.
        return {"first_turn_pending": dict(pending)}

    def _save_state(state):
        # Replace the backing dict's contents with whatever was saved.
        saved = state.get("first_turn_pending", {})
        pending.clear()
        pending.update(saved)

    monkeypatch.setattr("iai_mcp.daemon_state.load_state", _load_state)
    monkeypatch.setattr("iai_mcp.daemon_state.save_state", _save_state)

    store = MemoryStore(path=tmp_path)
    _seed_one_record(store, "session one reference content")

    params = {
        "cue": "reference content",
        "session_id": "s1",
        "cue_embedding": [0.1] * EMBED_DIM,
    }
    resp1 = core.dispatch(store, "memory_recall", params)
    resp2 = core.dispatch(store, "memory_recall", params)

    assert "first_turn_recall" in resp1, f"first dispatch missing hook: {resp1.keys()}"
    assert "first_turn_recall" not in resp2, (
        f"second dispatch should NOT have hook: {resp2.keys()}"
    )
def test_first_turn_budget_capped_at_400(tmp_path, monkeypatch):
    """The ``first_turn_recall`` payload reports ``budget_tokens`` of at most 400."""
    pending = {"s2": True}
    # Stubbed daemon state: loading returns a copy, saving clears the pending flag.
    monkeypatch.setattr(
        "iai_mcp.daemon_state.load_state",
        lambda: {"first_turn_pending": dict(pending)},
    )
    monkeypatch.setattr(
        "iai_mcp.daemon_state.save_state",
        lambda s: pending.clear(),
    )

    store = MemoryStore(path=tmp_path)
    _seed_one_record(store)

    request = {
        "cue": "X",
        "session_id": "s2",
        "cue_embedding": [0.1] * EMBED_DIM,
    }
    resp = core.dispatch(store, "memory_recall", request)

    ftr = resp.get("first_turn_recall")
    assert ftr is not None, f"first_turn_recall missing: {resp.keys()}"
    assert ftr.get("budget_tokens", 0) <= 400, f"budget too high: {ftr}"
def test_daemon_unreachable_falls_back_silently(tmp_path, monkeypatch):
    """A failing ``load_state`` must not break dispatch; the hook is simply skipped."""

    def _boom():
        raise RuntimeError("synthetic daemon_state failure")

    monkeypatch.setattr("iai_mcp.daemon_state.load_state", _boom)

    store = MemoryStore(path=tmp_path)
    _seed_one_record(store)

    request = {
        "cue": "X",
        "session_id": "s3",
        "cue_embedding": [0.1] * EMBED_DIM,
    }
    # The dispatch itself must not raise.
    resp = core.dispatch(store, "memory_recall", request)

    # Regular response shape survives and no first-turn payload is attached.
    assert "hits" in resp
    assert "first_turn_recall" not in resp
def test_first_turn_emits_event(tmp_path, monkeypatch):
    """A first-turn dispatch writes at least one ``first_turn_recall`` event."""
    from iai_mcp.events import query_events

    pending = {"s4": True}
    # Stubbed daemon state: loading returns a copy, saving clears the pending flag.
    monkeypatch.setattr(
        "iai_mcp.daemon_state.load_state",
        lambda: {"first_turn_pending": dict(pending)},
    )
    monkeypatch.setattr(
        "iai_mcp.daemon_state.save_state",
        lambda s: pending.clear(),
    )

    store = MemoryStore(path=tmp_path)
    _seed_one_record(store)

    request = {
        "cue": "something",
        "session_id": "s4",
        "cue_embedding": [0.1] * EMBED_DIM,
    }
    core.dispatch(store, "memory_recall", request)

    events = query_events(store, kind="first_turn_recall", limit=10)
    assert len(events) >= 1, "first_turn_recall event should have been emitted"
def test_input_length_clamp_2000(tmp_path, monkeypatch):
    """At least one ``recall`` call made during a first-turn dispatch sees a cue of <= 2000 chars."""
    pending = {"s5": True}
    # Stubbed daemon state: loading returns a copy, saving clears the pending flag.
    monkeypatch.setattr(
        "iai_mcp.daemon_state.load_state",
        lambda: {"first_turn_pending": dict(pending)},
    )
    monkeypatch.setattr(
        "iai_mcp.daemon_state.save_state",
        lambda s: pending.clear(),
    )

    store = MemoryStore(path=tmp_path)
    _seed_one_record(store)

    # Deliberately oversized cue: 5000 characters.
    huge_cue = "X" * 5000

    # Spy on retrieve.recall and record the ``cue_text`` keyword of each call.
    seen_cues: list[str] = []
    from iai_mcp import retrieve as _retrieve

    real_recall = _retrieve.recall

    def _spy(*args, **kwargs):
        # NOTE(review): only the keyword form is inspected; a positional cue
        # would be recorded as "" and satisfy the assertion below — confirm
        # recall is always called with cue_text= as a keyword.
        cue = kwargs.get("cue_text", "")
        if "first-turn" not in cue[:20]:  # avoid capturing the outer dispatch
            seen_cues.append(cue)
        return real_recall(*args, **kwargs)

    monkeypatch.setattr("iai_mcp.retrieve.recall", _spy)

    request = {
        "cue": huge_cue,
        "session_id": "s5",
        "cue_embedding": [0.1] * EMBED_DIM,
    }
    core.dispatch(store, "memory_recall", request)

    # Passes as soon as one observed cue is within the 2000-char clamp;
    # fails if every observed cue is longer, or if none was observed.
    assert any(len(c) <= 2000 for c in seen_cues), (
        f"no clamped cue observed; len spread: {[len(c) for c in seen_cues]}"
    )

View file

@ -0,0 +1,105 @@
"""Plan 03-03 Task 1 RED + Task 2 GREEN — surface-feature formality scorer.
Validates the formality scorer against a RU+EN fixture of ~50 formal/informal pairs.
Constitutional guard: the scorer observes ONLY the user's surface text. There is no
user-internal-state signal anywhere in this test or in the module it tests.
"""
from __future__ import annotations
import json
import warnings
from pathlib import Path
import pytest
FIXTURE_PATH = Path(__file__).parent / "fixtures" / "formality_ru_en_50pairs.json"
def _load_fixture():
    """Return the parsed JSON content of ``FIXTURE_PATH``.

    The file is opened explicitly as UTF-8: it carries Russian text, and the
    platform default encoding is locale-dependent, so relying on it can fail
    to decode (or mis-decode) the Cyrillic pairs.
    """
    with FIXTURE_PATH.open(encoding="utf-8") as f:
        return json.load(f)
# ------------------------------------------------------------- fixture integrity
def test_fixture_loads_and_has_enough_pairs():
    """The fixture holds at least 45 pairs and covers both ``en`` and ``ru``."""
    pairs = _load_fixture()
    assert len(pairs) >= 45, f"expected ~50 pairs, got {len(pairs)}"

    seen_langs = set()
    for pair in pairs:
        seen_langs.add(pair["lang"])
    assert {"en", "ru"} <= seen_langs
def test_fixture_shape():
    """Each pair has the four required keys and non-blank string texts."""
    required = {"id", "lang", "formal", "informal"}
    for pair in _load_fixture():
        assert set(pair.keys()) >= required
        for field in ("formal", "informal"):
            assert isinstance(pair[field], str) and pair[field].strip()
# ------------------------------------------------------------- scorer contract
def test_formality_score_fixture_accuracy_at_least_85_percent():
    """Formal text must score > informal text on >= 85% of pairs."""
    from iai_mcp.formality import formality_score

    pairs = _load_fixture()

    wins = 0
    for pair in pairs:
        lang = pair["lang"]
        formal_score = formality_score(pair["formal"], lang)
        informal_score = formality_score(pair["informal"], lang)
        # Ties count as a miss: the formal side has to be strictly higher.
        if formal_score > informal_score:
            wins += 1

    accuracy = wins / len(pairs)
    assert accuracy >= 0.85, f"accuracy {accuracy:.2%} ({wins}/{len(pairs)}) below 85% floor"
def test_formality_score_en_formal_anchor():
    """A clearly formal English sentence scores at least 0.6."""
    from iai_mcp.formality import formality_score

    sentence = "The proposal is, therefore, accepted."
    score = formality_score(sentence, "en")
    assert score >= 0.6, f"expected highly formal sentence >= 0.6, got {score:.3f}"
def test_formality_score_en_informal_anchor():
    """A clearly informal English sentence scores at most 0.3."""
    from iai_mcp.formality import formality_score

    sentence = "yo, works for me lol"
    score = formality_score(sentence, "en")
    assert score <= 0.3, f"expected clearly informal <= 0.3, got {score:.3f}"
def test_formality_score_unknown_lang_returns_neutral_with_warning():
    """MEMORY.md global-product mandate: unknown lang degrades gracefully.

    For the unsupported code ``"zz"`` the score must be exactly 0.5 and a
    warning mentioning ``formality_score`` or ``zz`` must be emitted.
    """
    from iai_mcp.formality import formality_score

    with warnings.catch_warnings(record=True) as caught:
        # "always" keeps the warning from being suppressed as a duplicate.
        warnings.simplefilter("always")
        score = formality_score("some test text", "zz")

    assert score == 0.5

    messages = [str(w.message) for w in caught]
    assert any("formality_score" in m.lower() or "zz" in m for m in messages)
def test_formality_score_unknown_lang_never_raises():
    """No exception for empty, made-up or otherwise unhandled language codes."""
    from iai_mcp.formality import formality_score

    candidate_langs = ("", "zz", "xx", "de", "fr")
    with warnings.catch_warnings():
        # Warnings are irrelevant here; only an exception fails the test.
        warnings.simplefilter("ignore")
        for lang in candidate_langs:
            formality_score("test", lang)
def test_formality_score_empty_text_returns_zero():
    """Empty and whitespace-only input both score exactly 0.0."""
    from iai_mcp.formality import formality_score

    for blank in ("", " "):
        assert formality_score(blank, "en") == 0.0
def test_formality_score_range_bounded_in_0_1():
    """Every fixture text, formal or informal, scores inside [0, 1]."""
    from iai_mcp.formality import formality_score

    samples = (
        (pair[side], pair["lang"])
        for pair in _load_fixture()
        for side in ("formal", "informal")
    )
    for txt, lang in samples:
        s = formality_score(txt, lang)
        assert 0.0 <= s <= 1.0, f"score {s} out of [0, 1] for {txt!r}"

189
tests/test_fsrs_decay.py Normal file
View file

@ -0,0 +1,189 @@
"""Tests for FSRS-style edge decay sweep inside sleep._decay_edges.
Behaviour:
- hebbian edges with last updated > 90d ago and weight < ε after decay are pruned.
- hebbian edges above ε are updated with the decayed weight.
- NON-hebbian edges (contradicts, invariant_anchor, consolidated_from, etc.)
are NEVER pruned by the sweep. This is load-bearing for S5 identity
protection: invariant anchors must survive decay.
- never_decay records are unaffected on the records side (Plan 02-01 __post_init__
already enforces this on detail_level>=3; decay loop here targets edges only).
- DECAY_EPSILON defaults to 0.01.
"""
from __future__ import annotations
from datetime import datetime, timedelta, timezone
from uuid import UUID, uuid4
import pytest
def _insert_stale_edge(store, edge_type: str, weight: float, days_old: int) -> tuple[str, str]:
    """Insert one back-dated edge for decay testing and return its endpoints.

    The row is written straight into the ``edges`` table, bypassing
    ``boost_edges`` which always stamps now() as ``updated_at``.

    Args:
        store: object exposing ``db.open_table("edges")`` with an ``add`` method.
        edge_type: value stored in the ``edge_type`` column.
        weight: edge weight; coerced to ``float`` before insertion.
        days_old: how many days before now (UTC) ``updated_at`` is set to.

    Returns:
        ``(src, dst)`` -- the freshly generated UUID strings of the edge.
    """
    tbl = store.db.open_table("edges")
    stale_ts = datetime.now(timezone.utc) - timedelta(days=days_old)
    src_id, dst_id = str(uuid4()), str(uuid4())
    tbl.add([
        {
            "src": src_id,
            "dst": dst_id,
            "edge_type": edge_type,
            "weight": float(weight),
            "updated_at": stale_ts,
        }
    ])
    return src_id, dst_id
# ---- constants
def test_decay_epsilon_default():
    """``sleep.DECAY_EPSILON`` is 0.01."""
    from iai_mcp import sleep as sleep_module

    expected = 0.01
    assert sleep_module.DECAY_EPSILON == expected
# ---- sweep behaviour
def test_decay_edges_preserves_fresh_hebbian_edges(tmp_path):
    """Edges <= 90d old are untouched by the sweep (here: a 30-day-old edge)."""
    from iai_mcp.sleep import _decay_edges
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    src, dst = _insert_stale_edge(store, "hebbian", weight=0.5, days_old=30)

    summary = _decay_edges(store)
    assert summary["decayed"] == 0
    assert summary["pruned"] == 0

    # The edge must still be present with its original weight.
    edges = store.db.open_table("edges").to_pandas()
    same_endpoints = (edges["src"] == src) & (edges["dst"] == dst)
    match = edges.loc[same_endpoints]
    assert not match.empty
    assert float(match.iloc[0]["weight"]) == 0.5
def test_decay_edges_decays_stale_hebbian_edges(tmp_path):
    """Edge >90d old and weight above ε is decayed, not pruned."""
    from iai_mcp.sleep import _decay_edges
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    src, dst = _insert_stale_edge(store, "hebbian", weight=0.8, days_old=100)

    summary = _decay_edges(store)
    assert summary["decayed"] >= 1

    # Still present, but lighter than the inserted 0.8.
    edges = store.db.open_table("edges").to_pandas()
    same_endpoints = (edges["src"] == src) & (edges["dst"] == dst)
    match = edges.loc[same_endpoints]
    assert not match.empty
    assert float(match.iloc[0]["weight"]) < 0.8
def test_decay_edges_prunes_below_epsilon(tmp_path):
    """Edge decayed to weight < ε is removed."""
    from iai_mcp.sleep import _decay_edges
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    # 200 days old with a weight of 0.02: expected to end up below 0.01.
    src, dst = _insert_stale_edge(store, "hebbian", weight=0.02, days_old=200)

    summary = _decay_edges(store)
    assert summary["pruned"] >= 1

    edges = store.db.open_table("edges").to_pandas()
    is_target = (
        (edges["src"] == src)
        & (edges["dst"] == dst)
        & (edges["edge_type"] == "hebbian")
    )
    assert edges.loc[is_target].empty
def test_decay_edges_spares_contradicts(tmp_path):
    """Decay sweep only touches hebbian edges; contradicts edges survive forever."""
    from iai_mcp.sleep import _decay_edges
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    src, dst = _insert_stale_edge(store, "contradicts", weight=0.5, days_old=1000)

    _decay_edges(store)

    edges = store.db.open_table("edges").to_pandas()
    is_target = (
        (edges["src"] == src)
        & (edges["dst"] == dst)
        & (edges["edge_type"] == "contradicts")
    )
    survivor = edges.loc[is_target]
    # Present and with the weight untouched.
    assert not survivor.empty
    assert float(survivor.iloc[0]["weight"]) == 0.5
def test_decay_edges_spares_invariant_anchor(tmp_path):
    """S5 invariant_anchor edges MUST NOT be pruned."""
    from iai_mcp.sleep import _decay_edges
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    # Weight 0.001 at 5000 days old: a hebbian edge like this would be gone.
    src, dst = _insert_stale_edge(store, "invariant_anchor", weight=0.001, days_old=5000)

    _decay_edges(store)

    edges = store.db.open_table("edges").to_pandas()
    is_target = (
        (edges["src"] == src)
        & (edges["dst"] == dst)
        & (edges["edge_type"] == "invariant_anchor")
    )
    assert not edges.loc[is_target].empty  # survived
def test_decay_edges_spares_consolidated_from(tmp_path):
    """consolidated_from (semantic<-episode) edges must survive decay."""
    from iai_mcp.sleep import _decay_edges
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    src, dst = _insert_stale_edge(store, "consolidated_from", weight=0.01, days_old=2000)

    _decay_edges(store)

    edges = store.db.open_table("edges").to_pandas()
    is_target = (
        (edges["src"] == src)
        & (edges["dst"] == dst)
        & (edges["edge_type"] == "consolidated_from")
    )
    assert not edges.loc[is_target].empty
def test_decay_edges_custom_epsilon(tmp_path):
    """Epsilon can be overridden per-call.

    Two sweeps run against the same store: one with the default-sized
    ``epsilon=0.01`` (no assertion -- it only shows the keyword is accepted)
    and one with ``epsilon=0.5``, which must prune a 0.3-weight stale edge.
    """
    from iai_mcp.sleep import _decay_edges
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)

    # Sweep 1: explicit epsilon=0.01. Whether this edge is decayed or pruned
    # is not asserted here.
    src, dst = _insert_stale_edge(store, "hebbian", weight=0.05, days_old=95)
    _decay_edges(store, epsilon=0.01)

    # Remove whatever is left of the first edge so the second sweep starts
    # from a clean table.
    df = store.db.open_table("edges").to_pandas()
    remaining = df[(df["src"] == src) & (df["dst"] == dst) & (df["edge_type"] == "hebbian")]
    if not remaining.empty:
        store.db.open_table("edges").delete(
            f"src = '{src}' AND dst = '{dst}' AND edge_type = 'hebbian'"
        )

    # Sweep 2: epsilon=0.5 with a starting weight of 0.3 -- already below the
    # threshold, so the edge must be pruned.
    src2, dst2 = _insert_stale_edge(store, "hebbian", weight=0.3, days_old=95)
    summary = _decay_edges(store, epsilon=0.5)

    df2 = store.db.open_table("edges").to_pandas()
    row = df2[(df2["src"] == src2) & (df2["dst"] == dst2) & (df2["edge_type"] == "hebbian")]
    assert row.empty
    assert summary["pruned"] >= 1

View file

@ -0,0 +1,200 @@
"""Tests for 02-REVIEW.md H-01 (FSRS tick not persisted across restart).
Bug: `run_light_consolidation` calls `_apply_fsrs(r, now)` which mutates
record.stability and record.last_reviewed in-place on the in-memory
MemoryRecord object. The updated record was never written back to the store.
Every process restart reset all FSRS fields to their previous checkpoint.
Fix:
- Add MemoryStore.update_record(record) that rewrites ONLY the FSRS
columns (stability, difficulty, last_reviewed, updated_at) via
_uuid_literal-safe WHERE predicate. No embedding / provenance /
tags / community_id changes -- avoids clobbering concurrent
boost_edges / append_provenance writers.
- Call store.update_record(r) inside run_light_consolidation after
_apply_fsrs mutates r.
Constitutional contract (MEM-07 FSRS biological fidelity + D-STORAGE):
FSRS stability is the biological decay curve state. Losing it on every
restart is equivalent to wiping short-term memory at every session
switch -- unacceptable for a system whose promise is "Claude remembers
every word".
"""
from __future__ import annotations
from datetime import datetime, timedelta, timezone
from uuid import uuid4
import pytest
from iai_mcp.types import EMBED_DIM, MemoryRecord
# ---------------------------------------------------------------- helpers
def _record(
    *,
    text: str = "fsrs-target",
    stability: float = 0.1,
    prov_seconds_ago: int = 30,
) -> MemoryRecord:
    """Create an episodic MemoryRecord carrying exactly one provenance entry.

    The entry's timestamp lies ``prov_seconds_ago`` seconds before now (UTC).
    Per the tests in this module, run_light_consolidation only ticks records
    whose last provenance entry is < 1h old, so the default of 30 seconds
    yields a record that will be ticked.
    """
    now = datetime.now(timezone.utc)
    provenance_entry = {
        "ts": (now - timedelta(seconds=prov_seconds_ago)).isoformat(),
        "cue": "recall",
        "session_id": "s1",
    }
    # Unit vector along the first axis.
    unit_embedding = [1.0] + [0.0] * (EMBED_DIM - 1)

    fields = dict(
        id=uuid4(),
        tier="episodic",
        literal_surface=text,
        aaak_index="",
        embedding=unit_embedding,
        community_id=None,
        centrality=0.0,
        detail_level=2,
        pinned=False,
        stability=stability,
        difficulty=0.3,
        last_reviewed=None,
        never_decay=False,
        never_merge=False,
        provenance=[provenance_entry],
        created_at=now,
        updated_at=now,
        tags=[],
        language="en",
    )
    return MemoryRecord(**fields)
# ============================================== update_record API unit tests
def test_update_record_writes_back_fsrs_columns(tmp_path):
    """MemoryStore.update_record persists stability/difficulty/last_reviewed."""
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    record = _record(stability=0.1)
    store.insert(record)

    # Change only the in-memory object, then push it back to the store.
    record.stability = 0.55
    record.difficulty = 0.42
    record.last_reviewed = datetime.now(timezone.utc)
    store.update_record(record)

    reloaded = store.get(record.id)
    assert reloaded is not None
    assert reloaded.stability == pytest.approx(0.55, abs=1e-3)
    assert reloaded.difficulty == pytest.approx(0.42, abs=1e-3)
    assert reloaded.last_reviewed is not None
def test_update_record_rejects_unknown_id(tmp_path):
    """update_record on an id that was never inserted is a silent no-op:
    no exception is raised and the ``records`` table does not grow."""
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    # Deliberately never inserted.
    phantom = _record(stability=0.9)

    rows_before = store.db.open_table("records").count_rows()
    store.update_record(phantom)  # must not raise
    rows_after = store.db.open_table("records").count_rows()

    assert rows_after == rows_before
def test_update_record_does_not_touch_untouched_columns(tmp_path):
    """update_record rewrites the FSRS columns only: tags and provenance set
    before the insert must read back unchanged afterwards."""
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)

    record = _record(stability=0.1)
    record.tags = ["important", "keep-me"]
    record.provenance = [
        {"ts": "2026-04-16T00:00:00Z", "cue": "seed", "session_id": "s0"},
    ]
    store.insert(record)

    # Touch FSRS fields only; tags / provenance stay as inserted.
    record.stability = 0.6
    record.last_reviewed = datetime.now(timezone.utc)
    store.update_record(record)

    reloaded = store.get(record.id)
    assert reloaded is not None
    # FSRS column picked up the new value ...
    assert reloaded.stability == pytest.approx(0.6, abs=1e-3)
    # ... while the unrelated columns are intact.
    assert reloaded.tags == ["important", "keep-me"]
    assert len(reloaded.provenance) == 1
    assert reloaded.provenance[0]["cue"] == "seed"
# ============================================== H-01 end-to-end persistence
def test_fsrs_state_persists_across_store_reopen(tmp_path):
    """H-01 end-to-end: FSRS state written by run_light_consolidation must be
    visible to a NEW MemoryStore opened on the same path.

    Pre-fix: stability stayed at 0.1 because _apply_fsrs only mutated the
    in-memory object; nothing was written back.
    Post-fix: stability >= 0.1 + FSRS_STABILITY_BOOST (within 1e-3).
    """
    from iai_mcp.sleep import FSRS_STABILITY_BOOST, run_light_consolidation
    from iai_mcp.store import MemoryStore

    # Phase A: insert a record with fresh provenance and run the light cycle.
    store = MemoryStore(path=tmp_path)
    record = _record(stability=0.1, prov_seconds_ago=30)
    record_id = record.id
    store.insert(record)

    outcome = run_light_consolidation(store, session_id="persist-test")
    assert outcome["fsrs_ticked"] >= 1

    # Phase B: drop the first handle and read through a second instance.
    del store
    reopened = MemoryStore(path=tmp_path)
    reloaded = reopened.get(record_id)
    assert reloaded is not None

    # The boost must have reached storage.
    expected_min = 0.1 + FSRS_STABILITY_BOOST - 1e-3
    assert reloaded.stability >= expected_min, (
        f"FSRS stability not persisted: expected >= {expected_min}, "
        f"got {reloaded.stability}"
    )
    # And the review timestamp with it.
    assert reloaded.last_reviewed is not None
def test_fsrs_persistence_only_fresh_provenance(tmp_path):
    """Records with STALE provenance (>1h old) must NOT be FSRS-ticked.

    Guards the light-phase gating: persisting FSRS state must not widen the
    set of records that get ticked.
    """
    from iai_mcp.sleep import run_light_consolidation
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    # 7200 s = 2 h, i.e. outside the 1 h tick window.
    stale = _record(stability=0.1, prov_seconds_ago=7200)
    store.insert(stale)

    run_light_consolidation(store, session_id="no-tick")

    reloaded = store.get(stale.id)
    assert reloaded is not None
    # Stability is exactly where it started.
    assert reloaded.stability == pytest.approx(0.1, abs=1e-3)

112
tests/test_graph.py Normal file
View file

@ -0,0 +1,112 @@
"""Tests for iai_mcp.graph (D-04 dual-library wrapper, CONN-03 2-hop spread)."""
from __future__ import annotations
from uuid import uuid4
import pytest
from iai_mcp.graph import IGRAPH_THRESHOLD, MemoryGraph, _HAS_IGRAPH
def test_small_graph_uses_networkx() -> None:
    """A 10-node graph reports the ``networkx`` backend."""
    graph = MemoryGraph()
    zero_vec = [0.0] * 384
    for node_id in (uuid4() for _ in range(10)):
        graph.add_node(node_id, community_id=None, embedding=list(zero_vec))
    assert graph.backend == "networkx"
@pytest.mark.skipif(not _HAS_IGRAPH, reason="igraph optional on some boxes")
def test_large_graph_switches_to_igraph() -> None:
    """One node past IGRAPH_THRESHOLD, the backend reports ``igraph``."""
    graph = MemoryGraph()
    node_count = IGRAPH_THRESHOLD + 1
    for node_id in (uuid4() for _ in range(node_count)):
        graph.add_node(node_id, community_id=None, embedding=[0.0] * 384)
    assert graph.backend == "igraph"
def test_backend_stays_networkx_just_below_threshold() -> None:
    """With IGRAPH_THRESHOLD - 1 nodes the backend is still ``networkx``."""
    graph = MemoryGraph()
    node_count = IGRAPH_THRESHOLD - 1
    for node_id in (uuid4() for _ in range(node_count)):
        graph.add_node(node_id, community_id=None, embedding=[0.0] * 384)
    assert graph.backend == "networkx"
def test_two_hop_reaches_exactly_two_hops() -> None:
    """CONN-03: linear chain A-B-C-D seeded at A returns {B, C} -- D is 3 hops."""
    graph = MemoryGraph()
    a, b, c, d = (uuid4() for _ in range(4))

    for node_id in (a, b, c, d):
        graph.add_node(node_id, community_id=None, embedding=[0.0] * 384)
    # Chain the four nodes in order: a-b, b-c, c-d.
    for left, right in ((a, b), (b, c), (c, d)):
        graph.add_edge(left, right)

    reached = set(graph.two_hop_neighborhood([a], top_k=5))

    assert b in reached
    assert c in reached
    assert d not in reached  # 3 hops away
    assert a not in reached  # seed excluded
def test_two_hop_multiple_seeds_deduped() -> None:
    """Seeding both ends of the chain a-b-c yields only the middle node."""
    graph = MemoryGraph()
    a, b, c = (uuid4() for _ in range(3))

    for node_id in (a, b, c):
        graph.add_node(node_id, community_id=None, embedding=[0.0] * 384)
    for left, right in ((a, b), (b, c)):
        graph.add_edge(left, right)

    # Both a and c as seeds: 2-hop from a reaches {b,c}, from c reaches {b,a};
    # union minus seeds should be {b}.
    reached = set(graph.two_hop_neighborhood([a, c], top_k=5))
    assert reached == {b}
def test_two_hop_empty_seeds_returns_empty_list() -> None:
    """No seeds in, empty list out (on an empty graph)."""
    graph = MemoryGraph()
    result = graph.two_hop_neighborhood([], top_k=5)
    assert result == []
def test_centrality_hub_beats_leaves() -> None:
    """5-node star: the hub's centrality is strictly greater than any leaf's."""
    graph = MemoryGraph()
    hub = uuid4()
    leaves = [uuid4() for _ in range(4)]

    graph.add_node(hub, community_id=None, embedding=[0.0] * 384)
    for leaf in leaves:
        graph.add_node(leaf, community_id=None, embedding=[0.0] * 384)
        graph.add_edge(hub, leaf)

    scores = graph.centrality()
    hub_score = scores[hub]
    for leaf in leaves:
        assert hub_score > scores[leaf]
def test_centrality_no_edges_all_zero() -> None:
    """Five isolated nodes: five centrality entries, every one 0.0."""
    graph = MemoryGraph()
    for node_id in (uuid4() for _ in range(5)):
        graph.add_node(node_id, community_id=None, embedding=[0.0] * 384)

    scores = graph.centrality()
    for value in scores.values():
        assert value == 0.0
    assert len(scores) == 5
def test_get_embedding_returns_stored_vector() -> None:
    """get_embedding returns the stored vector, and None for an unknown id."""
    graph = MemoryGraph()
    known_id = uuid4()
    vector = [1.0] + [0.0] * 383
    graph.add_node(known_id, community_id=None, embedding=vector)

    assert graph.get_embedding(known_id) == vector
    # A freshly generated id was never added to the graph.
    unknown_id = uuid4()
    assert graph.get_embedding(unknown_id) is None
assert g.get_embedding(uuid4()) is None
def test_rich_club_coefficient_on_star_graph() -> None:
    """On a 4-leaf star the coefficient is a non-negative float."""
    graph = MemoryGraph()
    hub = uuid4()
    leaves = [uuid4() for _ in range(4)]

    graph.add_node(hub, community_id=None, embedding=[0.0] * 384)
    for leaf in leaves:
        graph.add_node(leaf, community_id=None, embedding=[0.0] * 384)
        graph.add_edge(hub, leaf)

    # The call must not raise and must hand back a plain float.
    coefficient = graph.rich_club_coefficient()
    assert isinstance(coefficient, float)
    assert coefficient >= 0.0

View file

@ -0,0 +1,340 @@
"""Plan 05-12 — graph-native recall tests (RED scaffold).
Close the latency gap by switching recall_for_response's seed + spread
stages from per-id ``store.get(rid)`` LanceDB round-trips to in-RAM
``G.nodes[rid]`` attribute lookups. ``build_runtime_graph`` attaches the
record payload (embedding, surface, centrality, tier) to every graph
node so the recall hot path never touches disk for a graph-resident id.
Covered contracts:
A1 every node in G carries embedding + surface + centrality + tier
after ``build_runtime_graph``.
A2 seed stage does NOT call ``store.get`` (patch raises if invoked).
A3 spread stage (rank/reachable walk) does NOT call ``store.get``.
A4 verbatim L0 fast path (cue_text exact-match / gate skip) still
hits ``store.get`` -- the invariant path is untouched.
A5 partial sync / missing attribute on a node falls back to
``store.get`` without crashing; recall still returns hits.
A6 correctness fence: recall returns the seeded records with
high cosine similarity (no correctness regression).
"""
from __future__ import annotations
from datetime import datetime, timezone
from pathlib import Path
from unittest import mock
from uuid import uuid4
import pytest
from iai_mcp import retrieve
from iai_mcp.pipeline import recall_for_response
from iai_mcp.store import MemoryStore
from iai_mcp.types import MemoryRecord
# --------------------------------------------------------------------------- fixtures
@pytest.fixture(autouse=True)
def _isolated_keyring(monkeypatch: pytest.MonkeyPatch):
    """Swap macOS Keychain for an in-memory dict so tests don't prompt.

    The three ``keyring`` entry points are redirected to a dict keyed by the
    ``(s, u)`` argument pair; the dict itself is yielded to the test.
    """
    import keyring as _keyring

    fake: dict[tuple[str, str], str] = {}

    def _get_password(s, u):
        return fake.get((s, u))

    def _set_password(s, u, p):
        fake[(s, u)] = p

    def _delete_password(s, u):
        # Deleting a missing entry is not an error.
        return fake.pop((s, u), None)

    monkeypatch.setattr(_keyring, "get_password", _get_password)
    monkeypatch.setattr(_keyring, "set_password", _set_password)
    monkeypatch.setattr(_keyring, "delete_password", _delete_password)
    yield fake
class _DetEmbedder:
    """Deterministic embedder: the vector depends only on the text's SHA-256.

    The same text always yields the same ``DIM``-length vector.
    """

    def __init__(self, dim: int = 384) -> None:
        self.DIM = self.DEFAULT_DIM = dim
        self.DEFAULT_MODEL_KEY = "test"

    def embed(self, text: str) -> list[float]:
        """Return an L2-normalised pseudo-random vector for ``text``."""
        import hashlib
        import random

        # First 8 digest bytes (== first 16 hex chars) seed the generator.
        seed = int.from_bytes(hashlib.sha256(text.encode("utf-8")).digest()[:8], "big")
        rng = random.Random(seed)

        # Components are drawn uniformly from [-1, 1).
        raw = [rng.random() * 2 - 1 for _ in range(self.DIM)]
        norm = sum(x * x for x in raw) ** 0.5
        if norm > 0:
            return [x / norm for x in raw]
        # Zero norm (e.g. DIM == 0): nothing to normalise.
        return raw
def _make_record(vec: list[float], text: str) -> MemoryRecord:
    """Build an unpinned episodic record holding ``vec`` and ``text``.

    Every other field gets a fixed neutral value; ``created_at`` and
    ``updated_at`` share one UTC timestamp.
    """
    stamp = datetime.now(timezone.utc)
    fields = dict(
        id=uuid4(),
        tier="episodic",
        literal_surface=text,
        aaak_index="",
        embedding=vec,
        community_id=None,
        centrality=0.0,
        detail_level=2,
        pinned=False,
        stability=0.0,
        difficulty=0.0,
        last_reviewed=None,
        never_decay=False,
        never_merge=False,
        provenance=[],
        created_at=stamp,
        updated_at=stamp,
        tags=["t"],
        language="en",
    )
    return MemoryRecord(**fields)
@pytest.fixture
def seeded_store(tmp_path: Path) -> tuple[MemoryStore, _DetEmbedder, list[MemoryRecord]]:
    """Fresh store holding 12 records, plus the embedder and the record list.

    Record ``i`` has surface ``"synthetic fact {i}"`` and the embedding of
    ``"fact-{i}"``, so the seed+spread stages have enough material to
    exercise the graph-native read path.
    """
    store = MemoryStore(path=tmp_path / "lancedb")
    store.root = tmp_path
    embedder = _DetEmbedder(dim=store.embed_dim)

    inserted: list[MemoryRecord] = []
    for index in range(12):
        record = _make_record(embedder.embed(f"fact-{index}"), f"synthetic fact {index}")
        store.insert(record)
        inserted.append(record)

    return store, embedder, inserted
# ---------------------------------------------------------------- A1: node payload
def test_A1_build_runtime_graph_attaches_node_payload(seeded_store):
    """A1: every node carries embedding + surface + centrality + tier."""
    store, _emb, recs = seeded_store
    graph, _assignment, _rc = retrieve.build_runtime_graph(store)

    # Inspect the wrapped NetworkX graph directly: the payload is expected
    # as plain node attributes, keyed by the stringified record id.
    nx_graph = graph._nx
    assert nx_graph.number_of_nodes() == len(recs)

    for rec in recs:
        node = nx_graph.nodes[str(rec.id)]
        for attr in ("embedding", "surface", "centrality", "tier"):
            assert attr in node, f"node {rec.id} missing {attr} attr"
        # Payload values mirror the record they were built from.
        assert list(node["embedding"]) == list(rec.embedding)
        assert node["surface"] == rec.literal_surface
        assert node["tier"] == rec.tier
# ---------------------------------------------------------------- A2: seed stage
def test_A2_seed_stage_reads_from_graph_not_store(seeded_store):
    """A2: seed stage (top-K by cosine) must NOT call store.get.

    MemoryStore.get is patched so that every lookup except the fixed L0 id
    raises; if recall_for_response still returns a non-empty RecallResponse,
    the seed stage is graph-native.
    """
    from uuid import UUID

    store, emb, _recs = seeded_store
    graph, assignment, rich_club = retrieve.build_runtime_graph(store)

    # The verbatim L0 fast-path (gate skip) calls store.get too -- disable
    # the skip by choosing a cue that the gate will NOT classify as trivial.
    cue = "explain the authentication migration for long-running deployments"

    class _Boom(RuntimeError):
        """Raised by the patched store.get for any non-L0 lookup."""

    # Built once, not on every patched call.
    l0 = UUID("00000000-0000-0000-0000-000000000001")

    def _explode(rid):
        # Let the verbatim L0 UUID fetch pass through as a clean miss (no L0
        # record is seeded) -- any OTHER store.get call blows up.
        if rid == l0:
            return None
        raise _Boom(f"store.get({rid}) — seed stage should not call this")

    with mock.patch.object(MemoryStore, "get", side_effect=_explode):
        resp = recall_for_response(
            store=store,
            graph=graph,
            assignment=assignment,
            rich_club=rich_club,
            embedder=emb,
            cue=cue,
            session_id="s",
            budget_tokens=1500,
        )

    assert len(resp.hits) >= 1
# ---------------------------------------------------------------- A3: spread stage
def test_A3_spread_stage_reads_from_graph_not_store(seeded_store):
    """A3: rank+spread stages do NOT call store.get either.

    Same shape as A2, but the only requirement is that the whole recall
    finishes without the patched store.get raising.
    """
    from uuid import UUID

    store, emb, _recs = seeded_store
    graph, assignment, rich_club = retrieve.build_runtime_graph(store)
    cue = "network stack changes for the web cache"

    class _Boom(RuntimeError):
        pass

    l0_id = UUID("00000000-0000-0000-0000-000000000001")

    def _explode(rid):
        # Only the fixed L0 id is tolerated, and it reads as a miss.
        if rid != l0_id:
            raise _Boom(f"store.get({rid}) during spread/rank")
        return None

    recall_kwargs = dict(
        store=store,
        graph=graph,
        assignment=assignment,
        rich_club=rich_club,
        embedder=emb,
        cue=cue,
        session_id="s",
        budget_tokens=1500,
    )
    with mock.patch.object(MemoryStore, "get", side_effect=_explode):
        resp = recall_for_response(**recall_kwargs)

    # Reaching this line means no stage tripped the patched store.get.
    assert isinstance(resp.hits, list)
# ---------------------------------------------------------------- A4: L0 fast path
def test_A4_verbatim_l0_fast_path_still_calls_store_get(seeded_store):
    """A4: the L0 (gate-skip) fast path still hits store.get -- unchanged.

    Invariant: the verbatim recall path is NOT touched.
    """
    from uuid import UUID

    store, emb, _recs = seeded_store

    # Seed the record with the fixed L0 id so the gate-skip branch can fire.
    stamp = datetime.now(timezone.utc)
    l0_fields = dict(
        id=UUID("00000000-0000-0000-0000-000000000001"),
        tier="semantic",
        literal_surface="L0 identity kernel",
        aaak_index="",
        embedding=emb.embed("l0-identity"),
        community_id=None,
        centrality=0.0,
        detail_level=5,  # never_decay
        pinned=True,
        stability=0.0,
        difficulty=0.0,
        last_reviewed=None,
        never_decay=True,
        never_merge=True,
        provenance=[],
        created_at=stamp,
        updated_at=stamp,
        tags=["identity"],
        language="en",
    )
    store.insert(MemoryRecord(**l0_fields))

    graph, assignment, rich_club = retrieve.build_runtime_graph(store)

    # Pick a cue that the gate treats as trivial (short / who-am-i style).
    cue = "hi"

    # ``wraps`` keeps the real lookup working while counting the calls.
    with mock.patch.object(MemoryStore, "get", wraps=store.get) as get_spy:
        recall_for_response(
            store=store,
            graph=graph,
            assignment=assignment,
            rich_club=rich_club,
            embedder=emb,
            cue=cue,
            session_id="s",
            budget_tokens=1500,
        )

    # At LEAST one store.get call on the L0 fast path (verbatim invariant).
    assert get_spy.call_count >= 1
# ---------------------------------------------------------------- A5: fallback
def test_A5_missing_node_attr_falls_back_to_store_get(seeded_store):
    """A5: nodes that lack the embedding attr (race / partial sync) must not
    crash recall -- it is expected to fall back to store.get and still
    return hits."""
    store, emb, recs = seeded_store
    graph, assignment, rich_club = retrieve.build_runtime_graph(store)

    # Strip the embedding attr from the first six of the twelve nodes.
    nx_graph = graph._nx
    for rec in recs[:6]:
        nx_graph.nodes[str(rec.id)].pop("embedding", None)

    cue = "summary of cli subcommand changes for the auth token rotation"
    resp = recall_for_response(
        store=store,
        graph=graph,
        assignment=assignment,
        rich_club=rich_club,
        embedder=emb,
        cue=cue,
        session_id="s",
        budget_tokens=1500,
    )

    assert len(resp.hits) >= 1
# ---------------------------------------------------------------- A6: correctness
def test_A6_m04_correctness_no_regression(seeded_store):
    """A6: recall for a seeded surface returns hits drawn from the seeded set.

    Minimal correctness fence inside this file: at least one hit comes back
    and no hit refers to a record outside the twelve seeded ones.
    """
    store, emb, recs = seeded_store
    graph, assignment, rich_club = retrieve.build_runtime_graph(store)

    # The cue is the literal surface of record 7.
    resp = recall_for_response(
        store=store,
        graph=graph,
        assignment=assignment,
        rich_club=rich_club,
        embedder=emb,
        cue="synthetic fact 7",
        session_id="s",
        budget_tokens=1500,
    )

    assert len(resp.hits) >= 1

    # Every hit must point at one of the seeded records.
    seeded_ids = {rec.id for rec in recs}
    for hit in resp.hits:
        assert hit.record_id in seeded_ids

View file

@ -0,0 +1,247 @@
"""Plan 05-12 — store <-> graph write-sync hook tests (RED scaffold).
``build_runtime_graph`` registers a ``_graph_sync_hook`` on the store so
every ``insert`` / ``update`` / ``delete`` mutates the in-RAM graph's
node payload. Hook exceptions are logged to stderr as structured events
but NEVER break the underlying store write -- the store is authoritative.
Covered contracts:
B1 ``store.insert`` with registered hook adds the graph node + payload.
B2 ``store.update`` mutates the node's embedding / surface payload.
B3 ``store.delete`` removes the node from the graph.
B4 hook that raises does not break ``store.insert`` -- the write
completes, stderr carries a structured ``graph_sync_failed`` event.
B5 cold start: after save/try_load round-trip the node payload blob
restores every node attribute from cache.
B6 CACHE_VERSION bump from "05-09-v1" -> "05-12-v1" invalidates the
old cache cleanly (forward-compat fence).
"""
from __future__ import annotations
import json
from datetime import datetime, timezone
from pathlib import Path
from uuid import uuid4
import pytest
from iai_mcp import retrieve, runtime_graph_cache
from iai_mcp.store import MemoryStore
from iai_mcp.types import MemoryRecord
# --------------------------------------------------------------------------- fixtures
@pytest.fixture(autouse=True)
def _isolated_keyring(monkeypatch: pytest.MonkeyPatch):
    """Redirect the ``keyring`` get/set/delete calls to an in-memory dict.

    The dict is keyed by the ``(s, u)`` argument pair and is yielded to the
    test.
    """
    import keyring as _keyring

    fake: dict[tuple[str, str], str] = {}

    def _get_password(s, u):
        return fake.get((s, u))

    def _set_password(s, u, p):
        fake[(s, u)] = p

    def _delete_password(s, u):
        # Deleting a missing entry is not an error.
        return fake.pop((s, u), None)

    monkeypatch.setattr(_keyring, "get_password", _get_password)
    monkeypatch.setattr(_keyring, "set_password", _set_password)
    monkeypatch.setattr(_keyring, "delete_password", _delete_password)
    yield fake
@pytest.fixture
def store(tmp_path: Path) -> MemoryStore:
    """MemoryStore under ``tmp_path / "lancedb"`` with ``root`` set to ``tmp_path``."""
    memory_store = MemoryStore(path=tmp_path / "lancedb")
    memory_store.root = tmp_path
    return memory_store
def _make_record(
    store: MemoryStore,
    text: str = "hello",
    vec_seed: float = 0.1,
) -> MemoryRecord:
    """Build an unpinned episodic record sized for ``store``.

    The embedding is ``vec_seed`` repeated ``store.embed_dim`` times and the
    surface is ``text``; the record is NOT inserted into the store.
    """
    stamp = datetime.now(timezone.utc)
    constant_embedding = [vec_seed] * store.embed_dim
    fields = dict(
        id=uuid4(),
        tier="episodic",
        literal_surface=text,
        aaak_index="",
        embedding=constant_embedding,
        community_id=None,
        centrality=0.0,
        detail_level=2,
        pinned=False,
        stability=0.0,
        difficulty=0.0,
        last_reviewed=None,
        never_decay=False,
        never_merge=False,
        provenance=[],
        created_at=stamp,
        updated_at=stamp,
        tags=["t"],
        language="en",
    )
    return MemoryRecord(**fields)
# ---------------------------------------------------------------- B1: insert
def test_B1_insert_updates_graph_node(store):
    """B1: a record inserted after the graph is built appears as a node."""
    # One record must exist before the runtime graph is built.
    first = _make_record(store, "seed", 0.5)
    store.insert(first)
    graph, _assignment, _rich_club = retrieve.build_runtime_graph(store)
    assert str(first.id) in graph._nx.nodes

    # A later insert is expected to be mirrored onto the already-built graph.
    added = _make_record(store, "freshly-inserted", 0.3)
    store.insert(added)
    added_key = str(added.id)
    assert added_key in graph._nx.nodes

    attrs = graph._nx.nodes[added_key]
    assert attrs.get("surface") == "freshly-inserted"
    assert "embedding" in attrs
# ---------------------------------------------------------------- B2: update
def test_B2_update_mutates_node_payload(store):
    """B2: ``store.update`` changes the node's surface and embedding."""
    record = _make_record(store, "before-update", 0.2)
    store.insert(record)
    graph, _assignment, _rich_club = retrieve.build_runtime_graph(store)
    node_key = str(record.id)
    original_attrs = graph._nx.nodes[node_key]
    assert original_attrs["surface"] == "before-update"

    # Change both fields on the record, then push the change to the store.
    record.literal_surface = "after-update"
    record.embedding = [0.9] * store.embed_dim
    store.update(record)

    refreshed_attrs = graph._nx.nodes[node_key]
    assert refreshed_attrs["surface"] == "after-update"
    # The first embedding component must now be the new value.
    first_component = list(refreshed_attrs["embedding"])[0]
    assert first_component == pytest.approx(0.9)
# ---------------------------------------------------------------- B3: delete
def test_B3_delete_removes_node(store):
    """B3: after ``store.delete`` the record's node is gone from the graph."""
    doomed = _make_record(store, "to-be-deleted", 0.4)
    store.insert(doomed)
    graph, _assignment, _rich_club = retrieve.build_runtime_graph(store)
    node_key = str(doomed.id)
    assert node_key in graph._nx.nodes

    store.delete(doomed.id)
    assert node_key not in graph._nx.nodes
# ---------------------------------------------------------------- B4: hook robustness
def test_B4_hook_exception_does_not_break_store_insert(store, capsys):
    """B4: a raising hook must never break ``store.insert``.

    Registers a hook that always raises, inserts a record, and checks that
    (a) the insert does not raise and the record can be read back, and
    (b) stderr contains a JSON line whose ``event`` is ``graph_sync_failed``
    and whose ``op`` is ``insert``.
    """

    def _bad_hook(op, record):
        raise RuntimeError("hook is sad")

    store.register_graph_sync_hook(_bad_hook)
    rec = _make_record(store, "store-write-is-authoritative", 0.15)
    store.insert(rec)  # must not raise

    # Verify the record actually landed in the store.
    roundtrip = store.get(rec.id)
    assert roundtrip is not None
    assert roundtrip.literal_surface == "store-write-is-authoritative"

    # Structured stderr event logged.
    captured = capsys.readouterr()
    assert "graph_sync_failed" in captured.err

    # At least one stderr line must parse as the expected JSON event.
    found = False
    for line in captured.err.splitlines():
        # Keep the try body to the parse only, so a failing assertion below
        # can never be confused with an unparseable line.
        try:
            payload = json.loads(line)
        except (ValueError, TypeError):
            continue
        # Valid JSON that is not an object (e.g. a bare number) has no
        # ``.get``; skip it instead of crashing with AttributeError.
        if not isinstance(payload, dict):
            continue
        if payload.get("event") == "graph_sync_failed":
            assert payload.get("op") == "insert"
            found = True
            break
    assert found, "expected a JSON graph_sync_failed event on stderr"
# ---------------------------------------------------------------- B5: cold start
def test_B5_cold_start_restores_node_payload_from_cache(store):
    """B5: a second graph build yields the same node surface and embedding.

    Also checks that ``runtime_graph_cache.try_load`` returns a node-payload
    mapping containing the record after the first build.
    """
    record = _make_record(store, "cached-payload", 0.25)
    store.insert(record)
    node_key = str(record.id)

    # First build; per the original note this is what writes the cache.
    first_graph, _assignment, _rich_club = retrieve.build_runtime_graph(store)
    first_attrs = first_graph._nx.nodes[node_key]
    surface_before = first_attrs["surface"]
    embedding_before = list(first_attrs["embedding"])

    # Go through try_load rather than reading the file: the original note
    # says the on-disk cache is ciphertext, so json.load on it would fail.
    cached = runtime_graph_cache.try_load(store)
    assert cached is not None, "cache must be loadable"
    _cached_assignment, _cached_rich_club, payload_by_node, _max_degree = cached
    assert payload_by_node is not None, "cache is missing node_payload blob"
    assert node_key in payload_by_node

    # Second build is expected to be served from the cache.
    second_graph, _assignment, _rich_club = retrieve.build_runtime_graph(store)
    second_attrs = second_graph._nx.nodes[node_key]
    assert second_attrs["surface"] == surface_before
    assert list(second_attrs["embedding"]) == embedding_before
# ---------------------------------------------------------------- B6: version bump
def test_B6_cache_version_bump_invalidates_old_cache(store):
    """B6: a planted legacy "05-09-v1" cache file is rejected by ``try_load``.

    Also pins the current ``CACHE_VERSION`` constant to "07-09-v3".
    """
    # Hand-write a cache file in the old plain-JSON format.
    legacy_file = runtime_graph_cache._cache_path(store)
    legacy_file.parent.mkdir(parents=True, exist_ok=True)
    with legacy_file.open("w") as handle:
        legacy_assignment = {
            "node_to_community": {},
            "community_centroids": {},
            "modularity": 0.0,
            "backend": "flat",
            "top_communities": [],
            "mid_regions": {},
        }
        legacy_blob = {
            "cache_version": "05-09-v1",  # legacy
            "key": [0, 0, 4, store.embed_dim, "05-09-v1"],
            "assignment": legacy_assignment,
            "rich_club": [],
            "saved_at": "2026-01-01T00:00:00+00:00",
        }
        json.dump(legacy_blob, handle)

    # The constant must be the current version; per the original note,
    # legacy 05-09 / 05-12 / 05-13 / 06-02 cache files are rejected.
    assert runtime_graph_cache.CACHE_VERSION == "07-09-v3"

    # Loading the legacy file must report "no usable cache".
    load_result = runtime_graph_cache.try_load(store)
    assert load_result is None

255
tests/test_guard.py Normal file
View file

@ -0,0 +1,255 @@
"""Tests for D-GUARD (BudgetLedger + RateLimitLedger + should_call_llm).
Covers:
- BudgetLedger daily/monthly caps + rollover
- RateLimitLedger cooldown window
- should_call_llm 7-step ladder ordering per CONTEXT.md D-GUARD
- Persistence across store reopen
"""
from __future__ import annotations
from datetime import datetime, timedelta, timezone
from uuid import uuid4
import pytest
# ------------------------------------------------------------- BudgetLedger
def test_budget_ledger_daily_cap_enforced(tmp_path):
    """A spend that would push the day's total over the daily cap is refused.

    With a 0.10 daily cap: 0.05 is allowed on an empty ledger; after
    recording 0.08, a further 0.03 is refused with a reason mentioning
    "daily".
    """
    from iai_mcp.guard import BudgetLedger
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    bl = BudgetLedger(store, daily_usd_cap=0.10, monthly_usd_cap=3.00)

    ok, _ = bl.can_spend(0.05)
    assert ok is True

    bl.record_spend(0.08)
    # 0.08 + 0.03 = 0.11 > 0.10 -> NOT ok
    # (The original queried can_spend(0.03) twice and discarded the first
    # result; one call is enough.)
    ok2, reason = bl.can_spend(0.03)
    assert ok2 is False
    assert "daily" in reason.lower()
def test_budget_ledger_daily_allows_under_cap(tmp_path):
    """0.04 is still allowed after 0.05 was recorded against a 0.10 cap."""
    from iai_mcp.guard import BudgetLedger
    from iai_mcp.store import MemoryStore

    mem_store = MemoryStore(path=tmp_path)
    ledger = BudgetLedger(mem_store, daily_usd_cap=0.10)
    ledger.record_spend(0.05)

    allowed, _reason = ledger.can_spend(0.04)
    assert allowed is True
def test_budget_ledger_monthly_cap_enforced(tmp_path):
    """The monthly cap refuses a spend even when the daily cap has room."""
    from iai_mcp.guard import BudgetLedger
    from iai_mcp.store import MemoryStore

    mem_store = MemoryStore(path=tmp_path)
    ledger = BudgetLedger(mem_store, daily_usd_cap=10.0, monthly_usd_cap=0.20)
    ledger.record_spend(0.15)

    # 0.15 + 0.10 = 0.25 exceeds the 0.20 monthly cap; the 10.0 daily cap is
    # nowhere near, so the refusal reason must name the monthly cap.
    allowed, why = ledger.can_spend(0.10)
    assert allowed is False
    assert "monthly" in why.lower()
def test_budget_ledger_daily_used(tmp_path):
    """``daily_used`` starts at 0.0 and sums the recorded spends."""
    from iai_mcp.guard import BudgetLedger
    from iai_mcp.store import MemoryStore

    mem_store = MemoryStore(path=tmp_path)
    ledger = BudgetLedger(mem_store)
    assert ledger.daily_used() == 0.0

    for amount in (0.01, 0.02):
        ledger.record_spend(amount)

    drift = abs(ledger.daily_used() - 0.03)
    assert drift < 1e-5
def test_budget_ledger_monthly_used(tmp_path):
    """``monthly_used`` sums the recorded spends (0.05 + 0.03 = 0.08)."""
    from iai_mcp.guard import BudgetLedger
    from iai_mcp.store import MemoryStore

    mem_store = MemoryStore(path=tmp_path)
    ledger = BudgetLedger(mem_store)
    for amount in (0.05, 0.03):
        ledger.record_spend(amount)

    drift = abs(ledger.monthly_used() - 0.08)
    assert drift < 1e-5
def test_budget_ledger_persists_across_reopen(tmp_path):
    """A spend recorded via one store is visible via a second store on the same path."""
    from iai_mcp.guard import BudgetLedger
    from iai_mcp.store import MemoryStore

    first_store = MemoryStore(path=tmp_path)
    BudgetLedger(first_store).record_spend(0.05)
    del first_store

    # Open the same path again and read the day's total through a new ledger.
    second_store = MemoryStore(path=tmp_path)
    reopened = BudgetLedger(second_store)
    drift = abs(reopened.daily_used() - 0.05)
    assert drift < 1e-5
# ----------------------------------------------------------- RateLimitLedger
def test_ratelimit_ledger_no_history_not_in_cooldown(tmp_path):
    """A ledger with nothing recorded reports no cooldown."""
    from iai_mcp.guard import RateLimitLedger
    from iai_mcp.store import MemoryStore

    mem_store = MemoryStore(path=tmp_path)
    limiter = RateLimitLedger(mem_store)
    cooling = limiter.in_cooldown()
    assert cooling is False
def test_ratelimit_ledger_record_429_enters_cooldown(tmp_path):
    """Recording a 429 puts the ledger into cooldown."""
    from iai_mcp.guard import RateLimitLedger
    from iai_mcp.store import MemoryStore

    mem_store = MemoryStore(path=tmp_path)
    limiter = RateLimitLedger(mem_store)
    limiter.record_429()
    cooling = limiter.in_cooldown()
    assert cooling is True
def test_ratelimit_ledger_persists_across_reopen(tmp_path):
    """A 429 recorded via one store is seen as cooldown via a second store on the same path."""
    from iai_mcp.guard import RateLimitLedger
    from iai_mcp.store import MemoryStore

    first_store = MemoryStore(path=tmp_path)
    RateLimitLedger(first_store).record_429()
    del first_store

    second_store = MemoryStore(path=tmp_path)
    cooling = RateLimitLedger(second_store).in_cooldown()
    assert cooling is True
# -------------------------------------------------- should_call_llm ladder
def test_should_call_llm_tier_0_fallback_llm_disabled(tmp_path):
    """Step 1: ``llm_enabled=False`` is refused with a reason naming ``llm_enabled``."""
    from iai_mcp.guard import BudgetLedger, RateLimitLedger, should_call_llm
    from iai_mcp.store import MemoryStore

    mem_store = MemoryStore(path=tmp_path)
    budget = BudgetLedger(mem_store)
    limiter = RateLimitLedger(mem_store)

    allowed, why = should_call_llm(
        budget, limiter, llm_enabled=False, has_api_key=True
    )
    assert allowed is False
    assert "llm_enabled" in why
def test_should_call_llm_no_api_key(tmp_path):
    """Step 2: a missing API key is refused with a reason mentioning "api key"."""
    from iai_mcp.guard import BudgetLedger, RateLimitLedger, should_call_llm
    from iai_mcp.store import MemoryStore

    mem_store = MemoryStore(path=tmp_path)
    budget = BudgetLedger(mem_store)
    limiter = RateLimitLedger(mem_store)

    allowed, why = should_call_llm(
        budget, limiter, llm_enabled=True, has_api_key=False
    )
    assert allowed is False
    assert "api key" in why.lower()
def test_should_call_llm_daily_cap_hit(tmp_path):
    """Step 3: an estimate that would exceed the daily cap is refused as "daily"."""
    from iai_mcp.guard import BudgetLedger, RateLimitLedger, should_call_llm
    from iai_mcp.store import MemoryStore

    mem_store = MemoryStore(path=tmp_path)
    budget = BudgetLedger(mem_store, daily_usd_cap=0.01, monthly_usd_cap=3.0)
    budget.record_spend(0.009)
    limiter = RateLimitLedger(mem_store)

    allowed, why = should_call_llm(
        budget,
        limiter,
        llm_enabled=True,
        has_api_key=True,
        estimated_usd=0.005,
    )
    assert allowed is False
    assert "daily" in why.lower()
def test_should_call_llm_monthly_cap_hit(tmp_path):
    """Step 4: with daily headroom, exceeding the monthly cap is refused as "monthly"."""
    from iai_mcp.guard import BudgetLedger, RateLimitLedger, should_call_llm
    from iai_mcp.store import MemoryStore

    mem_store = MemoryStore(path=tmp_path)
    budget = BudgetLedger(mem_store, daily_usd_cap=10.0, monthly_usd_cap=0.02)
    budget.record_spend(0.015)
    limiter = RateLimitLedger(mem_store)

    allowed, why = should_call_llm(
        budget,
        limiter,
        llm_enabled=True,
        has_api_key=True,
        estimated_usd=0.01,
    )
    assert allowed is False
    assert "monthly" in why.lower()
def test_should_call_llm_in_cooldown(tmp_path):
    """Step 5: with budget available, an active cooldown is refused as "cooldown"."""
    from iai_mcp.guard import BudgetLedger, RateLimitLedger, should_call_llm
    from iai_mcp.store import MemoryStore

    mem_store = MemoryStore(path=tmp_path)
    budget = BudgetLedger(mem_store)
    limiter = RateLimitLedger(mem_store)
    limiter.record_429()

    allowed, why = should_call_llm(
        budget, limiter, llm_enabled=True, has_api_key=True
    )
    assert allowed is False
    assert "cooldown" in why.lower()
def test_should_call_llm_all_green(tmp_path):
    """With nothing blocking, the ladder returns ``(True, "ok")``."""
    from iai_mcp.guard import BudgetLedger, RateLimitLedger, should_call_llm
    from iai_mcp.store import MemoryStore

    mem_store = MemoryStore(path=tmp_path)
    budget = BudgetLedger(mem_store)
    limiter = RateLimitLedger(mem_store)

    allowed, why = should_call_llm(
        budget, limiter, llm_enabled=True, has_api_key=True
    )
    assert allowed is True
    assert why == "ok"
def test_should_call_llm_ordering_llm_enabled_first(tmp_path):
    """Ordering: ``llm_enabled=False`` wins over cap, cooldown and missing key."""
    from iai_mcp.guard import BudgetLedger, RateLimitLedger, should_call_llm
    from iai_mcp.store import MemoryStore

    mem_store = MemoryStore(path=tmp_path)
    budget = BudgetLedger(mem_store, daily_usd_cap=0.01)
    budget.record_spend(0.02)  # already over the daily cap
    limiter = RateLimitLedger(mem_store)
    limiter.record_429()  # and in cooldown

    # Every other blocker is active too, yet the reason must be llm_enabled.
    allowed, why = should_call_llm(
        budget, limiter, llm_enabled=False, has_api_key=False
    )
    assert allowed is False
    assert "llm_enabled" in why
def test_should_call_llm_ordering_cap_before_cooldown(tmp_path):
    """Ordering: with LLM enabled and a key present, the cap is reported before cooldown."""
    from iai_mcp.guard import BudgetLedger, RateLimitLedger, should_call_llm
    from iai_mcp.store import MemoryStore

    mem_store = MemoryStore(path=tmp_path)
    budget = BudgetLedger(mem_store, daily_usd_cap=0.01)
    budget.record_spend(0.02)  # already over the daily cap
    limiter = RateLimitLedger(mem_store)
    limiter.record_429()  # cooldown is active as well

    allowed, why = should_call_llm(
        budget,
        limiter,
        llm_enabled=True,
        has_api_key=True,
        estimated_usd=0.001,
    )
    assert allowed is False
    # A "daily" reason shows the cap check ran ahead of the cooldown check.
    assert "daily" in why.lower()

View file

@ -0,0 +1,287 @@
"""Phase 10.4 — comprehensive tests for ``HeartbeatScanner``.
Covers the 9-test matrix from CONTEXT 10.4:
- Empty dir scan returns [].
- Single fresh heartbeat is FRESH (PID = current process, just-now refresh).
- Stale heartbeat (last_refresh older than M) is STALE even if PID alive.
- Orphan heartbeat (PID dead, fresh refresh) is ORPHAN.
- Five simultaneous fresh heartbeats: ``fresh_count`` == 5; ``is_active`` True.
- ``cleanup_stale_orphans`` deletes 3 of 4, leaves the fresh one.
- ``heartbeat_idle_30min`` False when at least one fresh exists.
- ``heartbeat_idle_30min`` True when only stale + orphan remain.
- Concurrent scan tolerates a writer adding a heartbeat mid-scan.
Tests use ``os.getpid()`` for live-PID fixtures (deterministic) and a
known-dead PID 99999 for orphan fixtures (verified dead at session start
by the implementation's ``_is_pid_alive``).
"""
from __future__ import annotations
import json
import os
import threading
import time
from datetime import datetime, timedelta, timezone
from pathlib import Path
import pytest
from iai_mcp.heartbeat_scanner import (
DEFAULT_STALE_THRESHOLD_SEC,
HeartbeatScanner,
HeartbeatStatus,
_is_pid_alive,
)
# ---------------------------------------------------------------- fixtures
@pytest.fixture
def wrappers_dir(tmp_path: Path) -> Path:
    """Create and return an empty ``wrappers`` directory inside ``tmp_path``."""
    target = tmp_path / "wrappers"
    target.mkdir()
    return target
def _write_heartbeat(
wrappers_dir: Path,
pid: int,
uuid: str,
last_refresh: datetime,
) -> Path:
"""Write a heartbeat file with the given pid/uuid/last_refresh.
Returns the file path so tests can assert presence/absence after
``cleanup_stale_orphans``.
"""
path = wrappers_dir / f"heartbeat-{pid}-{uuid}.json"
payload = {
"pid": pid,
"uuid": uuid,
"started_at": last_refresh.isoformat().replace("+00:00", "Z"),
"last_refresh": last_refresh.isoformat().replace("+00:00", "Z"),
"wrapper_version": "1.0.0",
"schema_version": 1,
}
path.write_text(json.dumps(payload))
return path
# Known-dead PID used by the orphan fixtures; verified by ``_is_pid_alive``
# in ``test_dead_pid_fixture_is_actually_dead``.
# 99999 is above macOS's PID ceiling (typically <99998) so it is a stable
# choice for orphan fixtures. The verification test runs first to fail
# loudly if this assumption is wrong on a future host.
# NOTE(review): the macOS ceiling is asserted here, not checked; other
# platforms may allow PIDs this high — the sanity test is the real guard.
_DEAD_PID = 99999
# ---------------------------------------------------------------- sanity
def test_dead_pid_fixture_is_actually_dead() -> None:
    """Sanity check: ``_DEAD_PID`` must not belong to a live process.

    The orphan fixtures rely on that PID being dead. If a host ever has a
    live process with it, those fixtures would stop producing ORPHAN entries;
    this test makes that collision fail visibly.
    """
    alive = _is_pid_alive(_DEAD_PID)
    assert alive is False
# ---------------------------------------------------------------- scan / classify
def test_scan_empty_dir_returns_empty(wrappers_dir: Path) -> None:
    """An empty directory scans to ``[]``, zero fresh entries, not active."""
    scanner = HeartbeatScanner(wrappers_dir)
    found = scanner.scan()
    assert found == []
    assert scanner.fresh_count() == 0
    assert scanner.is_active() is False
def test_scan_single_fresh_heartbeat(wrappers_dir: Path) -> None:
    """This process's PID with a just-now refresh is classified FRESH."""
    my_pid = os.getpid()
    refreshed_at = datetime.now(timezone.utc)
    _write_heartbeat(wrappers_dir, my_pid, "uuid-aaa", refreshed_at)

    scanner = HeartbeatScanner(wrappers_dir)
    found = scanner.scan()
    assert len(found) == 1

    only = found[0]
    assert only.pid == my_pid
    assert only.uuid == "uuid-aaa"
    assert only.status is HeartbeatStatus.FRESH
    assert scanner.is_active() is True
def test_scan_stale_heartbeat(wrappers_dir: Path) -> None:
    """A refresh older than the threshold is STALE even for a live PID."""
    my_pid = os.getpid()
    # Ten seconds past the default staleness threshold.
    age = timedelta(seconds=DEFAULT_STALE_THRESHOLD_SEC + 10)
    old_refresh = datetime.now(timezone.utc) - age
    _write_heartbeat(wrappers_dir, my_pid, "uuid-bbb", old_refresh)

    scanner = HeartbeatScanner(wrappers_dir)
    found = scanner.scan()
    assert len(found) == 1
    assert found[0].status is HeartbeatStatus.STALE
    assert scanner.fresh_count() == 0
    assert scanner.is_active() is False
def test_scan_orphan_heartbeat(wrappers_dir: Path) -> None:
    """A just-now refresh from the dead PID is classified ORPHAN."""
    refreshed_at = datetime.now(timezone.utc)
    _write_heartbeat(wrappers_dir, _DEAD_PID, "uuid-ccc", refreshed_at)

    scanner = HeartbeatScanner(wrappers_dir)
    found = scanner.scan()
    assert len(found) == 1
    assert found[0].status is HeartbeatStatus.ORPHAN
    assert scanner.fresh_count() == 0
def test_scan_5_simultaneous_wrappers(wrappers_dir: Path) -> None:
    """Five fresh heartbeat files give ``fresh_count() == 5`` and an active scanner."""
    my_pid = os.getpid()
    refreshed_at = datetime.now(timezone.utc)
    # Same live PID for all five; only the uuid part of the filename differs.
    for index in range(5):
        _write_heartbeat(wrappers_dir, my_pid, f"uuid-{index}", refreshed_at)

    scanner = HeartbeatScanner(wrappers_dir)
    assert scanner.fresh_count() == 5
    assert scanner.is_active() is True
# ---------------------------------------------------------------- cleanup
def test_cleanup_stale_orphans_deletes_files(wrappers_dir: Path) -> None:
    """Cleanup removes two stale files and one orphan, and keeps the fresh one."""
    my_pid = os.getpid()
    current = datetime.now(timezone.utc)
    expired = current - timedelta(seconds=DEFAULT_STALE_THRESHOLD_SEC + 10)

    keeper = _write_heartbeat(wrappers_dir, my_pid, "uuid-fresh", current)
    stale_files = [
        _write_heartbeat(wrappers_dir, my_pid, f"uuid-s{n}", expired)
        for n in (1, 2)
    ]
    orphan_file = _write_heartbeat(wrappers_dir, _DEAD_PID, "uuid-orphan", current)

    scanner = HeartbeatScanner(wrappers_dir)
    removed = scanner.cleanup_stale_orphans()
    assert removed == 3

    # Only the fresh file may remain on disk.
    assert keeper.exists()
    for gone in stale_files:
        assert not gone.exists()
    assert not orphan_file.exists()

    # A follow-up scan must see just the survivor.
    survivors = scanner.scan()
    assert len(survivors) == 1
    assert survivors[0].uuid == "uuid-fresh"
# ---------------------------------------------------------------- heartbeat_idle_30min
def test_heartbeat_idle_30min_with_recent_fresh_returns_false(
    wrappers_dir: Path,
) -> None:
    """One fresh heartbeat makes ``heartbeat_idle_30min`` return False."""
    my_pid = os.getpid()
    refreshed_at = datetime.now(timezone.utc)
    _write_heartbeat(wrappers_dir, my_pid, "uuid-fresh", refreshed_at)

    scanner = HeartbeatScanner(wrappers_dir)
    idle = scanner.heartbeat_idle_30min()
    assert idle is False
def test_heartbeat_idle_30min_no_fresh_returns_true(wrappers_dir: Path) -> None:
    """With only a stale and an orphan entry, ``heartbeat_idle_30min`` is True."""
    my_pid = os.getpid()
    current = datetime.now(timezone.utc)
    expired = current - timedelta(seconds=DEFAULT_STALE_THRESHOLD_SEC + 10)
    _write_heartbeat(wrappers_dir, my_pid, "uuid-s", expired)
    _write_heartbeat(wrappers_dir, _DEAD_PID, "uuid-o", current)

    scanner = HeartbeatScanner(wrappers_dir)
    idle = scanner.heartbeat_idle_30min()
    assert idle is True
# ---------------------------------------------------------------- concurrency
def test_concurrent_scan_safe(wrappers_dir: Path) -> None:
    """A scan running concurrently with a writer must not raise.

    Spawns a background writer that drops new heartbeat files in tight
    succession while the main thread runs ``scan()`` repeatedly. The
    contract is "no exception"; in addition, the final scan after the writer
    has finished must return exactly as many entries as files were written,
    all of them FRESH.
    """
    own_pid = os.getpid()
    now = datetime.now(timezone.utc)
    write_count = 50
    written: list[Path] = []
    errors: list[BaseException] = []
    stop = threading.Event()

    def writer() -> None:
        try:
            for i in range(write_count):
                if stop.is_set():
                    return
                p = _write_heartbeat(
                    wrappers_dir, own_pid, f"uuid-cc-{i}", now
                )
                written.append(p)
        except BaseException as exc:  # noqa: BLE001 — surface in test
            errors.append(exc)

    scanner = HeartbeatScanner(wrappers_dir)
    t = threading.Thread(target=writer)
    t.start()
    try:
        # Spin scans while the writer adds files. The race we are testing
        # is "scanner glob includes a file that vanishes" or "writer
        # half-writes JSON" — both must be tolerated silently.
        for _ in range(20):
            scanner.scan()  # must not raise
            time.sleep(0.001)
    finally:
        stop.set()
        t.join(timeout=5)

    # join() returns silently on timeout; if the writer were still running,
    # ``written`` could keep growing while the assertions below read it.
    assert not t.is_alive(), "writer thread did not stop within 5s"
    assert errors == [], f"writer raised: {errors!r}"

    final = scanner.scan()
    assert len(final) == len(written), (
        f"final scan count {len(final)} != written count {len(written)}"
    )
    assert all(e.status is HeartbeatStatus.FRESH for e in final)
# ---------------------------------------------------------------- corruption tolerance
def test_torn_write_falls_back_to_mtime(wrappers_dir: Path) -> None:
    """A file holding only ``{`` is still reported, as FRESH, with the filename PID.

    The lone opening brace simulates a crash mid-write. The scanner is
    expected to classify the entry from the filename's PID and the file's
    mtime instead of dropping it.
    """
    my_pid = os.getpid()
    torn_file = wrappers_dir / f"heartbeat-{my_pid}-uuid-torn.json"
    torn_file.write_text("{")  # not valid JSON

    scanner = HeartbeatScanner(wrappers_dir)
    found = scanner.scan()
    assert len(found) == 1

    # The file was just written, so its mtime is recent, and the PID is alive.
    only = found[0]
    assert only.status is HeartbeatStatus.FRESH
    assert only.pid == my_pid

131
tests/test_hebbian.py Normal file
View file

@ -0,0 +1,131 @@
"""Tests for Hebbian reinforcement, L0 seed, profile knobs, consolidate stub."""
from __future__ import annotations
from uuid import UUID
from iai_mcp.core import DEFERRED_KNOBS, L0_ID, LIVE_KNOBS, _seed_l0_identity, dispatch
from iai_mcp.store import MemoryStore
from tests.test_store import _make
def test_reinforce_creates_pairwise_edges(tmp_path):
    """Reinforcing three ids together boosts C(3,2) = 3 pairwise edges."""
    store = MemoryStore(path=tmp_path)
    trio = [_make() for _ in range(3)]
    for record in trio:
        store.insert(record)

    payload = {"ids": [str(record.id) for record in trio]}
    outcome = dispatch(store, "memory_reinforce", payload)
    assert outcome["edges_boosted"] == 3
def test_reinforce_twice_doubles_weight(tmp_path):
    """Two reinforce calls on the same pair stack the delta (0.1 + 0.1 = 0.2)."""
    store = MemoryStore(path=tmp_path)
    pair = [_make() for _ in range(2)]
    for record in pair:
        store.insert(record)
    id_strings = [str(record.id) for record in pair]

    dispatch(store, "memory_reinforce", {"ids": id_strings})
    second = dispatch(store, "memory_reinforce", {"ids": id_strings})

    weights = second["new_weights"]
    assert len(weights) == 1
    edge_key = next(iter(weights))
    assert abs(weights[edge_key] - 0.2) < 1e-5
def test_l0_identity_seeded(tmp_path):
    """Seeding creates the L0 record: pinned, semantic, with both immutability flags."""
    store = MemoryStore(path=tmp_path)
    _seed_l0_identity(store)

    identity = store.get(L0_ID)
    assert identity is not None
    # Flags expected on the identity record.
    assert identity.pinned is True
    assert identity.never_decay is True
    assert identity.never_merge is True
    # Placement and content.
    assert identity.detail_level == 5
    assert identity.tier == "semantic"
    assert "IAI-MCP" in identity.literal_surface
def test_l0_seed_is_idempotent(tmp_path):
    """Seeding three times still leaves exactly one record with ``L0_ID``."""
    store = MemoryStore(path=tmp_path)
    for _ in range(3):
        _seed_l0_identity(store)

    matches = [rec for rec in store.all_records() if rec.id == L0_ID]
    assert len(matches) == 1
def test_profile_get_returns_live_knobs(tmp_path):
    """``profile_get`` with no knob returns 11 live knobs and 0 deferred.

    Spot-checks five of the live defaults as well.
    """
    store = MemoryStore(path=tmp_path)
    profile = dispatch(store, "profile_get", {})
    live = profile["live"]

    assert live["literal_preservation"] == "strong"  # AUTIST-04
    assert live["masking_off"] is True  # AUTIST-06
    assert live["task_support"] == "cued_recognition"  # AUTIST-07
    assert live["scene_construction_scaffold"] is True  # AUTIST-14
    assert live["wake_depth"] == "minimal"  # MCP-12

    # Plan 07.12-02: 10 autistic-kernel + wake_depth = 11 live
    # (AUTIST-02/08/11/12 removed).
    assert len(profile["live"]) == 11
    assert len(profile["deferred"]) == 0
def test_profile_get_specific_live_knob(tmp_path):
    """Asking for one knob returns its name and its value."""
    store = MemoryStore(path=tmp_path)
    query = {"knob": "literal_preservation"}
    answer = dispatch(store, "profile_get", query)
    assert answer["knob"] == "literal_preservation"
    assert answer["value"] == "strong"
def test_profile_get_camouflaging_now_live_after_autist13_flip(tmp_path):
    """``profile_get`` returns a value for ``camouflaging_relaxation`` (AUTIST-13)."""
    import iai_mcp.core as core

    # The profile state lives at module level; put the knob back to 0.0 in
    # case an earlier test (e.g. relax_register) moved it.
    core._profile_state["camouflaging_relaxation"] = 0.0

    store = MemoryStore(path=tmp_path)
    answer = dispatch(store, "profile_get", {"knob": "camouflaging_relaxation"})
    assert answer["knob"] == "camouflaging_relaxation"
    assert answer["value"] == 0.0  # D-AUTIST13 default
def test_profile_set_camouflaging_relaxation_now_succeeds(tmp_path):
    """``profile_set`` accepts 0.3 for ``camouflaging_relaxation``.

    The knob is set back to 0.0 in a ``finally`` block so a failing
    assertion cannot leave the changed value behind for later tests.
    """
    store = MemoryStore(path=tmp_path)
    try:
        result = dispatch(
            store, "profile_set", {"knob": "camouflaging_relaxation", "value": 0.3}
        )
        assert result["status"] == "ok"
    finally:
        # Reset for other tests, whether or not the assertion passed.
        dispatch(
            store, "profile_set", {"knob": "camouflaging_relaxation", "value": 0.0}
        )
def test_profile_set_live_knob_succeeds(tmp_path):
    """``profile_set`` accepts the enum value "loose" for ``literal_preservation``.

    ``LIVE_KNOBS`` is a module-level dict, so the default is restored in a
    ``finally`` block; a failing assertion must not leak "loose" into other
    tests.
    """
    store = MemoryStore(path=tmp_path)
    # Reset default before test to avoid test ordering issues
    LIVE_KNOBS["literal_preservation"] = "strong"
    try:
        # Plan 03 introduced schema validation (enum:strong|medium|loose).
        # Plan 01 accepted any value; now we use a valid enum entry.
        result = dispatch(
            store, "profile_set", {"knob": "literal_preservation", "value": "loose"}
        )
        assert result["status"] == "ok"
        assert LIVE_KNOBS["literal_preservation"] == "loose"
    finally:
        # Restore so other tests aren't affected, even on failure.
        LIVE_KNOBS["literal_preservation"] = "strong"
def test_memory_consolidate_real(tmp_path):
    """``memory_consolidate`` returns heavy-consolidation output, not a stub.

    Per the original note (Plan 02-02), the old stub answered
    ``{"status": "queued", "phase": "placeholder"}``. The result checked here
    has ``mode == "heavy"``, a ``tier`` of "tier0" or "tier1", and the keys
    ``summaries_created``, ``decay_result`` and ``schema_candidates``.
    """
    store = MemoryStore(path=tmp_path)
    outcome = dispatch(store, "memory_consolidate", {})

    assert outcome["mode"] == "heavy"
    assert outcome["tier"] in ("tier0", "tier1")
    for required in ("summaries_created", "decay_result", "schema_candidates"):
        assert required in outcome

View file

@ -0,0 +1,391 @@
"""Phase 7.4 — Hebbian write-batching coverage.
Nine sync tests are listed below (project does NOT use pytest-asyncio):
R1 / A2 `test_boost_edges_emits_at_most_two_versions`
R2 `test_boost_edges_scalar_delta_unchanged`
R2 `test_boost_edges_sequence_delta_per_pair`
R2 `test_boost_edges_sequence_delta_length_mismatch_raises`
A7 `test_boost_edges_coalesces_duplicate_pairs`
R3 site `test_sleep_consolidated_from_batches_into_two_versions`
R3 site `test_curiosity_bridge_batches_into_two_versions`
R3 site `test_schema_bind_batches_into_two_versions`
R3 site `test_pipeline_profile_modulates_batches_with_sequence_delta`
Eight tests minimum SPEC R4 asks for >= 5; this ships the full target from
CONTEXT D7.4-08.
"""
from __future__ import annotations
from uuid import uuid4
import pytest
from iai_mcp.store import EDGES_TABLE, MemoryStore
# ----------------------------------------------------------------- helpers
def _versions(store: MemoryStore) -> int:
    """Count the versions currently listed for the edges table.

    Opens ``EDGES_TABLE`` through ``store.db`` and returns the length of its
    ``list_versions()`` result.
    """
    edges_table = store.db.open_table(EDGES_TABLE)
    history = edges_table.list_versions()
    return len(history)
# ----------------------------------------------------------- R1 / A2 — versions
def test_boost_edges_emits_at_most_two_versions(tmp_path):
    """R1 + A2: one call with 5 pairs (3 existing, 2 new) adds <= 2 table versions.

    Per the original note, the pre-refactor body emitted one version per
    pair, and the batched form uses one write for the updates plus one for
    the new rows.
    """
    store = MemoryStore(path=tmp_path)
    a, b, c, d, e, f, g = (uuid4() for _ in range(7))
    seeded = ((a, b), (c, d), (e, f))

    # Seed the three edges with a single call.
    store.boost_edges(list(seeded), delta=0.1, edge_type="hebbian")
    baseline = _versions(store)

    # Five pairs: the three seeded ones plus two never seen before.
    new = store.boost_edges(
        [*seeded, (a, c), (f, g)],
        delta=0.2,
        edge_type="hebbian",
    )
    delta_versions = _versions(store) - baseline

    # Hard cap: at most two new versions for the whole call.
    assert delta_versions <= 2, (
        f"boost_edges emitted {delta_versions} versions "
        f"(expected <= 2 after batching)"
    )

    # Seeded pairs end at 0.1 + 0.2 = 0.3; new pairs at 0 + 0.2 = 0.2.
    # Endpoints are compared as sets, so key ordering does not matter.
    assert len(new) == 5
    seeded_endpoints = [{str(x), str(y)} for x, y in seeded]
    for key, weight in new.items():
        if {key[0], key[1]} in seeded_endpoints:
            assert abs(weight - 0.3) < 1e-5, f"{key} expected 0.3, got {weight}"
        else:
            assert abs(weight - 0.2) < 1e-5, f"{key} expected 0.2, got {weight}"
# ----------------------------------------------------------- R2 — scalar delta
def test_boost_edges_scalar_delta_unchanged(tmp_path):
    """R2: a scalar ``delta=0.3`` is applied to every pair in the call."""
    store = MemoryStore(path=tmp_path)
    a, b, c, d = (uuid4() for _ in range(4))

    weights = store.boost_edges([(a, b), (c, d)], delta=0.3, edge_type="hebbian")

    assert len(weights) == 2
    assert all(abs(value - 0.3) < 1e-5 for value in weights.values())
# ----------------------------------------------------------- R2 — sequence delta
def test_boost_edges_sequence_delta_per_pair(tmp_path):
    """R2: a list ``delta=[0.5, 0.7]`` is applied pair by pair, in list order."""
    store = MemoryStore(path=tmp_path)
    a, b, c, d = (uuid4() for _ in range(4))
    pairs = [(a, b), (c, d)]

    weights = store.boost_edges(pairs, delta=[0.5, 0.7], edge_type="hebbian")
    assert len(weights) == 2

    # Returned keys are looked up as sorted string tuples; rebuild each one.
    first_key = tuple(sorted([str(a), str(b)]))
    second_key = tuple(sorted([str(c), str(d)]))
    assert abs(weights[first_key] - 0.5) < 1e-5
    assert abs(weights[second_key] - 0.7) < 1e-5
def test_boost_edges_sequence_delta_length_mismatch_raises(tmp_path):
    """R2: three deltas for two pairs raises ``ValueError`` matching "deltas length"."""
    store = MemoryStore(path=tmp_path)
    a, b, c, d = (uuid4() for _ in range(4))
    two_pairs = [(a, b), (c, d)]
    three_deltas = [0.5, 0.7, 0.9]  # one more delta than pairs

    with pytest.raises(ValueError, match="deltas length"):
        store.boost_edges(two_pairs, delta=three_deltas, edge_type="hebbian")
# ----------------------------------------------------------- A7 — coalesce
def test_boost_edges_coalesces_duplicate_pairs(tmp_path):
    """A7: the same pair listed twice with delta=0.1 adds 0.2, not 0.1.

    Per the original note, the legacy implementation re-read the table after
    every pair so duplicates saw each other's delta; the batched version is
    expected to keep that result by summing duplicates before writing.
    """
    store = MemoryStore(path=tmp_path)
    a, b = uuid4(), uuid4()

    # Seed the edge so the existing weight is non-zero (0.1).
    store.boost_edges([(a, b)], delta=0.1, edge_type="hebbian")

    # Same pair twice: 0.1 existing + 2 * 0.1 = 0.3.
    new = store.boost_edges([(a, b)] * 2, delta=0.1, edge_type="hebbian")
    assert len(new) == 1, "duplicate pair should collapse to ONE canonical key"

    canonical = tuple(sorted([str(a), str(b)]))
    assert abs(new[canonical] - 0.3) < 1e-5, (
        f"coalesced delta should be cur + 2*delta = 0.3, got {new[canonical]}"
    )
def test_boost_edges_coalesces_duplicate_pairs_first_call(tmp_path):
    """A7 (stronger form): duplicates coalesce even when the edge is brand new.

    With no prior edge, listing the same pair twice with ``delta=0.1`` must
    yield 0.2 (NOT 0.1) -- i.e. both occurrences are summed before the write.
    """
    store = MemoryStore(path=tmp_path)
    a, b = uuid4(), uuid4()
    repeated = [(a, b)] * 2

    weights = store.boost_edges(repeated, delta=0.1, edge_type="hebbian")

    lookup = tuple(sorted([str(a), str(b)]))
    assert abs(weights[lookup] - 0.2) < 1e-5
# ----------------------------------------------------------- R3 — site-level
def test_sleep_consolidated_from_batches_into_two_versions(tmp_path):
    """R3 site-level: ``_create_semantic_summary`` writes its edges in one batch.

    Five source records are inserted and summarised. The test then checks:

    * the version counter reported by ``_versions(store)`` grew by at most 2
      across the summary call (rather than once per source record);
    * exactly five ``consolidated_from`` edges exist whose endpoints are all
      drawn from the summary id and the five source ids;
    * each of those edges has weight 1.0.
    """
    from iai_mcp.sleep import _create_semantic_summary
    from tests.test_store import _make

    store = MemoryStore(path=tmp_path)

    # Build the five-record "cluster" first, then persist each record.
    cluster = [_make(text=f"source memory {i}") for i in range(5)]
    for record in cluster:
        store.insert(record)

    versions_before = _versions(store)
    summary_id = _create_semantic_summary(
        store,
        cluster,
        summary_text="cls summary of 5 source memories",
        language="en",
    )
    delta_versions = _versions(store) - versions_before

    # Budget of 2: one write for the five fresh edges plus, at most, one
    # incidental version from an empty merge path (per the original note).
    assert delta_versions <= 2, (
        f"sleep.cls boost emitted {delta_versions} versions for 5 sources "
        f"(expected <= 2 after Phase 7.4)"
    )

    df = store.db.open_table(EDGES_TABLE).to_pandas()
    # Either endpoint may be the summary or a source, so accept any of the
    # six ids on both sides and rely on the edge type to narrow the match.
    endpoint_ids = [str(summary_id), *[str(r.id) for r in cluster]]
    both_ends_known = df["src"].isin(endpoint_ids) & df["dst"].isin(endpoint_ids)
    consolidated = df[both_ends_known & (df["edge_type"] == "consolidated_from")]

    assert len(consolidated) == 5, (
        f"expected 5 consolidated_from edges, got {len(consolidated)}"
    )
    # Each fresh edge should carry exactly the scalar delta of 1.0.
    for weight in consolidated["weight"]:
        assert abs(float(weight) - 1.0) < 1e-5
def test_curiosity_bridge_batches_into_two_versions(tmp_path):
    """R3 site-level: ``fire_curiosity`` writes its bridge edges in one batch.

    Five records are inserted and wrapped in minimal hit objects. After
    calling ``fire_curiosity`` with ``entropy=1.5`` the test expects a
    non-None result, at most 2 new versions from ``_versions(store)``, and
    exactly five ``curiosity_bridge`` edges in the edges table.
    """
    from iai_mcp.curiosity import fire_curiosity
    from tests.test_store import _make

    class _Hit:
        """Bare stand-in for a recall hit: a record id plus a fixed score."""

        def __init__(self, record_id):
            self.record_id = record_id
            self.score = 0.4

    store = MemoryStore(path=tmp_path)

    # The five records act as the triggers; the curiosity path is driven
    # directly through the call below rather than via a real recall.
    triggers = [_make(text=f"ambiguous memory {i}") for i in range(5)]
    for record in triggers:
        store.insert(record)
    hits = [_Hit(record.id) for record in triggers]

    versions_before = _versions(store)
    # Per the original note, entropy=1.5 is above the ENTROPY_HIGH default and
    # should take the tier="question" path with all five trigger ids.
    q = fire_curiosity(
        store,
        hits=hits,
        cue="what was that thing",
        entropy=1.5,
        session_id="sess-curiosity",
        turn=10,
    )
    versions_after = _versions(store)

    assert q is not None, "high-entropy curiosity call should fire"
    delta_versions = versions_after - versions_before
    assert delta_versions <= 2, (
        f"curiosity boost emitted {delta_versions} versions for 5 triggers "
        f"(expected <= 2 after Phase 7.4)"
    )

    df = store.db.open_table(EDGES_TABLE).to_pandas()
    bridge = df[df["edge_type"] == "curiosity_bridge"]
    assert len(bridge) == 5, (
        f"expected 5 curiosity_bridge edges, got {len(bridge)}"
    )
def test_schema_bind_batches_into_two_versions(tmp_path):
    """R3 site-level: ``persist_schema`` writes its evidence edges in one batch.

    Five evidence records are inserted and referenced by a single
    ``SchemaCandidate``. After ``persist_schema`` the test expects a
    non-None schema id, at most 2 new versions from ``_versions(store)``,
    and exactly five ``schema_instance_of`` edges in the edges table.
    """
    from iai_mcp.schema import SchemaCandidate, persist_schema
    from tests.test_store import _make

    store = MemoryStore(path=tmp_path)

    evidence = [_make(text=f"evidence {i}") for i in range(5)]
    for record in evidence:
        store.insert(record)

    # The pattern string is unique to this test so that (per the original
    # note) persist_schema's dedup branch does not short-circuit and the
    # new-schema insert path -- the one holding the batched call -- runs.
    candidate = SchemaCandidate(
        pattern="phase74_test_pattern_unique",
        confidence=0.7,
        evidence_ids=[record.id for record in evidence],
        evidence_count=5,
        status="auto",
    )

    versions_before = _versions(store)
    schema_id = persist_schema(store, candidate)
    versions_after = _versions(store)

    assert schema_id is not None
    delta_versions = versions_after - versions_before
    # The schema record's own row is said to land in records.lance rather than
    # edges.lance, so it should not count here; the budget of 2 covers the
    # merge_insert + tbl.add for five fresh schema_instance_of edges.
    assert delta_versions <= 2, (
        f"schema.bind boost emitted {delta_versions} versions for 5 evidence "
        f"(expected <= 2 after Phase 7.4)"
    )

    df = store.db.open_table(EDGES_TABLE).to_pandas()
    instance_edges = df[df["edge_type"] == "schema_instance_of"]
    assert len(instance_edges) == 5, (
        f"expected 5 schema_instance_of edges, got {len(instance_edges)}"
    )
def test_pipeline_profile_modulates_batches_with_sequence_delta(tmp_path):
    """R3 site-level: the profile_modulates gather-then-batch pattern.

    Rather than running the whole recall pipeline, this test re-creates the
    gather step locally (one ``(record_id, PROFILE_SENTINEL_UUID)`` pair and
    one delta per hit) and then issues a single ``boost_edges`` call with a
    sequence ``delta``. It asserts that:

    1. hits with an empty gains dict contribute no pair;
    2. hits whose summed gain is <= 0 fall back to a delta of 1.0;
    3. the single call adds at most 2 versions per ``_versions(store)``;
    4. every produced edge carries its own per-hit delta.
    """
    from iai_mcp.pipeline import PROFILE_SENTINEL_UUID

    store = MemoryStore(path=tmp_path)

    # Five ids standing in for the hits' record_id values.
    record_ids = [uuid4() for _ in range(5)]
    # One gains dict per hit, shaped like the profile modulation gain mapping.
    gains_per_hit = [
        {"profile_match_strong": 0.4, "language_match": 0.1},  # sums to 0.5
        {},                                                    # empty -> skipped
        {"profile_match_weak": 0.2},                           # sums to 0.2
        {"profile_match_neg": -0.5, "language_match": 0.1},    # -0.4 -> falls back to 1.0
        {"profile_match_strong": 0.7},                         # sums to 0.7
    ]

    # Local copy of the gather step, kept independent of the recall plumbing.
    pairs: list[tuple] = []
    deltas: list[float] = []
    for rid, gains in zip(record_ids, gains_per_hit):
        if gains:
            summed = float(sum(gains.values()))
            pairs.append((rid, PROFILE_SENTINEL_UUID))
            deltas.append(summed if summed > 0 else 1.0)

    assert len(pairs) == 4, "4 hits should produce edges (1 skipped for empty gains)"
    assert len(deltas) == 4

    versions_before = _versions(store)
    new = store.boost_edges(
        pairs,
        delta=deltas,
        edge_type="profile_modulates",
    )
    delta_versions = _versions(store) - versions_before
    assert delta_versions <= 2, (
        f"profile_modulates boost emitted {delta_versions} versions "
        f"(expected <= 2 after Phase 7.4)"
    )

    # Four edges, each keyed by the sorted (record id, sentinel) string pair.
    assert len(new) == 4
    sentinel = str(PROFILE_SENTINEL_UUID)
    expected_by_hit = {0: 0.5, 2: 0.2, 3: 1.0, 4: 0.7}
    expected_per_pair = {
        tuple(sorted([str(record_ids[idx]), sentinel])): want
        for idx, want in expected_by_hit.items()
    }
    for key, exp in expected_per_pair.items():
        assert key in new, f"missing edge for {key}"
        assert abs(new[key] - exp) < 1e-5, (
            f"{key} expected {exp}, got {new[key]}"
        )

230
tests/test_hebbian_ltp.py Normal file
View file

@ -0,0 +1,230 @@
"""Tests for 02-REVIEW.md H-03 (CLS heavy cycle missing Hebbian LTP).
Bug: run_heavy_consolidation creates `consolidated_from` edges for cluster
members (LTD-side write) but does NOT strengthen existing hebbian edges
between co-retrieved cluster members (LTP). The spec requires both
sides -- frequently-traversed edges strengthen; old rarely-traversed fade.
Pre-fix, the only LTP source was store.boost_edges inside pipeline_recall,
which fires on explicit user retrieval, never during offline consolidation.
Fix:
- Add module constant HEAVY_LTP_DELTA = 0.05 in sleep.py.
- In run_heavy_consolidation, after _create_semantic_summary runs for a
cluster, call store.boost_edges(combinations(cluster_ids, 2),
edge_type="hebbian", delta=HEAVY_LTP_DELTA) so existing hebbian edges
between co-cluster members are potentiated.
- Non-cluster edges remain untouched.
Constitutional contract (MEM-07 biological fidelity + symmetry):
Hebbian LTP/LTD symmetry is the core Hebbian-learning invariant. Without
LTP during consolidation the graph drifts monotonically weaker. Matches
Woz 2022 SRS reinforcement on co-retrieval.
"""
from __future__ import annotations
from datetime import datetime, timezone
from uuid import UUID, uuid4
import pytest
from iai_mcp.types import EMBED_DIM, MemoryRecord
# ---------------------------------------------------------------- helpers
def _record(
    *,
    text: str = "n",
    language: str = "en",
) -> MemoryRecord:
    """Build an unpinned episodic ``MemoryRecord`` with fixed test defaults.

    Only the literal surface text and the language vary; every other field
    is hard-coded. The embedding is 1.0 in the first slot followed by zeros
    up to ``EMBED_DIM`` entries, and ``created_at`` / ``updated_at`` share a
    single timezone-aware UTC timestamp taken at call time.
    """
    stamp = datetime.now(timezone.utc)
    first_axis_embedding = [1.0, *([0.0] * (EMBED_DIM - 1))]
    return MemoryRecord(
        # identity and content
        id=uuid4(),
        tier="episodic",
        literal_surface=text,
        aaak_index="",
        embedding=first_axis_embedding,
        # graph placement
        community_id=None,
        centrality=0.0,
        detail_level=2,
        pinned=False,
        # review / decay parameters
        stability=0.5,
        difficulty=0.3,
        last_reviewed=None,
        never_decay=False,
        never_merge=False,
        # bookkeeping
        provenance=[],
        created_at=stamp,
        updated_at=stamp,
        tags=[],
        language=language,
    )
def _hebbian_weight(store, a: UUID, b: UUID) -> float | None:
    """Return the stored hebbian weight between ``a`` and ``b``, if any.

    The two ids are stringified and sorted so the lookup does not depend on
    argument order. Returns ``None`` when the edges table is empty or holds
    no ``hebbian`` row for that pair; otherwise the first matching row's
    weight as a ``float``.
    """
    from iai_mcp.store import EDGES_TABLE

    src, dst = sorted([str(a), str(b)])
    edges = store.db.open_table(EDGES_TABLE).to_pandas()
    if edges.empty:
        return None

    is_match = (
        (edges["src"] == src)
        & (edges["dst"] == dst)
        & (edges["edge_type"] == "hebbian")
    )
    matching_weights = edges.loc[is_match, "weight"]
    if matching_weights.empty:
        return None
    return float(matching_weights.iloc[0])
# ==================================================== H-03: named constant
def test_heavy_ltp_delta_is_named_constant():
    """H-03: ``sleep.HEAVY_LTP_DELTA`` exists at module scope and equals 0.05.

    Pinning the name and value keeps the LTP increment a tunable constant
    instead of a magic number buried in the consolidation code.
    """
    from iai_mcp import sleep as sleep_mod

    is_defined = hasattr(sleep_mod, "HEAVY_LTP_DELTA")
    assert is_defined, (
        "sleep.py must define HEAVY_LTP_DELTA at module scope"
    )

    expected = pytest.approx(0.05, abs=1e-6)
    assert sleep_mod.HEAVY_LTP_DELTA == expected, (
        f"HEAVY_LTP_DELTA must equal 0.05, got {sleep_mod.HEAVY_LTP_DELTA}"
    )
# ==================================================== H-03: LTP on cluster members
def test_heavy_cycle_strengthens_existing_hebbian_edges(tmp_path):
    """H-03: heavy consolidation potentiates hebbian edges inside a cluster.

    Four records are inserted and all six pairwise hebbian edges are seeded
    at 0.3. After ``run_heavy_consolidation`` (LLM disabled, no API key)
    every one of those edges must still exist with a weight of at least
    ``0.3 + HEAVY_LTP_DELTA`` (within a 1e-3 tolerance).

    Per the original note, before the fix the weights stayed at 0.3.
    """
    from iai_mcp.guard import BudgetLedger, RateLimitLedger
    from iai_mcp.sleep import HEAVY_LTP_DELTA, SleepConfig, run_heavy_consolidation
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)

    # Four records intended to form one cohesive cluster.
    recs = [_record(text=f"fact_{i}") for i in range(4)]
    for rec in recs:
        store.insert(rec)

    # Every unordered pair of ids, in (i < j) order.
    ids = [rec.id for rec in recs]
    pairs = [
        (left, right)
        for pos, left in enumerate(ids)
        for right in ids[pos + 1:]
    ]

    # Seed each pairwise hebbian edge at 0.3, one call per pair.
    for a, b in pairs:
        store.boost_edges([(a, b)], edge_type="hebbian", delta=0.3)

    # Pre-condition: all six edges read back as 0.3.
    for a, b in pairs:
        w = _hebbian_weight(store, a, b)
        assert w == pytest.approx(0.3, abs=1e-3), (
            f"pre-condition: {a}/{b} weight must be 0.3, got {w}"
        )

    # Heavy consolidation with the LLM disabled (Tier-0 path).
    run_heavy_consolidation(
        store,
        session_id="ltp-test",
        config=SleepConfig(llm_enabled=False),
        budget=BudgetLedger(store),
        rate=RateLimitLedger(store),
        has_api_key=False,
    )

    # Post-condition: each edge grew by at least HEAVY_LTP_DELTA.
    minimum = 0.3 + HEAVY_LTP_DELTA - 1e-3
    for a, b in pairs:
        w = _hebbian_weight(store, a, b)
        assert w is not None, f"edge {a}/{b} must still exist"
        assert w >= minimum, (
            f"hebbian edge {a}/{b} not potentiated: expected >= "
            f"{0.3 + HEAVY_LTP_DELTA}, got {w}"
        )
def test_heavy_cycle_does_not_touch_non_cluster_edges(tmp_path):
    """H-03: heavy-cycle LTP leaves edges outside any cluster unchanged.

    A fully linked three-record cluster is seeded at 0.3, alongside a
    separate pair X<->E linked at 0.4 and not connected to the cluster.
    After ``run_heavy_consolidation`` the X<->E weight must still be 0.4.
    """
    from iai_mcp.guard import BudgetLedger, RateLimitLedger
    from iai_mcp.sleep import SleepConfig, run_heavy_consolidation
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)

    # Three-record cluster with all three pairwise hebbian links.
    cluster = [_record(text=f"c{i}") for i in range(3)]
    for member in cluster:
        store.insert(member)
    first, second, third = (member.id for member in cluster)
    for pair in ((first, second), (second, third), (first, third)):
        store.boost_edges([pair], edge_type="hebbian", delta=0.3)

    # Two further records joined only to each other, never to the cluster.
    rec_x = _record(text="x")
    rec_e = _record(text="e")
    store.insert(rec_x)
    store.insert(rec_e)
    store.boost_edges([(rec_x.id, rec_e.id)], edge_type="hebbian", delta=0.4)

    x_e_before = _hebbian_weight(store, rec_x.id, rec_e.id)
    assert x_e_before == pytest.approx(0.4, abs=1e-3)

    run_heavy_consolidation(
        store,
        session_id="ltp-isolate",
        config=SleepConfig(llm_enabled=False),
        budget=BudgetLedger(store),
        rate=RateLimitLedger(store),
        has_api_key=False,
    )

    # Per the original note, X and E form an isolated 2-node component, which
    # is below CLUSTER_MIN_SIZE=3, so no LTP should fire on their edge.
    x_e_after = _hebbian_weight(store, rec_x.id, rec_e.id)
    assert x_e_after == pytest.approx(0.4, abs=1e-3), (
        f"non-cluster edge must stay at 0.4, got {x_e_after}"
    )
def test_heavy_cycle_boost_edges_uses_hebbian_type(tmp_path):
    """H-03 structural guard on the source text of ``run_heavy_consolidation``.

    The function's source must contain an ``edge_type`` keyword set to
    ``hebbian`` (either quote style) and must mention ``HEAVY_LTP_DELTA``.
    This catches a regression where the LTP boost is dropped or replaced by
    reuse of the consolidated_from write path.
    """
    import inspect

    from iai_mcp import sleep as sleep_mod

    src = inspect.getsource(sleep_mod.run_heavy_consolidation)

    # Accept both the double- and single-quoted spelling of the keyword.
    hebbian_spellings = ("edge_type=\"hebbian\"", "edge_type='hebbian'")
    assert any(spelling in src for spelling in hebbian_spellings), (
        "run_heavy_consolidation must boost hebbian edges (LTP), not only "
        "create consolidated_from edges"
    )
    assert "HEAVY_LTP_DELTA" in src, (
        "run_heavy_consolidation must use the named HEAVY_LTP_DELTA constant"
    )

Some files were not shown because too many files have changed in this diff Show more