Initial release: iai-mcp v0.1.0
Co-Authored-By: Claude <noreply@anthropic.com> Co-Authored-By: XNLLLLH <XNLLLLH@users.noreply.github.com>
This commit is contained in:
commit
f6b876fbe7
332 changed files with 97258 additions and 0 deletions
0
tests/__init__.py
Normal file
0
tests/__init__.py
Normal file
41
tests/conftest.py
Normal file
41
tests/conftest.py
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
"""Project-wide pytest fixtures for the IAI-MCP test suite.
|
||||
|
||||
Phase 07.10 (file-based crypto key migration) removed the keyring backend
|
||||
from `iai_mcp.crypto.CryptoKey.get_or_create()`. Pre-existing tests that
|
||||
exercised the daemon, store, events, recall, and CLI paths relied on the
|
||||
keyring auto-fallback to source the encryption key in test environments.
|
||||
After Phase 07.10, the runtime path is **file → passphrase env → error**
|
||||
with no keyring fallback, so those tests now hit `CryptoKeyError` unless
|
||||
either the file or the passphrase is set.
|
||||
|
||||
This module's autouse fixture sets `IAI_MCP_CRYPTO_PASSPHRASE` to a fixed
|
||||
test passphrase for every test session, restoring the deterministic
|
||||
`derive_key_from_passphrase(...)` path that the test suite expects.
|
||||
Production behavior is unaffected — the production daemon never sets
|
||||
this env var and instead reads the 32-byte file at `{IAI_MCP_STORE}/.crypto.key`
|
||||
written by `iai-mcp crypto migrate-to-file` or `iai-mcp crypto init`.
|
||||
|
||||
The dedicated file-backend tests in `tests/test_crypto_file_backend.py`
|
||||
override this fixture per-test by clearing the env var or by writing an
|
||||
explicit `.crypto.key` file in their `tmp_path` fixtures.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
_TEST_PASSPHRASE = "iai-mcp-test-passphrase-2026-04-30-phase-07.10"
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _crypto_passphrase_env(monkeypatch: pytest.MonkeyPatch) -> None:
    """Provide a default IAI_MCP_CRYPTO_PASSPHRASE for each test.

    The variable is only populated when the surrounding environment has not
    already defined it, so a passphrase supplied from outside is left alone.
    A test that needs the absent-passphrase / missing-key error path can undo
    this default from its own body with
    `monkeypatch.delenv("IAI_MCP_CRYPTO_PASSPHRASE", raising=False)`.
    """
    already_configured = "IAI_MCP_CRYPTO_PASSPHRASE" in os.environ
    if already_configured:
        return
    monkeypatch.setenv("IAI_MCP_CRYPTO_PASSPHRASE", _TEST_PASSPHRASE)
|
||||
10
tests/fixtures/bedtime/ar.txt
vendored
Normal file
10
tests/fixtures/bedtime/ar.txt
vendored
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
تصبح على خير yes
|
||||
ليلة سعيدة yes
|
||||
أنا متعب سأنام yes
|
||||
إلى الغد yes
|
||||
وقت النوم yes
|
||||
قال البطل وداعا ومضى no
|
||||
كلمة السلام في الأدب العربي no
|
||||
أكمل العمل على المشروع no
|
||||
اريد قهوة الآن no
|
||||
موضوع مهم للبحث no
|
||||
10
tests/fixtures/bedtime/de.txt
vendored
Normal file
10
tests/fixtures/bedtime/de.txt
vendored
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
gute Nacht yes
|
||||
ich gehe jetzt ins Bett yes
|
||||
ich bin müde yes
|
||||
bis morgen yes
|
||||
Schlafenszeit yes
|
||||
der Bösewicht sagte auf Wiedersehen und verschwand no
|
||||
das Wort Kaffee kommt aus dem Arabischen no
|
||||
dieser Code ist sehr kompliziert no
|
||||
ein Filmtitel wäre gut no
|
||||
Kinder brauchen feste Routinen no
|
||||
10
tests/fixtures/bedtime/en.txt
vendored
Normal file
10
tests/fixtures/bedtime/en.txt
vendored
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
good night yes
|
||||
I'm heading to bed yes
|
||||
I'm really tired, going to sleep yes
|
||||
catch you tomorrow yes
|
||||
it's bedtime yes
|
||||
I need to finish this code before the deadline no
|
||||
this function returns a list of users no
|
||||
the phrase means something specific in context no
|
||||
let's review the codebase together no
|
||||
stories are a genre of children's literature no
|
||||
10
tests/fixtures/bedtime/es.txt
vendored
Normal file
10
tests/fixtures/bedtime/es.txt
vendored
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
buenas noches yes
|
||||
me voy a dormir yes
|
||||
estoy cansado yes
|
||||
hasta mañana yes
|
||||
hora de dormir yes
|
||||
el villano dijo adiós y se fue no
|
||||
la frase café con leche en español no
|
||||
este código es muy complicado no
|
||||
un título de película interesante no
|
||||
los niños necesitan rutinas no
|
||||
10
tests/fixtures/bedtime/fr.txt
vendored
Normal file
10
tests/fixtures/bedtime/fr.txt
vendored
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
bonne nuit yes
|
||||
je vais me coucher yes
|
||||
je suis fatigué yes
|
||||
à demain yes
|
||||
il est l'heure de dormir yes
|
||||
le héros dit au revoir et partit no
|
||||
l'expression café au lait en français no
|
||||
ce code est très compliqué no
|
||||
un titre de film intéressant no
|
||||
les enfants aiment les histoires no
|
||||
10
tests/fixtures/bedtime/ja.txt
vendored
Normal file
10
tests/fixtures/bedtime/ja.txt
vendored
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
おやすみ yes
|
||||
おやすみなさい yes
|
||||
寝ます yes
|
||||
また明日 yes
|
||||
疲れた yes
|
||||
小説のキャラはさよならと言って去った no
|
||||
動詞の活用を教えて no
|
||||
今日はバグを直す no
|
||||
映画の話をしよう no
|
||||
キャラクターのアニメを見る no
|
||||
10
tests/fixtures/bedtime/ru.txt
vendored
Normal file
10
tests/fixtures/bedtime/ru.txt
vendored
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
спокойной ночи yes
|
||||
пойду спать yes
|
||||
я устал, ложусь yes
|
||||
до завтра yes
|
||||
пора ложиться yes
|
||||
нужно успеть до полуночи no
|
||||
код работает правильно no
|
||||
эта фраза означает нечто конкретное no
|
||||
слишком много багов в проекте no
|
||||
утром встречаемся в офисе no
|
||||
10
tests/fixtures/bedtime/zh.txt
vendored
Normal file
10
tests/fixtures/bedtime/zh.txt
vendored
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
晚安 yes
|
||||
我要睡觉 yes
|
||||
累了 yes
|
||||
明天见 yes
|
||||
该睡觉了 yes
|
||||
反派说了再见然后离开 no
|
||||
这个词的起源很有趣 no
|
||||
这段代码非常复杂 no
|
||||
一部有意思的电影 no
|
||||
孩子需要固定的日常 no
|
||||
53
tests/fixtures/formality_ru_en_50pairs.json
vendored
Normal file
53
tests/fixtures/formality_ru_en_50pairs.json
vendored
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
[
|
||||
{"id": "en-01", "lang": "en", "formal": "The proposal is, therefore, accepted.", "informal": "yeah ok the proposal works"},
|
||||
{"id": "en-02", "lang": "en", "formal": "I would like to inform you that the deadline has been extended; however, the scope remains unchanged.", "informal": "fyi deadline pushed but scope same"},
|
||||
{"id": "en-03", "lang": "en", "formal": "Accordingly, we shall proceed with the implementation phase once the review is complete.", "informal": "cool, once review wraps we start building"},
|
||||
{"id": "en-04", "lang": "en", "formal": "It appears that the hypothesis is partially supported; nonetheless, further evidence is required.", "informal": "looks kinda right but we need more data"},
|
||||
{"id": "en-05", "lang": "en", "formal": "The committee has concluded its deliberations and shall publish the findings forthwith.", "informal": "board's done talking, they'll post results soon"},
|
||||
{"id": "en-06", "lang": "en", "formal": "Furthermore, the aforementioned constraints must be addressed prior to deployment.", "informal": "also we gotta fix those limits before shipping"},
|
||||
{"id": "en-07", "lang": "en", "formal": "The analysis demonstrates a statistically significant correlation; consequently, the null hypothesis is rejected.", "informal": "numbers line up so the original guess was wrong"},
|
||||
{"id": "en-08", "lang": "en", "formal": "I regret to inform you that, accordingly, the application has not been successful on this occasion.", "informal": "sorry bud u didnt get it this time"},
|
||||
{"id": "en-09", "lang": "en", "formal": "Please find attached the quarterly report; kindly review at your earliest convenience.", "informal": "attached the q-report, take a look when u can"},
|
||||
{"id": "en-10", "lang": "en", "formal": "The revised protocol mandates that all submissions be validated by two independent reviewers.", "informal": "new rule: two people gotta check every submission"},
|
||||
{"id": "en-11", "lang": "en", "formal": "Thus, the empirical evidence substantiates the theoretical framework proposed in the preceding section.", "informal": "so the data backs up the theory from earlier"},
|
||||
{"id": "en-12", "lang": "en", "formal": "The methodology, though unconventional, yielded results that were seemingly consistent with prior studies.", "informal": "weird method but results kinda matched other studies"},
|
||||
{"id": "en-13", "lang": "en", "formal": "We hereby confirm receipt of your correspondence dated the 14th instant.", "informal": "got your email from the 14th"},
|
||||
{"id": "en-14", "lang": "en", "formal": "The remuneration package shall be commensurate with experience and qualifications.", "informal": "pay depends on what u bring to the table"},
|
||||
{"id": "en-15", "lang": "en", "formal": "Hence, it is imperative that the stakeholders convene to resolve the outstanding issues.", "informal": "so yeah the team needs to meet and sort stuff out"},
|
||||
{"id": "en-16", "lang": "en", "formal": "The preliminary results indicate that the intervention may possibly reduce latency by approximately 12%.", "informal": "early numbers say it might cut latency ~12%"},
|
||||
{"id": "en-17", "lang": "en", "formal": "Consequent upon the aforesaid, we shall require an amendment to the existing agreement.", "informal": "because of all that, contract needs updating"},
|
||||
{"id": "en-18", "lang": "en", "formal": "It is with profound regret that we announce the cessation of operations at the eastern facility.", "informal": "sadly we're shutting down the east site"},
|
||||
{"id": "en-19", "lang": "en", "formal": "Pursuant to section 4.2, any deviations must be reported to the compliance officer.", "informal": "per 4.2 just tell the compliance person if stuff changes"},
|
||||
{"id": "en-20", "lang": "en", "formal": "The assertion, while plausible, lacks the empirical rigor necessary for publication.", "informal": "sounds reasonable but not solid enough to publish"},
|
||||
{"id": "en-21", "lang": "en", "formal": "We hereby authorize the disbursement of funds in accordance with the attached schedule.", "informal": "k we're sending the money per that attached plan"},
|
||||
{"id": "en-22", "lang": "en", "formal": "The observed phenomena can perhaps be attributed to stochastic fluctuations in the input signal.", "informal": "prolly just noise in the input"},
|
||||
{"id": "en-23", "lang": "en", "formal": "Notwithstanding the aforementioned caveats, the framework remains broadly applicable.", "informal": "despite those issues the framework still works"},
|
||||
{"id": "en-24", "lang": "en", "formal": "I should be most grateful if you could furnish me with the relevant documentation by Friday.", "informal": "can u send me the docs by friday thx"},
|
||||
{"id": "en-25", "lang": "en", "formal": "The present manuscript explores the ramifications of the hypothesis in greater depth.", "informal": "this paper goes deeper into what the theory means"},
|
||||
{"id": "ru-01", "lang": "ru", "formal": "Следовательно, предложение принимается.", "informal": "ок, предложение норм"},
|
||||
{"id": "ru-02", "lang": "ru", "formal": "Тем не менее, результаты требуют дополнительной проверки.", "informal": "короче, надо ещё проверить результаты"},
|
||||
{"id": "ru-03", "lang": "ru", "formal": "Таким образом, проект переходит в завершающую стадию; однако сроки остаются без изменений.", "informal": "в общем проект на финишной, но сроки те же"},
|
||||
{"id": "ru-04", "lang": "ru", "formal": "Вследствие вышеизложенного, комиссия приняла решение отложить рассмотрение вопроса.", "informal": "из-за всего этого чуваки решили отложить вопрос"},
|
||||
{"id": "ru-05", "lang": "ru", "formal": "Настоящим уведомляем вас о продлении срока действия соглашения до 31 декабря.", "informal": "договор продлили до 31 декабря"},
|
||||
{"id": "ru-06", "lang": "ru", "formal": "Впрочем, предварительный анализ свидетельствует о наличии статистически значимой корреляции.", "informal": "короче, по первым цифрам связь есть"},
|
||||
{"id": "ru-07", "lang": "ru", "formal": "Приношу свои извинения за причинённые неудобства; будем признательны за ваше понимание.", "informal": "сори за неудобства, спс за понимание"},
|
||||
{"id": "ru-08", "lang": "ru", "formal": "Однако, представленные данные не позволяют сделать однозначного вывода.", "informal": "но из этих данных не понять однозначно"},
|
||||
{"id": "ru-09", "lang": "ru", "formal": "Просим Вас ознакомиться с прилагаемым документом и, при необходимости, внести поправки.", "informal": "глянь доку и поправь если что"},
|
||||
{"id": "ru-10", "lang": "ru", "formal": "Возможно, указанное расхождение объясняется особенностями выборки.", "informal": "может, это из-за выборки такая разница"},
|
||||
{"id": "ru-11", "lang": "ru", "formal": "В силу сложившихся обстоятельств, запланированное мероприятие переносится на неопределённый срок.", "informal": "из-за всего происходящего встречу откладываем хз на когда"},
|
||||
{"id": "ru-12", "lang": "ru", "formal": "Настоящее исследование посвящено анализу долговременных последствий применённого метода.", "informal": "эта работа про то что будет в долгую от такого метода"},
|
||||
{"id": "ru-13", "lang": "ru", "formal": "Согласно пункту 3.2, любые изменения должны согласовываться с руководителем проекта.", "informal": "по п.3.2 изменения сначала к руководителю"},
|
||||
{"id": "ru-14", "lang": "ru", "formal": "Вероятно, наблюдаемое явление обусловлено случайными флуктуациями входного сигнала.", "informal": "похоже это просто шум на входе"},
|
||||
{"id": "ru-15", "lang": "ru", "formal": "С глубоким прискорбием сообщаем о прекращении деятельности восточного филиала.", "informal": "грустно но мы закрываем восточный филиал"},
|
||||
{"id": "ru-16", "lang": "ru", "formal": "По-видимому, полученные результаты согласуются с ранее опубликованными данными.", "informal": "вроде цифры совпадают с тем что публиковали"},
|
||||
{"id": "ru-17", "lang": "ru", "formal": "Настоящим подтверждаем получение вашего письма от 14-го числа.", "informal": "письмо от 14-го получил"},
|
||||
{"id": "ru-18", "lang": "ru", "formal": "Размер вознаграждения определяется квалификацией и опытом кандидата.", "informal": "сколько платят зависит от опыта и скилов"},
|
||||
{"id": "ru-19", "lang": "ru", "formal": "Следовательно, необходимо созвать совещание для разрешения возникших разногласий.", "informal": "короче надо созвать встречу и разрулить"},
|
||||
{"id": "ru-20", "lang": "ru", "formal": "Предварительные данные свидетельствуют о возможном снижении задержки примерно на 12%.", "informal": "по первым цифрам задержка может упасть где-то на 12"},
|
||||
{"id": "ru-21", "lang": "ru", "formal": "В соответствии с вышеизложенным, требуется внесение изменений в действующий договор.", "informal": "из-за всего этого договор надо менять"},
|
||||
{"id": "ru-22", "lang": "ru", "formal": "Представленное утверждение, хотя и правдоподобно, не обладает достаточной эмпирической строгостью.", "informal": "звучит норм но научно не тянет"},
|
||||
{"id": "ru-23", "lang": "ru", "formal": "Настоящим уполномочиваем произвести выплату средств согласно приложенному графику.", "informal": "ок выплачиваем по графику"},
|
||||
{"id": "ru-24", "lang": "ru", "formal": "Был бы признателен, если бы вы предоставили соответствующую документацию к пятнице.", "informal": "скинь доки до пятницы плиз"},
|
||||
{"id": "ru-25", "lang": "ru", "formal": "Данная работа рассматривает последствия гипотезы в более глубоком аспекте.", "informal": "эта статья копает глубже в последствия идеи"},
|
||||
{"id": "en-26", "lang": "en", "formal": "Moreover, the institution maintains that all submissions shall adhere strictly to the prescribed format.", "informal": "plus, stuff has to follow the format they gave u"}
|
||||
]
|
||||
162
tests/shell/test_launchd_install.sh
Executable file
162
tests/shell/test_launchd_install.sh
Executable file
|
|
@ -0,0 +1,162 @@
|
|||
#!/usr/bin/env bash
# macOS launchd install/uninstall idempotency.
#
# Verifies:
#   - DAEMON-01: plist installed under ~/Library/LaunchAgents
#   - DAEMON-10: silent install (--yes bypasses consent banner)
#   - C4 invariant: uninstall removes plist + ~/.iai-mcp/.lock +
#     ~/.iai-mcp/.daemon.sock + ~/.iai-mcp/.daemon-state.json
#   - Idempotency: install twice / uninstall twice -> no error
#
# Skipped on non-macOS (returns 0). Linux equivalent lives in
# tests/shell/test_systemd_install.sh.
#
# Also skipped (returns 0) when a plist is already installed. That check runs
# BEFORE the EXIT trap is registered, because cleanup() calls
# `daemon uninstall` and would otherwise tear down the user's real service on
# the skip path.
#
# This script does NOT actually invoke launchctl in CI environments where it
# would fail (GitHub Actions macos-latest runners have launchd but no UI
# session for `gui/$UID` bootstrap to succeed). The CLI itself uses
# `check=False` on launchctl so a non-zero return there does not abort the
# install -- the plist file write + state file removal still happens.

set -euo pipefail

if [[ "$(uname -s)" != "Darwin" ]]; then
    echo "SKIP: not macOS"
    exit 0
fi

# Resolve which Python + iai-mcp module to use. Prefer venv, else system.
ROOT="$(cd "$(dirname "$0")/../.." && pwd)"
if [[ -x "$ROOT/.venv/bin/python" ]]; then
    PY="$ROOT/.venv/bin/python"
else
    PY="${PYTHON:-python3}"
fi
CLI=( "$PY" -m iai_mcp.cli )

PLIST="$HOME/Library/LaunchAgents/com.iai-mcp.daemon.plist"
STATE_DIR="$HOME/.iai-mcp"
LOCK="$STATE_DIR/.lock"
SOCK="$STATE_DIR/.daemon.sock"
STATE="$STATE_DIR/.daemon-state.json"

# If the user already has a real plist installed, refuse to run -- this
# script would clobber their service state. Nothing has been created or
# trapped yet, so exiting here leaves the machine exactly as it was.
if [[ -f "$PLIST" ]]; then
    echo "SKIP: existing plist at $PLIST -- not clobbering user data"
    exit 0
fi

# Snapshot pre-existing state files so cleanup restores real user data.
# Backup directory is a fresh temp dir scoped to this run.
BACKUP_DIR="$(mktemp -d -t iai-mcp-shtest-XXXXXX)"
PRE_EXISTING_LOCK=0
PRE_EXISTING_SOCK=0
PRE_EXISTING_STATE=0
if [[ -f "$LOCK" ]]; then
    PRE_EXISTING_LOCK=1
    cp "$LOCK" "$BACKUP_DIR/lock.bak"
fi
if [[ -f "$SOCK" ]]; then
    PRE_EXISTING_SOCK=1
    # Best effort: the copy is allowed to fail without aborting the run.
    cp "$SOCK" "$BACKUP_DIR/sock.bak" 2>/dev/null || true
fi
if [[ -f "$STATE" ]]; then
    PRE_EXISTING_STATE=1
    cp "$STATE" "$BACKUP_DIR/state.bak"
fi

cleanup() {
    # Always restore the user's pre-existing state files, even if the test
    # failed. No plist restore is needed: the script exits before reaching
    # this point whenever a plist already existed.
    "${CLI[@]}" daemon uninstall --yes >/dev/null 2>&1 || true
    mkdir -p "$STATE_DIR"
    if [[ "$PRE_EXISTING_LOCK" == "1" ]]; then
        cp "$BACKUP_DIR/lock.bak" "$LOCK"
    fi
    if [[ "$PRE_EXISTING_SOCK" == "1" && -f "$BACKUP_DIR/sock.bak" ]]; then
        cp "$BACKUP_DIR/sock.bak" "$SOCK" 2>/dev/null || true
    fi
    if [[ "$PRE_EXISTING_STATE" == "1" ]]; then
        cp "$BACKUP_DIR/state.bak" "$STATE"
    fi
    rm -rf "$BACKUP_DIR"
}
trap cleanup EXIT

echo "[1/6] First install (--yes bypasses consent banner)..."
"${CLI[@]}" daemon install --yes
if [[ ! -f "$PLIST" ]]; then
    echo "FAIL: plist not created at $PLIST"
    exit 1
fi
# Pitfall 5 sanity: rendered plist has absolute python path, not /usr/local/bin/python3
# -F: match the interpreter path literally rather than as a regex.
if ! grep -qF -- "$PY" "$PLIST"; then
    echo "FAIL: plist does not contain absolute sys.executable ($PY)"
    cat "$PLIST"
    exit 1
fi

echo "[2/6] Second install -- must be idempotent..."
if ! "${CLI[@]}" daemon install --yes; then
    echo "FAIL: install #2 returned non-zero"
    exit 1
fi
if [[ ! -f "$PLIST" ]]; then
    echo "FAIL: plist missing after install #2"
    exit 1
fi

# Seed state files so we can verify C4 cleanup actually removes them.
mkdir -p "$STATE_DIR"
touch "$LOCK" "$SOCK"
echo "{}" > "$STATE"

echo "[3/6] First uninstall (C4: remove plist + 3 state files)..."
"${CLI[@]}" daemon uninstall --yes
if [[ -f "$PLIST" ]]; then
    echo "FAIL: plist not removed"
    exit 1
fi
# C4 invariant: lock + sock + state file all gone
if [[ -f "$LOCK" ]]; then
    echo "FAIL: lock file not removed (C4 violation)"
    exit 1
fi
# -e rather than -f: a leftover UNIX socket is not a regular file, and it
# must count as a violation too.
if [[ -e "$SOCK" ]]; then
    echo "FAIL: socket file not removed (C4 violation)"
    exit 1
fi
if [[ -f "$STATE" ]]; then
    echo "FAIL: state file not removed (C4 violation)"
    exit 1
fi

echo "[4/6] Second uninstall -- must be idempotent (no error on missing files)..."
if ! "${CLI[@]}" daemon uninstall --yes; then
    echo "FAIL: uninstall #2 returned non-zero"
    exit 1
fi

echo "[5/6] Cross-platform: dry-run install on macOS prints plist..."
# Capture the output instead of piping straight into `grep -q`: under
# `pipefail`, grep exiting at the first match can make the producer die of
# SIGPIPE and fail the whole pipeline spuriously.
if ! DRY_RUN_OUTPUT="$("${CLI[@]}" daemon install --dry-run --yes)"; then
    echo "FAIL: dry-run install returned non-zero"
    exit 1
fi
if ! grep -q "com.iai-mcp.daemon" <<<"$DRY_RUN_OUTPUT"; then
    echo "FAIL: dry-run did not print plist content"
    exit 1
fi

echo "[6/6] Cross-platform: dry-run does NOT write plist..."
"${CLI[@]}" daemon install --dry-run --yes >/dev/null
if [[ -f "$PLIST" ]]; then
    echo "FAIL: dry-run wrote $PLIST -- it must be a no-write preview"
    exit 1
fi

echo "PASS: launchd install/uninstall idempotency + C4 + Pitfall 5"
exit 0
|
||||
163
tests/shell/test_systemd_install.sh
Executable file
163
tests/shell/test_systemd_install.sh
Executable file
|
|
@ -0,0 +1,163 @@
|
|||
#!/usr/bin/env bash
# Linux systemd install/uninstall idempotency.
#
# Verifies:
#   - DAEMON-01: unit installed under ~/.config/systemd/user
#   - DAEMON-10: silent install (--yes bypasses consent banner)
#   - C4 invariant: uninstall removes unit + ~/.iai-mcp/.lock +
#     ~/.iai-mcp/.daemon.sock + ~/.iai-mcp/.daemon-state.json
#   - Idempotency: install twice / uninstall twice -> no error
#
# Skipped on non-Linux (returns 0). macOS equivalent lives in
# tests/shell/test_launchd_install.sh.
#
# Skipped if systemctl --user is not usable (headless CI without an active
# user-systemd session, e.g. GitHub Actions ubuntu-latest by default).
# DAEMON-12 cross-platform parity is enforced by CI matrix; this script is
# a smoke test that runs FULL flow when a user session exists.
#
# Also skipped (returns 0) when a unit file is already installed. That check
# runs BEFORE the EXIT trap is registered, because cleanup() calls
# `daemon uninstall` and would otherwise tear down the user's real service on
# the skip path.

set -euo pipefail

if [[ "$(uname -s)" != "Linux" ]]; then
    echo "SKIP: not Linux"
    exit 0
fi

# Skip on CI without user systemd session.
if ! systemctl --user status >/dev/null 2>&1; then
    echo "SKIP: no user systemd session available (expected on headless CI without loginctl enable-linger)"
    exit 0
fi

# Resolve which Python to run the CLI with. Prefer the repo venv, else
# $PYTHON, else python3.
ROOT="$(cd "$(dirname "$0")/../.." && pwd)"
if [[ -x "$ROOT/.venv/bin/python" ]]; then
    PY="$ROOT/.venv/bin/python"
else
    PY="${PYTHON:-python3}"
fi
CLI=( "$PY" -m iai_mcp.cli )

UNIT="$HOME/.config/systemd/user/iai-mcp-daemon.service"
STATE_DIR="$HOME/.iai-mcp"
LOCK="$STATE_DIR/.lock"
SOCK="$STATE_DIR/.daemon.sock"
STATE="$STATE_DIR/.daemon-state.json"

# If the user already has a real unit installed, refuse to run. Nothing has
# been created or trapped yet, so exiting here leaves the machine untouched.
if [[ -f "$UNIT" ]]; then
    echo "SKIP: existing unit at $UNIT -- not clobbering user data"
    exit 0
fi

# Snapshot pre-existing state files so cleanup can put them back.
BACKUP_DIR="$(mktemp -d -t iai-mcp-shtest-XXXXXX)"
PRE_EXISTING_LOCK=0
PRE_EXISTING_SOCK=0
PRE_EXISTING_STATE=0
if [[ -f "$LOCK" ]]; then
    PRE_EXISTING_LOCK=1
    cp "$LOCK" "$BACKUP_DIR/lock.bak"
fi
if [[ -f "$SOCK" ]]; then
    PRE_EXISTING_SOCK=1
    # Best effort: the copy is allowed to fail without aborting the run.
    cp "$SOCK" "$BACKUP_DIR/sock.bak" 2>/dev/null || true
fi
if [[ -f "$STATE" ]]; then
    PRE_EXISTING_STATE=1
    cp "$STATE" "$BACKUP_DIR/state.bak"
fi

cleanup() {
    # Remove whatever the test installed, then restore the user's state
    # files. No unit restore is needed: the script exits before reaching
    # this point whenever a unit already existed.
    "${CLI[@]}" daemon uninstall --yes >/dev/null 2>&1 || true
    mkdir -p "$STATE_DIR"
    if [[ "$PRE_EXISTING_LOCK" == "1" ]]; then
        cp "$BACKUP_DIR/lock.bak" "$LOCK"
    fi
    if [[ "$PRE_EXISTING_SOCK" == "1" && -f "$BACKUP_DIR/sock.bak" ]]; then
        cp "$BACKUP_DIR/sock.bak" "$SOCK" 2>/dev/null || true
    fi
    if [[ "$PRE_EXISTING_STATE" == "1" ]]; then
        cp "$BACKUP_DIR/state.bak" "$STATE"
    fi
    rm -rf "$BACKUP_DIR"
    systemctl --user daemon-reload >/dev/null 2>&1 || true
}
trap cleanup EXIT

echo "[1/6] First install (--yes bypasses consent banner)..."
"${CLI[@]}" daemon install --yes
if [[ ! -f "$UNIT" ]]; then
    echo "FAIL: unit not created at $UNIT"
    exit 1
fi
# Pitfall 5 sanity: rendered unit has absolute python path
# -F: match the interpreter path literally rather than as a regex.
if ! grep -qF -- "$PY" "$UNIT"; then
    echo "FAIL: unit does not contain absolute sys.executable ($PY)"
    cat "$UNIT"
    exit 1
fi

echo "[2/6] Verify systemctl shows the unit as enabled..."
# Warning only: a unit that is not enabled does not fail the run.
if ! systemctl --user is-enabled iai-mcp-daemon.service 2>/dev/null | grep -q enabled; then
    echo "WARN: unit not enabled (may be expected on minimal CI sessions)"
fi

echo "[3/6] Second install -- must be idempotent..."
if ! "${CLI[@]}" daemon install --yes; then
    echo "FAIL: install #2 returned non-zero"
    exit 1
fi
if [[ ! -f "$UNIT" ]]; then
    echo "FAIL: unit missing after install #2"
    exit 1
fi

# Seed state files so we can verify C4 cleanup actually removes them.
mkdir -p "$STATE_DIR"
touch "$LOCK" "$SOCK"
echo "{}" > "$STATE"

echo "[4/6] First uninstall (C4: remove unit + 3 state files)..."
"${CLI[@]}" daemon uninstall --yes
if [[ -f "$UNIT" ]]; then
    echo "FAIL: unit not removed"
    exit 1
fi
if [[ -f "$LOCK" ]]; then
    echo "FAIL: lock file not removed (C4 violation)"
    exit 1
fi
# -e rather than -f: a leftover UNIX socket is not a regular file, and it
# must count as a violation too.
if [[ -e "$SOCK" ]]; then
    echo "FAIL: socket file not removed (C4 violation)"
    exit 1
fi
if [[ -f "$STATE" ]]; then
    echo "FAIL: state file not removed (C4 violation)"
    exit 1
fi

echo "[5/6] Second uninstall -- must be idempotent..."
if ! "${CLI[@]}" daemon uninstall --yes; then
    echo "FAIL: uninstall #2 returned non-zero"
    exit 1
fi

echo "[6/6] Dry-run on Linux prints unit content + does NOT write..."
# Capture the output instead of piping straight into `grep -q`: under
# `pipefail`, grep exiting at the first match can make the producer die of
# SIGPIPE and fail the whole pipeline spuriously.
if ! DRY_RUN_OUTPUT="$("${CLI[@]}" daemon install --dry-run --yes)"; then
    echo "FAIL: dry-run install returned non-zero"
    exit 1
fi
if ! grep -q "iai_mcp.daemon" <<<"$DRY_RUN_OUTPUT"; then
    echo "FAIL: dry-run did not print unit content"
    exit 1
fi
if [[ -f "$UNIT" ]]; then
    echo "FAIL: dry-run wrote $UNIT -- it must be a no-write preview"
    exit 1
fi

echo "PASS: systemd install/uninstall idempotency + C4 + Pitfall 5"
exit 0
|
||||
189
tests/test_aaak.py
Normal file
189
tests/test_aaak.py
Normal file
|
|
@ -0,0 +1,189 @@
|
|||
"""Tests for the AAAK index generator + English-raw enforcement (D-08, TOK-10).
|
||||
|
||||
D-08 constitutional rule:
|
||||
- Storage is RAW VERBATIM English always.
|
||||
- AAAK is a RETRIEVAL VIEW only: wing/room/entities/tags metadata string.
|
||||
- The index MUST NOT contain literal_surface content.
|
||||
|
||||
TOK-10:
|
||||
- Non-English literal_surface must be flagged with a `raw:<lang>` tag; unflagged
|
||||
non-English content raises ValueError at write time via enforce_english_raw.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from uuid import UUID, uuid4
|
||||
|
||||
import pytest
|
||||
|
||||
from iai_mcp.aaak import (
|
||||
enforce_english_raw,
|
||||
generate_aaak_index,
|
||||
parse_aaak_index,
|
||||
)
|
||||
from iai_mcp.types import EMBED_DIM, MemoryRecord
|
||||
|
||||
|
||||
def _make(
    tier: str = "episodic",
    text: str = "hello world",
    tags: list[str] | None = None,
    community_id: UUID | None = None,
    language: str = "en",
) -> MemoryRecord:
    """Build a MemoryRecord for the AAAK tests with neutral default fields.

    Only the tier, the literal surface text, the tag list, the community id
    and the language are configurable; every other field receives a fixed
    placeholder. The record gets a fresh random id, a constant embedding of
    length EMBED_DIM, an empty ``aaak_index`` and UTC timestamps taken at
    call time. A missing or empty ``tags`` argument becomes a new empty
    list; otherwise the tags are copied into a new list.
    """
    record_fields = dict(
        id=uuid4(),
        tier=tier,
        literal_surface=text,
        aaak_index="",
        embedding=[0.1] * EMBED_DIM,
        community_id=community_id,
        centrality=0.0,
        detail_level=2,
        pinned=False,
        stability=0.0,
        difficulty=0.0,
        last_reviewed=None,
        never_decay=False,
        never_merge=False,
        provenance=[],
        created_at=datetime.now(timezone.utc),
        updated_at=datetime.now(timezone.utc),
        tags=[] if not tags else list(tags),
        language=language,
    )
    return MemoryRecord(**record_fields)
|
||||
|
||||
|
||||
# ------------------------------------------------ generate_aaak_index format
|
||||
|
||||
|
||||
def test_aaak_index_has_exactly_three_slashes():
    """Format invariant: W:<>/R:<>/E:<>/T:<> -> 3 separators regardless of content."""
    index = generate_aaak_index(_make())
    separator_count = index.count("/")
    assert separator_count == 3
|
||||
|
||||
|
||||
def test_aaak_index_starts_with_wing_marker():
    """A semantic-tier record produces an index that begins with ``W:S/``."""
    semantic_record = _make(tier="semantic")
    assert generate_aaak_index(semantic_record).startswith("W:S/")
|
||||
|
||||
|
||||
def test_aaak_index_has_four_key_value_segments():
    """Splitting on "/" gives four segments prefixed W:, R:, E:, T: in order."""
    record = _make(tier="episodic", tags=["entity:Alice", "project", "raw:en"])
    segments = generate_aaak_index(record).split("/")
    assert len(segments) == 4
    for segment, marker in zip(segments, ("W:", "R:", "E:", "T:")):
        assert segment.startswith(marker)
|
||||
|
||||
|
||||
def test_aaak_index_includes_entity_tag_stripped():
    """Entity tags appear in the E: segment with their ``entity:`` prefix removed.

    Only the text between ``/E:`` and ``/T:`` is inspected, so a match in the
    trailing T: segment cannot satisfy the assertions by accident.
    """
    r = _make(tags=["entity:Alice", "entity:IAI-MCP", "project"])
    idx = generate_aaak_index(r)
    # Isolate the entity segment: everything after "/E:" up to the next "/T:".
    entity_segment = idx.split("/E:")[1].split("/T:")[0]
    # entity: prefix stripped; entities comma-joined
    assert "Alice" in entity_segment
    assert "IAI-MCP" in entity_segment
    assert "entity:" not in entity_segment
|
||||
|
||||
|
||||
def test_aaak_index_deterministic():
    """Generating the index twice for one record gives equal strings."""
    record = _make(tags=["entity:X", "flag"])
    first_pass = generate_aaak_index(record)
    second_pass = generate_aaak_index(record)
    assert first_pass == second_pass
|
||||
|
||||
|
||||
# -------------------------------------------------------------- no-leak
|
||||
|
||||
|
||||
def test_aaak_index_does_not_contain_literal_surface():
    """Constitutional: neither the verbatim text nor its secret token leaks into the index."""
    verbatim = "Alice mentioned the SECRET_PASSWORD_ABC_XYZ on day 3"
    index = generate_aaak_index(_make(text=verbatim, tags=["entity:Alice", "project"]))
    for forbidden in (verbatim, "SECRET_PASSWORD_ABC_XYZ"):
        assert forbidden not in index
|
||||
|
||||
|
||||
def test_aaak_index_unknown_community_marker():
    """A record without a community id is indexed under room 'unknown'."""
    orphan_record = _make(community_id=None)
    index = generate_aaak_index(orphan_record)
    assert "R:unknown" in index
|
||||
|
||||
|
||||
def test_aaak_index_dash_when_no_entities():
    """With no ``entity:`` tags the entity segment is rendered as ``E:-``."""
    index = generate_aaak_index(_make(tags=["project"]))
    # The dash placeholder sits between two separators.
    assert "/E:-/" in index
|
||||
|
||||
|
||||
# -------------------------------------------------------- parse round-trip
|
||||
|
||||
|
||||
def test_parse_aaak_index_round_trips_entities_and_tags():
    """Parsing a generated index recovers the wing, entity list and tag set."""
    record = _make(
        tier="semantic",
        tags=["entity:Alice", "entity:IAI", "project", "urgent"],
    )
    parsed = parse_aaak_index(generate_aaak_index(record))
    assert parsed["wing"] == ["S"]
    assert parsed["entities"] == ["Alice", "IAI"]
    # Tags are compared as a set: membership is checked, order is not.
    assert set(parsed["tags"]) == {"project", "urgent"}
|
||||
|
||||
|
||||
def test_parse_aaak_dash_segments_become_empty_lists():
    """A record with no tags parses back to empty entity and tag lists."""
    parsed = parse_aaak_index(generate_aaak_index(_make(tags=[])))
    for key in ("entities", "tags"):
        assert parsed[key] == []
|
||||
|
||||
|
||||
# ------------------------------------------ TOK-10 English-raw enforcement
|
||||
|
||||
|
||||
def test_enforce_english_raw_accepts_pure_english():
    """Plain English text passes; returning without raising is the assertion."""
    record = _make(text="Alice said the IAI-MCP project is go")
    enforce_english_raw(record)
|
||||
|
||||
|
||||
def test_enforce_english_raw_rejects_cyrillic_without_tag():
    """Cyrillic text without a raw:<lang> tag raises ValueError mentioning 'constitutional'."""
    record = _make(text="Alice said: пусть сохранится точно", tags=["project"])
    with pytest.raises(ValueError) as excinfo:
        enforce_english_raw(record)
    # Inspect the message only after the context manager has captured the error.
    assert "constitutional" in str(excinfo.value)
|
||||
|
||||
|
||||
def test_enforce_english_raw_accepts_cyrillic_with_raw_tag():
    """An explicit raw:ru tag lets Cyrillic text through without raising."""
    cyrillic_text = "Alice said: пусть сохранится точно"
    record = _make(text=cyrillic_text, tags=["raw:ru", "project"])
    enforce_english_raw(record)
|
||||
|
||||
|
||||
def test_enforce_english_raw_rejects_cjk_without_tag():
    """CJK characters with no raw:<lang> tag raise ValueError."""
    record = _make(text="Hello 世界 verbatim", tags=[])
    with pytest.raises(ValueError):
        enforce_english_raw(record)
|
||||
|
||||
|
||||
def test_enforce_english_raw_rejects_hiragana_without_tag():
    """Hiragana characters with no raw:<lang> tag raise ValueError."""
    record = _make(text="Hello こんにちは world", tags=[])
    with pytest.raises(ValueError):
        enforce_english_raw(record)
|
||||
|
||||
|
||||
def test_enforce_english_raw_accepts_cjk_with_raw_tag():
    """A raw:zh tag lets CJK text through without raising."""
    record = _make(text="Hello 世界", tags=["raw:zh"])
    enforce_english_raw(record)
|
||||
|
||||
|
||||
def test_enforce_english_raw_empty_text_passes():
    """Empty text is accepted without raising."""
    record = _make(text="")
    enforce_english_raw(record)
|
||||
128
tests/test_active_inference_gate.py
Normal file
128
tests/test_active_inference_gate.py
Normal file
|
|
@ -0,0 +1,128 @@
|
|||
"""Tests for TOK-06 active-inference retrieval gate (Plan 02-04 Task 2, D-26).
|
||||
|
||||
D-26 contract: skip full pipeline_recall when expected free-energy reduction
|
||||
is less than 0.2 bits. Trivial cues (greetings, "thanks", very short strings)
|
||||
short-circuit to L0-only.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from uuid import uuid4
|
||||
|
||||
import pytest
|
||||
|
||||
from iai_mcp.store import MemoryStore
|
||||
from iai_mcp.types import EMBED_DIM, MemoryRecord
|
||||
|
||||
|
||||
def test_theta_skip_constant():
    """The gate's skip threshold is pinned at 0.2."""
    from iai_mcp.gate import THETA_SKIP

    expected_threshold = 0.2
    assert THETA_SKIP == expected_threshold
|
||||
|
||||
|
||||
def test_efer_empty_is_zero():
    """An empty cue has an expected free-energy reduction of exactly 0.0."""
    from iai_mcp.gate import expected_free_energy_reduction

    reduction = expected_free_energy_reduction("")
    assert reduction == 0.0
|
||||
|
||||
|
||||
def test_efer_trivial_greeting_is_below_theta():
    """Each trivial greeting or acknowledgement scores strictly below THETA_SKIP."""
    from iai_mcp.gate import THETA_SKIP, expected_free_energy_reduction

    trivial_cues = ("hi", "hello", "thanks", "ok", "yes", "no")
    for cue in trivial_cues:
        val = expected_free_energy_reduction(cue)
        assert val < THETA_SKIP, f"cue={cue!r} val={val}"
|
||||
|
||||
|
||||
def test_efer_rich_is_above_theta():
    """A long, content-heavy cue scores strictly above THETA_SKIP."""
    from iai_mcp.gate import THETA_SKIP, expected_free_energy_reduction

    rich_cue = (
        "explain how CLS replay interacts with schema induction under "
        "monotropic attention"
    )
    assert expected_free_energy_reduction(rich_cue) > THETA_SKIP
|
||||
|
||||
|
||||
def test_should_skip_retrieval_trivial():
    """'hi' is skipped, and a truthy reason accompanies the decision."""
    from iai_mcp.gate import should_skip_retrieval

    decision = should_skip_retrieval("hi")
    skip, reason = decision
    assert skip is True
    assert reason
|
||||
|
||||
|
||||
def test_should_skip_retrieval_informative():
    """A substantive question is not skipped."""
    from iai_mcp.gate import should_skip_retrieval

    question = "What did we discuss about auth last week?"
    skip, _reason = should_skip_retrieval(question)
    assert skip is False
|
||||
|
||||
|
||||
def test_should_skip_very_short_cue():
    """A one-character cue and the empty cue are both skipped."""
    from iai_mcp.gate import should_skip_retrieval

    for cue in ("a", ""):
        skip, _ = should_skip_retrieval(cue)
        assert skip is True
|
||||
|
||||
|
||||
def test_pipeline_recall_skip_path_returns_minimal_response(tmp_path, monkeypatch):
    """A trivial cue through memory_recall reports a small ``budget_used``.

    The embedder is replaced with a deterministic fake, the store is seeded
    with the L0 identity plus three extra episodic records, and the response
    to the cue "hi" must carry ``budget_used`` below 200.
    """
    from iai_mcp import embed as embed_mod
    from iai_mcp.core import _seed_l0_identity, dispatch

    class _FakeEmbedder:
        """Stand-in embedder that returns the same unit vector for any text."""

        DIM = EMBED_DIM
        DEFAULT_DIM = EMBED_DIM
        DEFAULT_MODEL_KEY = "fake"

        def __init__(self, *args, **kwargs):
            self.DIM = EMBED_DIM

        def embed(self, text: str) -> list[float]:
            return [1.0] + [0.0] * (EMBED_DIM - 1)

        def embed_batch(self, texts):
            return [self.embed(t) for t in texts]

    monkeypatch.setattr(embed_mod, "Embedder", _FakeEmbedder)

    store = MemoryStore(path=tmp_path)
    _seed_l0_identity(store)

    # Extra records give the full pipeline something to retrieve if the gate
    # did not short-circuit.
    now = datetime.now(timezone.utc)
    for i in range(3):
        store.insert(
            MemoryRecord(
                id=uuid4(),
                tier="episodic",
                literal_surface=f"extra fact {i}",
                aaak_index="",
                embedding=[1.0] + [0.0] * (EMBED_DIM - 1),
                community_id=None,
                centrality=0.0,
                detail_level=2,
                pinned=False,
                stability=0.0,
                difficulty=0.0,
                last_reviewed=None,
                never_decay=False,
                never_merge=False,
                provenance=[],
                created_at=now,
                updated_at=now,
                tags=[],
                language="en",
            )
        )

    request = {"cue": "hi", "session_id": "s-trivial"}
    resp = dispatch(store, "memory_recall", request)
    assert "budget_used" in resp
    # The bound is deliberately loose; the skip path is expected to use far less.
    assert resp["budget_used"] < 200
|
||||
69
tests/test_art_gate.py
Normal file
69
tests/test_art_gate.py
Normal file
|
|
@ -0,0 +1,69 @@
|
|||
"""ART vigilance gate tests (MEM-03, D-07, D-14)."""
|
||||
from __future__ import annotations
|
||||
|
||||
from iai_mcp.types import EMBED_DIM
|
||||
from iai_mcp.write import VIGILANCE_RHO, apply_art_gate, cosine
|
||||
from tests.test_store import _make
|
||||
|
||||
|
||||
def test_vigilance_rho_is_0_95():
    """The vigilance threshold ρ is pinned at 0.95."""
    expected_rho = 0.95
    assert VIGILANCE_RHO == expected_rho
|
||||
|
||||
|
||||
def test_empty_store_creates():
    """With no existing records the gate creates, targeting the candidate's own id."""
    candidate = _make()
    gate_result = apply_art_gate([], candidate)
    action, target = gate_result
    assert action == "create"
    assert target == candidate.id
|
||||
|
||||
|
||||
def test_high_similarity_merges():
    """An identical embedding merges into the existing record."""
    stored = _make(vec=[1.0] + [0.0] * (EMBED_DIM - 1))
    # Same direction as the stored record.
    incoming = _make(vec=[1.0] + [0.0] * (EMBED_DIM - 1))
    action, target = apply_art_gate([stored], incoming)
    assert action == "merge"
    assert target == stored.id
|
||||
|
||||
|
||||
def test_low_similarity_creates():
    """Orthogonal embeddings lead to 'create' with the candidate's id."""
    stored = _make(vec=[1.0] + [0.0] * (EMBED_DIM - 1))
    # Only the last component is set, so the two vectors share no axis.
    incoming = _make(vec=[0.0] * (EMBED_DIM - 1) + [1.0])
    action, target = apply_art_gate([stored], incoming)
    assert action == "create"
    assert target == incoming.id
|
||||
|
||||
|
||||
def test_moderate_similarity_below_rho_creates():
    """A candidate at cosine ~0.90 to the stored record is created, not merged."""
    import math

    stored = _make(vec=[1.0] + [0.0] * (EMBED_DIM - 1))
    # [0.9, sqrt(1 - 0.81), 0, ...] has unit norm, so its cosine with the
    # first basis vector equals its first component.
    second_component = math.sqrt(1 - 0.9 * 0.9)
    incoming = _make(vec=[0.9, second_component] + [0.0] * (EMBED_DIM - 2))

    similarity = cosine(stored.embedding, incoming.embedding)
    assert abs(similarity - 0.9) < 1e-6

    action, target = apply_art_gate([stored], incoming)
    assert action == "create"
    assert target == incoming.id
|
||||
|
||||
|
||||
def test_never_merge_record_skipped():
    """A never_merge=True record is not a merge target, even for an identical vector."""
    protected = _make(
        vec=[1.0] + [0.0] * (EMBED_DIM - 1),
        pinned=True,
        never_merge=True,
    )
    # Identical embedding to the protected record.
    incoming = _make(vec=[1.0] + [0.0] * (EMBED_DIM - 1))
    action, target = apply_art_gate([protected], incoming)
    assert action == "create"
    assert target == incoming.id
|
||||
|
||||
|
||||
def test_cosine_zero_vector_returns_zero():
    """cosine() yields 0.0 when either argument is the zero vector."""
    zero_first = cosine([0.0, 0.0, 0.0], [1.0, 2.0, 3.0])
    assert zero_first == 0.0
    zero_second = cosine([1.0, 0.0], [0.0, 0.0])
    assert zero_second == 0.0
|
||||
215
tests/test_autist_knobs_live.py
Normal file
215
tests/test_autist_knobs_live.py
Normal file
|
|
@ -0,0 +1,215 @@
|
|||
"""Tests for autistic-kernel knob registry: 10 AUTIST + 1 wake_depth = 11 sealed.
|
||||
|
||||
History: flipped the 9 Phase-2 deferred knobs to phase=1.
|
||||
PHASE_1_LIVE became a 13-member frozenset, then 14 with flip, then 15
|
||||
after wake_depth append. Plan 07.12-02 removed 4 dead KnobSpec
|
||||
entries (AUTIST-02 sensory_channel_weights, event_vs_time_cue,
|
||||
AUTIST-11 alexithymia_accommodation, double_empathy) — final shape
|
||||
is 11 sealed entries, 10 AUTIST + wake_depth.
|
||||
|
||||
Schema/value validation covers enum/bool/int_range/float_range and
|
||||
`dict:<keytype>:<valuetype>` for monotropism_depth (recursive per-key
|
||||
validation). dunn_quadrant keeps the enum shape but gains a
|
||||
float_range-style HIPPEA_precision_spec that migrates cleanly.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from iai_mcp.profile import (
|
||||
PHASE_1_LIVE,
|
||||
PHASE_2_DEFERRED,
|
||||
PHASE_3_DEFERRED,
|
||||
PROFILE_KNOBS,
|
||||
default_state,
|
||||
profile_get,
|
||||
profile_set,
|
||||
)
|
||||
|
||||
|
||||
# --------------------------------------------------------------- registry shape
|
||||
|
||||
def test_phase_1_live_has_14_knobs():
    """PHASE_1_LIVE holds 11 knobs: 10 autistic-kernel plus wake_depth.

    The "14" in the test name is historical and kept for git stability; the
    count was 14, then 15, and became 11 once AUTIST-02/08/11/12 were removed.
    The autistic-kernel-only count (10) is checked separately in
    test_all_14_requirement_ids_present.
    """
    live_count = len(PHASE_1_LIVE)
    assert live_count == 11
|
||||
|
||||
|
||||
def test_phase_3_deferred_now_empty_after_autist13_flip():
    """PHASE_3_DEFERRED is an empty frozenset (camouflaging_relaxation moved to phase=1)."""
    empty = frozenset()
    assert PHASE_3_DEFERRED == empty
    assert len(PHASE_3_DEFERRED) == 0
|
||||
|
||||
|
||||
def test_phase_2_deferred_empty():
    """PHASE_2_DEFERRED is an empty frozenset (all nine Phase-2 knobs moved to phase=1)."""
    empty = frozenset()
    assert PHASE_2_DEFERRED == empty
    assert len(PHASE_2_DEFERRED) == 0
|
||||
|
||||
|
||||
def test_all_14_requirement_ids_present():
    """The registry has exactly ten AUTIST-* knobs plus wake_depth (MCP-12).

    The "14" in the test name is historical and kept for git stability. The
    registry grew to 15 entries when wake_depth was appended, and four dead
    knobs (AUTIST-02/08/11/12) were later removed, leaving 11 sealed entries.
    """
    expected = {
        "AUTIST-01", "AUTIST-03", "AUTIST-04", "AUTIST-05",
        "AUTIST-06", "AUTIST-07", "AUTIST-09", "AUTIST-10",
        "AUTIST-13", "AUTIST-14",
    }
    autist_specs = []
    for spec in PROFILE_KNOBS.values():
        if spec.requirement_id.startswith("AUTIST-"):
            autist_specs.append(spec)
    assert len(autist_specs) == 10
    req_ids = {spec.requirement_id for spec in autist_specs}
    assert req_ids == expected

    # The registry total also counts the operator-facing wake_depth knob.
    assert len(PROFILE_KNOBS) == 11
    assert "wake_depth" in PROFILE_KNOBS
    wake_depth_spec = PROFILE_KNOBS["wake_depth"]
    assert wake_depth_spec.requirement_id == "MCP-12"
|
||||
|
||||
|
||||
# ------------------------------------------------------- dict-schema validator
|
||||
|
||||
|
||||
def test_monotropism_depth_live_accepts_dict():
    """A per-domain dict of in-range floats is accepted and stored in state."""
    state = default_state()
    depths = {"coding": 0.8, "gardening": 0.3}
    outcome = profile_set("monotropism_depth", depths, state)
    assert outcome["status"] == "ok"
    assert state["monotropism_depth"] == {"coding": 0.8, "gardening": 0.3}
|
||||
|
||||
|
||||
def test_monotropism_depth_live_rejects_out_of_range():
    """A per-domain value of 1.5 is rejected with status 'error'."""
    outcome = profile_set("monotropism_depth", {"x": 1.5}, default_state())
    assert outcome["status"] == "error"
|
||||
|
||||
|
||||
def test_monotropism_depth_live_rejects_non_dict():
    """A bare integer instead of a dict is rejected with status 'error'."""
    outcome = profile_set("monotropism_depth", 3, default_state())
    assert outcome["status"] == "error"
|
||||
|
||||
|
||||
# Plan 07.12-02 removed test_sensory_channel_weights_live_accepts_dict /
|
||||
# test_sensory_channel_weights_live_rejects_out_of_range — sensory_channel_weights was a
|
||||
# DEAD knob (declared but never read in any production scoring/response code);
|
||||
# the registry entry was removed and profile_set now returns the unknown-knob
|
||||
# error. See tests/test_profile_no_dead_knobs.py for the post-removal contract.
|
||||
|
||||
|
||||
# ------------------------------------------------------- enum-schema validator
|
||||
|
||||
|
||||
def test_dunn_quadrant_live():
    """Setting dunn_quadrant to 'seeking' succeeds and updates state."""
    state = default_state()
    outcome = profile_set("dunn_quadrant", "seeking", state)
    assert outcome["status"] == "ok"
    assert state["dunn_quadrant"] == "seeking"
|
||||
|
||||
|
||||
def test_dunn_quadrant_rejects_garbage():
    """The value 'garbage' for dunn_quadrant is rejected with status 'error'."""
    outcome = profile_set("dunn_quadrant", "garbage", default_state())
    assert outcome["status"] == "error"
|
||||
|
||||
|
||||
def test_demand_avoidance_tolerance_live():
    """All three tolerance values are accepted; the last one set remains in state."""
    state = default_state()
    accepted_values = ("collaborative", "neutral", "imperative")
    for value in accepted_values:
        outcome = profile_set("demand_avoidance_tolerance", value, state)
        assert outcome["status"] == "ok", f"expected {value} accepted"
    # The loop ends on "imperative", so that is what state must hold.
    assert state["demand_avoidance_tolerance"] == "imperative"
|
||||
|
||||
|
||||
# Plan 07.12-02 removed test_event_vs_time_cue_live / test_alexithymia_accommodation_live —
|
||||
# (event_vs_time_cue) and (alexithymia_accommodation) were
|
||||
# DEAD knobs (no taxonomy in schema, never read in production). Removed from
|
||||
# registry; profile_set now returns the unknown-knob error.
|
||||
# See tests/test_profile_no_dead_knobs.py for the post-removal contract.
|
||||
|
||||
|
||||
# ----------------------------------------------------- bool-schema validator
|
||||
|
||||
|
||||
def test_inertia_awareness_live():
    """inertia_awareness accepts True and rejects the integer 1."""
    state = default_state()
    accepted = profile_set("inertia_awareness", True, state)
    assert accepted["status"] == "ok"
    # An int is not accepted in place of a bool.
    rejected = profile_set("inertia_awareness", 1, state)
    assert rejected["status"] == "error"
|
||||
|
||||
|
||||
# Plan 07.12-02 removed test_double_empathy_live — (double_empathy)
|
||||
# was promoted to a passive system invariant (CLAUDE.md "Architectural
|
||||
# Invariants — Pinned"); the system never translates phrasing toward NT style
|
||||
# at any path, so a runtime knob was redundant. Removed from registry.
|
||||
# See tests/test_profile_no_dead_knobs.py for the post-removal contract.
|
||||
|
||||
|
||||
# ----------------------------------------------------- float-schema validator
|
||||
|
||||
|
||||
def test_interest_boost_live():
    """interest_boost accepts 0.75 and rejects 2.0."""
    state = default_state()
    accepted = profile_set("interest_boost", 0.75, state)
    assert accepted["status"] == "ok"
    rejected = profile_set("interest_boost", 2.0, state)
    assert rejected["status"] == "error"
|
||||
|
||||
|
||||
# ----------------------------------------------------- HIPPEA_precision spec
|
||||
|
||||
|
||||
def test_HIPPEA_precision_spec_added_wire_to_autist_03():
    """AUTIST-03 must appear in the registry in one of two accepted shapes.

    Either a dedicated ``HIPPEA_precision`` knob exists and its value_schema
    contains ``float_range:``, or no such knob exists and ``dunn_quadrant``
    is still registered with a value_schema that starts with ``enum:``.
    """
    if "HIPPEA_precision" not in PROFILE_KNOBS:
        # Fallback shape: dunn_quadrant keeps its enum schema.
        fallback_spec = PROFILE_KNOBS["dunn_quadrant"]
        assert fallback_spec.value_schema.startswith("enum:")
        return

    # Preferred shape: a float-range knob.
    precision_spec = PROFILE_KNOBS["HIPPEA_precision"]
    assert "float_range:" in precision_spec.value_schema
|
||||
|
||||
|
||||
# ----------------------------------------------------- profile_get coverage
|
||||
|
||||
|
||||
def test_profile_get_returns_14_live_entries():
    """profile_get(None, ...) lists 11 live knobs and no deferred ones.

    The "14" in the test name is historical and kept for git stability.
    """
    listing = profile_get(None, default_state())
    assert len(listing["live"]) == 11
    assert len(listing["deferred"]) == 0
|
||||
|
||||
|
||||
def test_profile_get_monotropism_depth_returns_default_dict():
    """profile_get for monotropism_depth echoes the knob name and a dict value."""
    entry = profile_get("monotropism_depth", default_state())
    assert entry["knob"] == "monotropism_depth"
    assert "value" in entry
    # The default is stored per-domain, hence a dict.
    assert isinstance(entry["value"], dict)
|
||||
120
tests/test_batch_api.py
Normal file
120
tests/test_batch_api.py
Normal file
|
|
@ -0,0 +1,120 @@
|
|||
"""Tests for TOK-09 Batch API consolidation (Plan 02-04 Task 3, D-29).
|
||||
|
||||
submit_batch_consolidation passes through D-GUARD (should_call_llm) before
|
||||
any network work. On Tier 0 fallback (no llm_enabled, no api key, budget
|
||||
exceeded, ratelimit cooldown) returns stub results + writes llm_health
|
||||
event. Scope of these tests: the gate + event side-effects are load-bearing;
|
||||
the real anthropic.batches.create call is stubbed (SDK surface varies).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from iai_mcp.events import query_events
|
||||
from iai_mcp.guard import BudgetLedger, RateLimitLedger
|
||||
from iai_mcp.store import MemoryStore
|
||||
|
||||
|
||||
def _tasks(n: int = 3) -> list[dict]:
|
||||
return [
|
||||
{
|
||||
"task_id": f"t{i}",
|
||||
"prompt": f"summarise cluster {i}",
|
||||
"prompt_tok": 500,
|
||||
"output_tok": 200,
|
||||
}
|
||||
for i in range(n)
|
||||
]
|
||||
|
||||
|
||||
def test_batch_fallback_when_llm_disabled(tmp_path):
    """With llm_enabled=False the call fails, says why, and still returns a list."""
    from iai_mcp.batch import submit_batch_consolidation

    store = MemoryStore(path=tmp_path)
    ok, reason, results = submit_batch_consolidation(
        store,
        _tasks(),
        BudgetLedger(store),
        RateLimitLedger(store),
        llm_enabled=False,
    )
    assert ok is False
    lowered = reason.lower()
    assert "llm_enabled" in lowered or "disabled" in lowered
    # A list (never None) keeps downstream consumers from crashing.
    assert isinstance(results, list)
|
||||
|
||||
|
||||
def test_batch_fallback_when_no_api_key(tmp_path, monkeypatch):
    """With ANTHROPIC_API_KEY unset the call fails and the reason mentions the key."""
    from iai_mcp.batch import submit_batch_consolidation

    monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
    store = MemoryStore(path=tmp_path)
    ok, reason, _ = submit_batch_consolidation(
        store,
        _tasks(),
        BudgetLedger(store),
        RateLimitLedger(store),
        llm_enabled=True,
    )
    assert ok is False
    # D-GUARD step 2: the reason must point at the missing API key.
    lowered = reason.lower()
    assert "api" in lowered or "key" in lowered
|
||||
|
||||
|
||||
def test_batch_emits_llm_health_on_fallback(tmp_path):
    """A disabled-LLM fallback writes at least one batch_consolidation llm_health event."""
    from iai_mcp.batch import submit_batch_consolidation

    store = MemoryStore(path=tmp_path)
    budget = BudgetLedger(store)
    rate = RateLimitLedger(store)
    submit_batch_consolidation(store, _tasks(), budget, rate, llm_enabled=False)

    fallback_events = []
    for event in query_events(store, kind="llm_health"):
        if event["data"].get("component") == "batch_consolidation":
            fallback_events.append(event)
    assert len(fallback_events) >= 1
|
||||
|
||||
|
||||
def test_batch_50pct_discount():
    """BATCH_DISCOUNT is 0.5 and halves a positive sync-tier cost."""
    from iai_mcp.batch import BATCH_DISCOUNT, _sync_tier_cost

    one_million = 1_000_000
    sync = _sync_tier_cost(one_million, one_million)
    # Only the shape is checked: the cost must be positive, not a specific price.
    assert sync > 0
    discounted = sync * BATCH_DISCOUNT
    assert discounted == sync * 0.5
    assert BATCH_DISCOUNT == 0.5
|
||||
|
||||
|
||||
def test_batch_records_spend_when_eligible(tmp_path, monkeypatch):
    """Daily spend never decreases on success and is unchanged on failure.

    The API key is set and llm_enabled=True; the assertion then depends on
    whichever outcome submit_batch_consolidation reports.
    """
    from iai_mcp.batch import submit_batch_consolidation

    monkeypatch.setenv("ANTHROPIC_API_KEY", "test-key")
    store = MemoryStore(path=tmp_path)
    budget = BudgetLedger(store)
    rate = RateLimitLedger(store)

    before = budget.daily_used()
    ok, _reason, _results = submit_batch_consolidation(
        store, _tasks(5), budget, rate, llm_enabled=True,
    )
    after = budget.daily_used()

    if not ok:
        # The submission did not go through, so nothing may have been debited.
        assert after == before
    else:
        assert after >= before
|
||||
|
||||
|
||||
def test_sync_tier_cost_monotonic():
    """Doubling the prompt tokens at fixed output tokens strictly raises the cost."""
    from iai_mcp.batch import _sync_tier_cost

    shorter = _sync_tier_cost(1000, 500)
    longer = _sync_tier_cost(2000, 500)
    assert longer > shorter
|
||||
199
tests/test_batch_guard.py
Normal file
199
tests/test_batch_guard.py
Normal file
|
|
@ -0,0 +1,199 @@
|
|||
"""Tests for 02-REVIEW.md H-02 (batch scaffold silently debits budget +
|
||||
flips effective_tier=tier1 on a stub that produces no output).
|
||||
|
||||
Bug: submit_batch_consolidation called budget.record_spend BEFORE the real
|
||||
SDK call and returned (True, "ok", []). run_heavy_consolidation then saw
|
||||
ok_batch=True and set effective_tier="tier1", logging it in the
|
||||
consolidation event. Users inspecting `iai-mcp audit` saw Tier-1 events
|
||||
that were factually false.
|
||||
|
||||
Fix:
|
||||
- Scaffold path returns (False, "stub: batch API not yet wired", []).
|
||||
- NO budget.record_spend call during the stub period.
|
||||
- Emit one info-severity llm_health event documenting the gap so the
|
||||
audit CLI reflects honest state.
|
||||
- run_heavy_consolidation sees ok_batch=False and keeps tier0; the
|
||||
cls_consolidation_run event payload carries batch_submitted=False.
|
||||
|
||||
Constitutional contract (D-GUARD budget honesty + audit repudiability):
|
||||
Budget ledger rows MUST correspond to real API spend. Tier flags in
|
||||
the event log MUST correspond to real Tier-1 output. Both invariants
|
||||
were silently violated by the scaffold.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from iai_mcp.events import query_events
|
||||
from iai_mcp.guard import BudgetLedger, RateLimitLedger
|
||||
from iai_mcp.store import MemoryStore
|
||||
|
||||
|
||||
def _tasks(n: int = 1) -> list[dict]:
|
||||
return [
|
||||
{
|
||||
"task_id": f"t{i}",
|
||||
"prompt": f"summarise cluster {i}",
|
||||
"prompt_tok": 500,
|
||||
"output_tok": 200,
|
||||
}
|
||||
for i in range(n)
|
||||
]
|
||||
|
||||
|
||||
# ==================================================== H-02: batch scaffold guard
|
||||
|
||||
|
||||
def test_batch_stub_returns_false_with_scaffold_reason(tmp_path, monkeypatch):
    """The stub reports (False, "stub: ... batch API not yet wired ...", []).

    This holds even with an API key set and llm_enabled=True, which is what
    prevents the caller from flipping to tier1.
    """
    from iai_mcp.batch import submit_batch_consolidation

    monkeypatch.setenv("ANTHROPIC_API_KEY", "fake-test-key")
    store = MemoryStore(path=tmp_path)
    ok, reason, results = submit_batch_consolidation(
        store,
        _tasks(3),
        BudgetLedger(store),
        RateLimitLedger(store),
        llm_enabled=True,
    )

    assert ok is False, "scaffold must return ok=False until real SDK wire-up lands"
    assert reason.startswith("stub:"), (
        f"reason must advertise scaffold status, got {reason!r}"
    )
    assert "batch API not yet wired" in reason
    assert results == [], "scaffold produces empty result list"
|
||||
|
||||
|
||||
def test_batch_stub_does_not_debit_budget(tmp_path, monkeypatch):
    """Daily and monthly spend are identical before and after a stub submission."""
    from iai_mcp.batch import submit_batch_consolidation

    monkeypatch.setenv("ANTHROPIC_API_KEY", "fake-test-key")
    store = MemoryStore(path=tmp_path)
    budget = BudgetLedger(store)
    rate = RateLimitLedger(store)

    # Snapshot both ledgers, run the stub, then snapshot again.
    before_daily = budget.daily_used()
    before_monthly = budget.monthly_used()
    submit_batch_consolidation(store, _tasks(5), budget, rate, llm_enabled=True)
    after_daily = budget.daily_used()
    after_monthly = budget.monthly_used()

    assert after_daily == before_daily, (
        f"daily spend changed during stub: {before_daily} -> {after_daily}"
    )
    assert after_monthly == before_monthly
|
||||
|
||||
|
||||
def test_batch_stub_emits_info_llm_health_event(tmp_path, monkeypatch):
    """The stub writes an info-severity llm_health event that names the scaffold state.

    The first batch_consolidation event must have severity "info" and a note
    containing "scaffold" or "not yet wired" (case-insensitive).
    """
    from iai_mcp.batch import submit_batch_consolidation

    monkeypatch.setenv("ANTHROPIC_API_KEY", "fake-test-key")
    store = MemoryStore(path=tmp_path)
    budget = BudgetLedger(store)
    rate = RateLimitLedger(store)
    submit_batch_consolidation(store, _tasks(), budget, rate, llm_enabled=True)

    batch_events = []
    for event in query_events(store, kind="llm_health"):
        if event["data"].get("component") == "batch_consolidation":
            batch_events.append(event)
    assert len(batch_events) >= 1, "must emit llm_health for batch stub"

    ev = batch_events[0]
    assert ev["severity"] == "info", (
        f"scaffold event must be info-severity, got {ev['severity']!r}"
    )
    # A missing or None note is treated as an empty string.
    note = ev["data"].get("note") or ""
    lowered_note = note.lower()
    assert "scaffold" in lowered_note or "not yet wired" in lowered_note, (
        f"event note must advertise scaffold/not-yet-wired status, got {note!r}"
    )
|
||||
|
||||
|
||||
def test_run_heavy_does_not_flip_tier1_on_stub(tmp_path, monkeypatch):
    """run_heavy_consolidation stays on tier0 while the batch call is a stub.

    Even with an API key, llm_enabled=True and has_api_key=True, the returned
    tier and the logged heavy cls_consolidation_run event must both say
    "tier0", and the event must carry batch_submitted=False.
    """
    # BudgetLedger / RateLimitLedger come from the module-level import.
    from iai_mcp.sleep import SleepConfig, run_heavy_consolidation

    monkeypatch.setenv("ANTHROPIC_API_KEY", "fake-test-key")
    store = MemoryStore(path=tmp_path)
    budget = BudgetLedger(store)
    rate = RateLimitLedger(store)

    cfg = SleepConfig(llm_enabled=True)
    result = run_heavy_consolidation(
        store,
        session_id="h-stub",
        config=cfg,
        budget=budget,
        rate=rate,
        has_api_key=True,
    )

    assert result["tier"] == "tier0", (
        f"effective_tier must stay tier0 during scaffold, got {result['tier']!r}"
    )

    # The logged heavy-mode event must agree with the returned result.
    events = query_events(store, kind="cls_consolidation_run")
    heavy = [e for e in events if e["data"].get("mode") == "heavy"]
    assert len(heavy) >= 1
    assert heavy[0]["data"]["batch_submitted"] is False, (
        "batch_submitted flag must honestly reflect stub state"
    )
    # The event's own "tier" field reports the effective tier (tier0).
    assert heavy[0]["data"].get("tier") == "tier0"
|
||||
|
||||
|
||||
def test_run_heavy_does_not_debit_budget_during_stub(tmp_path, monkeypatch):
    """Heavy consolidation with the stub writes no batch_consolidation ledger row.

    Total spend is not compared, because schema_induction_tier1 may record
    its own small spend; only the "batch_consolidation" kind is checked.
    """
    from iai_mcp.sleep import SleepConfig, run_heavy_consolidation

    monkeypatch.setenv("ANTHROPIC_API_KEY", "fake-test-key")
    store = MemoryStore(path=tmp_path)
    budget = BudgetLedger(store)
    rate = RateLimitLedger(store)

    # NOTE(review): `before` is never compared below — confirm whether this
    # call is needed for a side effect or can be dropped.
    before = budget.daily_used()

    run_heavy_consolidation(
        store,
        session_id="h-no-debit",
        config=SleepConfig(llm_enabled=True),
        budget=budget,
        rate=rate,
        has_api_key=True,
    )

    ledger_frame = store.db.open_table("budget_ledger").to_pandas()
    if not ledger_frame.empty:
        batch_rows = ledger_frame[ledger_frame["kind"] == "batch_consolidation"]
        assert len(batch_rows) == 0, (
            "stub must not record a batch_consolidation spend row"
        )
|
||||
348
tests/test_bedtime.py
Normal file
348
tests/test_bedtime.py
Normal file
|
|
@ -0,0 +1,348 @@
|
|||
"""Tests for iai_mcp.bedtime -- Task 1.
|
||||
|
||||
Covers 14 behaviours from the plan:
|
||||
1. English positive -- "good night" / "heading to bed" / "tired"
|
||||
2. English negative (phrase alone, no dual-gate)
|
||||
3. Russian positive
|
||||
4. Japanese positive
|
||||
5. Arabic positive
|
||||
6. de/fr/es/zh positive (one phrase per language at minimum)
|
||||
7. Cross-lingual fallback -- EN always tried; RU NOT tried under language="en"
|
||||
8. Dual-gate: phrase alone NOT enough (no quiet window -> None)
|
||||
9. Dual-gate: inside quiet window -> dict
|
||||
10. Dual-gate: within 30min of start -> dict
|
||||
11. Dual-gate: 1h before start -> None
|
||||
12. Fixture-driven corpus: 5 positive + 5 negative per language
|
||||
13. False positive rate < 10% on phrase-only check across all 8 fixtures
|
||||
14. ReDoS protection: 10KB input under 100ms total across all patterns
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from zoneinfo import ZoneInfo
|
||||
|
||||
import pytest
|
||||
|
||||
from iai_mcp import bedtime
|
||||
from iai_mcp.bedtime import (
|
||||
WIND_DOWN_BY_LANG,
|
||||
WIND_DOWN_GATE_MINUTES_BEFORE,
|
||||
WIND_DOWN_LANGUAGES_SUPPORTED,
|
||||
detect_wind_down,
|
||||
detect_wind_down_phrase,
|
||||
is_late_in_quiet_window,
|
||||
)
|
||||
|
||||
# Shorthand for the UTC tzinfo passed to the time-based helpers and tests below.
UTC = timezone.utc
|
||||
# Directory holding the per-language fixture files (``<lang>.txt``) read by the corpus tests.
FIXTURES = Path(__file__).parent / "fixtures" / "bedtime"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- phrase gate
|
||||
|
||||
|
||||
def test_english_positive() -> None:
    """Common English wind-down cues match and report the pattern that fired."""
    english_cues = (
        "good night",
        "I'm heading to bed",
        "I'm tired, going to sleep",
        "catch you tomorrow",
        "it's bedtime",
        "Goodnight!",
    )
    for cue in english_cues:
        hit, fired_pattern = detect_wind_down_phrase(cue, "en")
        assert hit, f"expected EN positive for {cue!r}"
        # A positive match must come with a non-empty pattern string.
        assert fired_pattern
|
||||
|
||||
|
||||
def test_english_phrase_matches_even_rhetorical() -> None:
    """The phrase gate on its own matches even a rhetorical "good night".

    Filtering by quiet window is the dual gate's job; this test pins the
    permissive phrase-only behaviour so the dual-gate tests can be told
    apart from it.
    """
    sentence = "the villain said good night and laughed"
    hit, fired_pattern = detect_wind_down_phrase(sentence, "en")
    assert hit, "phrase gate alone is intentionally permissive"
    assert "night" in fired_pattern.lower()
|
||||
|
||||
|
||||
def test_russian_positive() -> None:
    """Each Russian wind-down cue is recognised under language "ru"."""
    russian_cues = (
        "пойду спать",
        "спокойной ночи",
        "устал, иду в постель",
        "до завтра",
        "пора ложиться",
    )
    for cue in russian_cues:
        hit, _pattern = detect_wind_down_phrase(cue, "ru")
        assert hit, f"expected RU positive for {cue!r}"
|
||||
|
||||
|
||||
def test_japanese_positive() -> None:
    """Each Japanese wind-down cue is recognised under language "ja"."""
    japanese_cues = (
        "おやすみ",
        "おやすみなさい",
        "寝ます",
        "また明日",
        "疲れた",
    )
    for cue in japanese_cues:
        hit, _pattern = detect_wind_down_phrase(cue, "ja")
        assert hit, f"expected JA positive for {cue!r}"
|
||||
|
||||
|
||||
def test_arabic_positive() -> None:
    """Each Arabic wind-down cue is recognised under language "ar"."""
    arabic_cues = (
        "تصبح على خير",
        "ليلة سعيدة",
        "أنا متعب سأنام",
    )
    for cue in arabic_cues:
        hit, _pattern = detect_wind_down_phrase(cue, "ar")
        assert hit, f"expected AR positive for {cue!r}"
|
||||
|
||||
|
||||
def test_de_fr_es_zh_positive() -> None:
    """German, French, Spanish and Chinese cues match under their own language."""
    cues_by_lang: dict[str, list[str]] = {
        "de": ["gute Nacht", "ich bin müde", "bis morgen"],
        "fr": ["bonne nuit", "je suis fatigué", "à demain"],
        "es": ["buenas noches", "estoy cansado", "hasta mañana"],
        "zh": ["晚安", "我要睡觉", "累了"],
    }
    # Flatten to (language, cue) pairs, keeping the declaration order.
    pairs = [
        (lang, cue)
        for lang, cues in cues_by_lang.items()
        for cue in cues
    ]
    for lang, cue in pairs:
        hit, _pattern = detect_wind_down_phrase(cue, lang)
        assert hit, f"expected {lang.upper()} positive for {cue!r}"
|
||||
|
||||
|
||||
def test_cross_lingual_en_is_fallback_but_ru_is_not() -> None:
    """English is a universal fallback; Russian is not."""
    # An English cue still matches when the declared language is Russian.
    en_hit_under_ru = detect_wind_down_phrase("good night", "ru")[0]
    assert en_hit_under_ru, "EN fallback must trigger regardless of language"

    # A purely Russian cue must not match when the declared language is English.
    ru_hit_under_en = detect_wind_down_phrase("я пойду спать", "en")[0]
    assert not ru_hit_under_en, (
        "RU phrases must not fall back under language=en"
    )
|
||||
|
||||
|
||||
def test_phrase_empty_cue_no_match() -> None:
    """An empty cue yields ``(False, "")`` for both English and Russian."""
    no_match = (False, "")
    for lang in ("en", "ru"):
        assert detect_wind_down_phrase("", lang) == no_match
|
||||
|
||||
|
||||
def test_phrase_unknown_language_still_tries_english() -> None:
    """An unsupported language code (here 'ko') still gets the EN fallback."""
    hit = detect_wind_down_phrase("good night", "ko")[0]
    assert hit, "EN fallback required for unsupported languages too"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- quiet-window gate
|
||||
|
||||
|
||||
def _utc(y: int, m: int, d: int, hh: int, mm: int = 0) -> datetime:
    """Return a timezone-aware UTC datetime for the given date and time."""
    return datetime(year=y, month=m, day=d, hour=hh, minute=mm, tzinfo=UTC)
|
||||
|
||||
|
||||
def test_is_late_no_window() -> None:
    """With no quiet window (``None``) the gate reports False."""
    now = _utc(2026, 4, 18, 22, 0)
    verdict = is_late_in_quiet_window(None, now, UTC)
    assert verdict is False
|
||||
|
||||
|
||||
def test_is_late_inside_window() -> None:
    """23:30 falls inside a window that starts at 22:00 and lasts 8h."""
    # (44, 16): start bucket 44 = 22:00, duration 8h.
    window = (44, 16)
    now = _utc(2026, 4, 18, 23, 30)
    verdict = is_late_in_quiet_window(window, now, UTC)
    assert verdict is True
|
||||
|
||||
|
||||
def test_is_late_within_30min_of_start() -> None:
    """21:45 is within 30 minutes of a 22:00 start, so it counts as late."""
    window = (44, 16)
    now = _utc(2026, 4, 18, 21, 45)
    verdict = is_late_in_quiet_window(window, now, UTC)
    assert verdict is True
|
||||
|
||||
|
||||
def test_is_late_exactly_30min_before_start() -> None:
    """Boundary case: exactly 30 minutes before start (21:30) is inclusive."""
    window = (44, 16)
    now = _utc(2026, 4, 18, 21, 30)
    verdict = is_late_in_quiet_window(window, now, UTC)
    assert verdict is True
|
||||
|
||||
|
||||
def test_is_late_one_hour_before_start() -> None:
    """21:00 is 60 minutes before a 22:00 start, which is too early."""
    window = (44, 16)
    now = _utc(2026, 4, 18, 21, 0)
    verdict = is_late_in_quiet_window(window, now, UTC)
    assert verdict is False
|
||||
|
||||
|
||||
def test_is_late_window_wraps_midnight() -> None:
    """02:30 the next day is inside a 22:00 + 8h window (the post-midnight part)."""
    # (44, 16): 22:00 start + 8h = 06:00 the next morning.
    window = (44, 16)
    now = _utc(2026, 4, 19, 2, 30)
    verdict = is_late_in_quiet_window(window, now, UTC)
    assert verdict is True
|
||||
|
||||
|
||||
def test_is_late_outside_window_afternoon() -> None:
    """15:00 is outside a 22:00-06:00 window and not within 30 minutes of its start."""
    window = (44, 16)
    now = _utc(2026, 4, 18, 15, 0)
    verdict = is_late_in_quiet_window(window, now, UTC)
    assert verdict is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- dual-gate
|
||||
|
||||
|
||||
def test_dual_gate_phrase_alone_not_enough() -> None:
    """A matching phrase with no quiet window in state yields None."""
    noon = _utc(2026, 4, 18, 12, 0)
    outcome = detect_wind_down("good night", "en", state={}, now=noon, tz=UTC)
    assert outcome is None
|
||||
|
||||
|
||||
def test_dual_gate_no_phrase_inside_window() -> None:
    """Being inside the quiet window without a phrase match yields None."""
    call_kwargs = {
        "state": {"quiet_window": (44, 16)},
        "now": _utc(2026, 4, 18, 23, 30),
        "tz": UTC,
    }
    outcome = detect_wind_down("let me check the code", "en", **call_kwargs)
    assert outcome is None
|
||||
|
||||
|
||||
def test_dual_gate_both_pass_inside_window() -> None:
    """Phrase match inside the quiet window returns the populated hint dict."""
    call_kwargs = {
        "state": {"quiet_window": (44, 16)},
        "now": _utc(2026, 4, 18, 23, 30),
        "tz": UTC,
    }
    outcome = detect_wind_down("good night", "en", **call_kwargs)
    assert outcome is not None
    assert outcome["message_hint"] == "user_wind_down_detected"
    assert "night" in outcome["matched_pattern"].lower()
    # The window tuple is echoed back as start bucket and duration.
    assert outcome["quiet_window_start_bucket"] == 44
    assert outcome["quiet_window_duration"] == 16
|
||||
|
||||
|
||||
def test_dual_gate_both_pass_30min_before_window() -> None:
    """At 21:45, within 30 minutes of a 22:00 start, the dual gate passes."""
    call_kwargs = {
        "state": {"quiet_window": (44, 16)},
        "now": _utc(2026, 4, 18, 21, 45),
        "tz": UTC,
    }
    outcome = detect_wind_down("good night", "en", **call_kwargs)
    assert outcome is not None
    assert outcome["quiet_window_start_bucket"] == 44
|
||||
|
||||
|
||||
def test_dual_gate_phrase_but_too_early() -> None:
    """At 21:00, 60 minutes before a 22:00 start, the phrase alone yields None."""
    call_kwargs = {
        "state": {"quiet_window": (44, 16)},
        "now": _utc(2026, 4, 18, 21, 0),
        "tz": UTC,
    }
    outcome = detect_wind_down("good night", "en", **call_kwargs)
    assert outcome is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- fixture corpus
|
||||
|
||||
|
||||
# Sorted language codes from WIND_DOWN_BY_LANG; drives the fixture-corpus tests below.
_LANGS = sorted(WIND_DOWN_BY_LANG.keys())
|
||||
|
||||
|
||||
@pytest.mark.parametrize("lang", _LANGS)
def test_fixture_corpus(lang: str) -> None:
    """Check every labelled sentence in ``fixtures/bedtime/<lang>.txt``.

    Each non-blank, non-comment line has the form ``sentence<TAB>label``
    where the label is ``yes`` (must match) or ``no`` (must not match).
    The label is validated explicitly so that a typo in a fixture is not
    silently treated as a negative example.
    """
    fixture_path = FIXTURES / f"{lang}.txt"
    assert fixture_path.exists(), f"fixture file missing: {fixture_path}"

    lines = [
        ln.strip()
        for ln in fixture_path.read_text(encoding="utf-8").splitlines()
        if ln.strip() and not ln.lstrip().startswith("#")
    ]
    assert len(lines) >= 10, f"{lang}: expected >=10 fixture lines, got {len(lines)}"

    for line in lines:
        assert "\t" in line, f"{lang}: fixture line missing tab separator: {line!r}"
        # Split on the last tab so the sentence itself may contain tabs.
        sentence, expected = line.rsplit("\t", 1)
        assert expected in ("yes", "no"), (
            f"{lang}: fixture label must be 'yes' or 'no', got {expected!r} in {line!r}"
        )
        matched, _ = detect_wind_down_phrase(sentence, lang)
        assert matched == (expected == "yes"), (
            f"{lang}: {sentence!r} expected {expected} got {matched}"
        )
|
||||
|
||||
|
||||
def test_fixture_corpus_false_positive_rate_under_10_percent() -> None:
    """Phrase-only false positive rate over all negative fixtures is < 10%.

    Reads the same ``<lang>.txt`` fixture files as ``test_fixture_corpus``
    and applies the same line grammar: blank lines, ``#`` comment lines and
    lines without a tab separator are ignored. Only lines labelled ``no``
    count towards the rate; at least 40 of them are required.
    """
    false_positives = 0
    neg_total = 0
    for lang in _LANGS:
        fixture_path = FIXTURES / f"{lang}.txt"
        for raw_line in fixture_path.read_text(encoding="utf-8").splitlines():
            line = raw_line.strip()
            # Skip anything that is not a labelled fixture row.
            if not line or line.startswith("#") or "\t" not in line:
                continue
            sentence, expected = line.rsplit("\t", 1)
            if expected != "no":
                continue
            neg_total += 1
            matched, _ = detect_wind_down_phrase(sentence, lang)
            if matched:
                false_positives += 1

    # Checked before dividing, so the rate is never computed on zero rows.
    assert neg_total >= 40, f"expected >=40 negative fixtures, got {neg_total}"
    fpr = false_positives / neg_total
    assert fpr < 0.10, (
        f"phrase-only FPR {fpr:.2%} exceeds 10% ceiling "
        f"({false_positives}/{neg_total}). Tighten fixtures or patterns."
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- ReDoS guard
|
||||
|
||||
|
||||
def test_redos_protection_bounded_quantifiers_under_100ms() -> None:
    """Guard against catastrophic backtracking in the wind-down patterns.

    Runs every compiled pattern in ``bedtime._COMPILED`` against 10KB of
    'a' characters. Each individual search must finish within 100 ms and
    the whole sweep within 1.0 s.
    """
    payload = "a" * 10240
    per_pattern_budget = 0.100  # seconds
    sweep_started = time.monotonic()
    for lang, patterns in bedtime._COMPILED.items():
        for compiled in patterns:
            started = time.monotonic()
            compiled.search(payload)
            elapsed = time.monotonic() - started
            if elapsed > per_pattern_budget:
                pytest.fail(
                    f"ReDoS suspected: {lang} pattern {compiled.pattern!r} took "
                    f">{per_pattern_budget}s on 10KB input"
                )
    total_elapsed = time.monotonic() - sweep_started
    assert total_elapsed < 1.0, (
        f"combined ReDoS sweep took {total_elapsed:.3f}s (budget 1.0s)"
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- coverage sanity
|
||||
|
||||
|
||||
def test_language_coverage_is_exactly_eight_d11() -> None:
    """The supported-language set is exactly the eight expected codes."""
    expected_langs = frozenset({"en", "ru", "ja", "ar", "de", "fr", "es", "zh"})
    assert WIND_DOWN_LANGUAGES_SUPPORTED == expected_langs
    assert len(WIND_DOWN_BY_LANG) == 8
|
||||
|
||||
|
||||
def test_gate_minutes_before_is_thirty_d09() -> None:
    """D-09: the pre-window grace period constant is 30 minutes."""
    expected_minutes = 30
    assert WIND_DOWN_GATE_MINUTES_BEFORE == expected_minutes
|
||||
133
tests/test_bench.py
Normal file
133
tests/test_bench.py
Normal file
|
|
@ -0,0 +1,133 @@
|
|||
"""Tests for the Phase-1 benchmark harnesses (D-15, OPS-01/02/04).
|
||||
|
||||
All tests inject `count_tokens_fn` where applicable so no live Anthropic API
|
||||
calls happen in CI. The actual Anthropic integration is exercised only when
|
||||
`ANTHROPIC_API_KEY` is set and the CLIs are run directly by hand.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from bench.tokens import FRESH_LIMIT, STEADY_LIMIT, run_token_bench
|
||||
from bench.verbatim import ACCURACY_FLOOR, run_verbatim_bench
|
||||
from iai_mcp.store import MemoryStore
|
||||
|
||||
|
||||
# ---------------------------------------------------------- bench/tokens.py
|
||||
|
||||
|
||||
def test_tokens_steady_pass(tmp_path):
    """A constant injected count of 2500 tokens passes both limits."""

    def _fixed_count(_text):
        return 2500

    store = MemoryStore(path=tmp_path)
    res = run_token_bench(store=store, n_runs=3, count_tokens_fn=_fixed_count)

    assert res["steady_ok"] is True
    assert res["fresh_ok"] is True
    assert all(count == 2500 for count in res["warm"])
    assert res["mode"] == "injected"
    # The result echoes the module-level limits it was judged against.
    limits = res["limits"]
    assert limits["steady"] == STEADY_LIMIT
    assert limits["fresh"] == FRESH_LIMIT
|
||||
|
||||
|
||||
def test_tokens_steady_fail(tmp_path):
    """A constant injected count of 3500 tokens makes ``steady_ok`` False."""

    def _fixed_count(_text):
        return 3500

    store = MemoryStore(path=tmp_path)
    res = run_token_bench(store=store, n_runs=3, count_tokens_fn=_fixed_count)
    assert res["steady_ok"] is False
|
||||
|
||||
|
||||
def test_tokens_fresh_fail(tmp_path):
    """A 9000-token fresh prompt fails ``fresh_ok`` while warm runs still pass.

    The injected counter replays a scripted sequence: the first call
    (the fresh prompt) returns 9000 and the following warm calls return
    2500 each.
    """
    store = MemoryStore(path=tmp_path)
    scripted_counts = iter([9000, 2500, 2500, 2500])

    def _counter(_text: str) -> int:
        return next(scripted_counts)

    res = run_token_bench(store=store, n_runs=3, count_tokens_fn=_counter)
    assert res["fresh_ok"] is False  # 9000 > 8000
    assert res["steady_ok"] is True  # warm still under 3000
|
||||
|
||||
|
||||
def test_tokens_tiktoken_fallback_mode(tmp_path, monkeypatch):
    """Without ANTHROPIC_API_KEY the bench reports the tiktoken proxy mode."""
    monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)

    res = run_token_bench(store=MemoryStore(path=tmp_path), n_runs=3)

    assert res["mode"] == "tiktoken-cl100k-proxy"
    # Per the original author's note: an empty store produces a near-empty
    # warm prompt, and the fresh prompt only adds a ~1k-character tail, so
    # both counts stay well under their limits.
    assert res["steady_ok"] is True
    assert res["fresh_ok"] is True
|
||||
|
||||
|
||||
def test_tokens_char4_fallback_mode(tmp_path, monkeypatch):
    """With no API key and tiktoken unimportable, mode is ``heuristic-char4``."""
    import builtins

    monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)

    original_import = builtins.__import__

    def _import_without_tiktoken(name, *args, **kwargs):
        # Simulate a missing tiktoken; every other import goes through as usual.
        if name != "tiktoken":
            return original_import(name, *args, **kwargs)
        raise ImportError("tiktoken not available in this scenario")

    monkeypatch.setattr(builtins, "__import__", _import_without_tiktoken)

    res = run_token_bench(store=MemoryStore(path=tmp_path), n_runs=3)
    assert res["mode"] == "heuristic-char4"
    assert res["steady_ok"] is True
|
||||
|
||||
|
||||
def test_tokens_fresh_prompt_is_larger_than_warm(tmp_path):
    """The fresh prompt passed to the counter is longer than the warm prompt."""
    store = MemoryStore(path=tmp_path)
    captured: list[str] = []

    def _capture(text: str) -> int:
        captured.append(text)
        return 100

    run_token_bench(store=store, n_runs=1, count_tokens_fn=_capture)

    # With n_runs=1 the counter sees exactly two prompts: fresh, then warm.
    assert len(captured) == 2
    fresh_prompt, warm_prompt = captured
    assert len(fresh_prompt) > len(warm_prompt)
|
||||
|
||||
|
||||
# -------------------------------------------------------- bench/verbatim.py
|
||||
|
||||
|
||||
def test_verbatim_passes_small_n(tmp_path):
    """Small-N smoke test: all 10 records are recalled exactly and the bench passes."""
    bench_kwargs = {"n_records": 10, "session_gap": 2, "noise_per_session": 2}
    res = run_verbatim_bench(store=MemoryStore(path=tmp_path), **bench_kwargs)

    assert res["accuracy"] >= ACCURACY_FLOOR
    assert res["passed"] is True
    assert res["hits_exact"] == 10
|
||||
|
||||
|
||||
def test_verbatim_returns_floor_constant(tmp_path):
    """The result exposes the pass/fail threshold, which equals 0.99."""
    bench_kwargs = {"n_records": 5, "session_gap": 1, "noise_per_session": 1}
    res = run_verbatim_bench(store=MemoryStore(path=tmp_path), **bench_kwargs)

    reported_floor = res["floor"]
    assert reported_floor == ACCURACY_FLOOR
    assert reported_floor == 0.99
|
||||
|
||||
|
||||
def test_verbatim_counts_exact_matches(tmp_path):
    """``hits_exact`` never exceeds ``n_records`` and accuracy is their ratio."""
    bench_kwargs = {"n_records": 5, "session_gap": 1, "noise_per_session": 1}
    res = run_verbatim_bench(store=MemoryStore(path=tmp_path), **bench_kwargs)

    hits, total = res["hits_exact"], res["n_records"]
    assert hits <= total
    assert res["accuracy"] == hits / total
|
||||
121
tests/test_bench_latency_regression.py
Normal file
121
tests/test_bench_latency_regression.py
Normal file
|
|
@ -0,0 +1,121 @@
|
|||
"""OPS-10 regression guard: small-N latency stays under D-SPEED p95 ceiling.
|
||||
|
||||
Plan 05-05 (D5-08) — CI-runnable guard for bench/neural_map.py at the
|
||||
small-N end of the matrix. The full N ∈ {100, 1k, 5k, 10k} matrix runs
|
||||
ad-hoc on this dev Mac and is recorded in the published bench report; this
|
||||
test exercises N=100 only so CI catches regressions in <30s.
|
||||
|
||||
D-SPEED contract: p95 < 100 ms at every measured N.
|
||||
|
||||
Adds the comparative reference flags to argparse:
|
||||
--ref-mempalace-p95-ms <float>
|
||||
--ref-claude-mem-p95-ms <float>
|
||||
|
||||
When supplied, the bench's per-N `passed` flag flips to False if IAI's p95
|
||||
exceeds the reference. Tests assert these flags exist on the parser.
|
||||
|
||||
See:
|
||||
- bench/neural_map.py — the harness under guard
|
||||
- tests/test_bench_neural_map.py — sibling D-SPEED tests (passed=True at N=100)
|
||||
- internal architecture spec
|
||||
Task 2 for the behavior contract
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _isolated_keyring(monkeypatch: pytest.MonkeyPatch):
    """Swap the ``keyring`` module's password functions for an in-memory dict.

    Intended to stop tests from triggering macOS keyring prompts. Yields
    the backing dict, keyed by ``(service, user)``, so tests can inspect it.
    """
    import keyring as _keyring

    fake_store: dict[tuple[str, str], str] = {}

    def _get(s, u):
        return fake_store.get((s, u))

    def _set(s, u, p):
        fake_store[(s, u)] = p

    def _delete(s, u):
        # Deleting a missing entry is a no-op rather than an error.
        return fake_store.pop((s, u), None)

    replacements = (
        ("get_password", _get),
        ("set_password", _set),
        ("delete_password", _delete),
    )
    for attr_name, replacement in replacements:
        monkeypatch.setattr(_keyring, attr_name, replacement)

    yield fake_store
|
||||
|
||||
|
||||
def test_neural_map_small_n_p95_under_regression_ceiling(tmp_path: Path):
    """OPS-10 regression fence at N=100.

    The strict D-SPEED gate (p95 < 100 ms) lives in
    tests/test_bench_neural_map.py::test_neural_map_bench_reports_passed_flag,
    which the original author notes can flake under concurrent system
    load. This test is deliberately looser: it asserts only that p95 stays
    under 200 ms (2x the D-SPEED ceiling), so a structural regression
    that raises latency by 2x or more is still caught in CI while
    wall-clock noise around the 100 ms boundary is tolerated.
    """
    from bench.neural_map import run_neural_map_bench

    out = run_neural_map_bench(n=100, iterations=10, store_path=tmp_path / "store")
    p95_ms = out["latency_ms_p95"]

    assert p95_ms < 200.0, (
        f"OPS-10 regression: p95 {p95_ms:.2f}ms > 200ms at N=100 "
        f"(2x D-SPEED ceiling — likely a real regression, not concurrency noise)"
    )
    # Sanity: the harness always returns a positive p95.
    assert p95_ms > 0.0
|
||||
|
||||
|
||||
def test_neural_map_main_with_matrix_returns_int(tmp_path: Path):
    """``main`` accepts an explicit ``ns`` list and returns exit code 0 or 1."""
    from bench import neural_map

    exit_code = neural_map.main(ns=[50], iterations=3, store_path=tmp_path)
    assert exit_code in (0, 1)
|
||||
|
||||
|
||||
def test_neural_map_argparse_has_reference_flags():
    """OPS-10 comparative gate: the CLI exposes the reference-p95 flags.

    Parses a dry argument list through ``neural_map._parse_args`` and
    checks that ``--ref-mempalace-p95-ms`` and ``--ref-claude-mem-p95-ms``
    are accepted and land on the namespace as floats. Renaming either
    flag must be accompanied by an update to this test.
    """
    from bench import neural_map

    # Exercise the real parser instead of introspecting its internals.
    ns = neural_map._parse_args([  # noqa: SLF001
        "--n", "100",
        "--ref-mempalace-p95-ms", "42.5",
        "--ref-claude-mem-p95-ms", "61.0",
    ])
    assert getattr(ns, "ref_mempalace_p95_ms", None) == 42.5
    assert getattr(ns, "ref_claude_mem_p95_ms", None) == 61.0
|
||||
|
||||
|
||||
def test_neural_map_comparative_gate_flips_passed_false_when_above_ref(tmp_path: Path):
    """An unbeatable mempalace reference makes ``main`` return exit code 1."""
    from bench import neural_map

    # An impossibly low reference that any realistic bench run will exceed.
    impossible_ref_ms = 0.0001
    exit_code = neural_map.main(
        ns=[50],
        iterations=3,
        store_path=tmp_path,
        ref_mempalace_p95_ms=impossible_ref_ms,
    )
    # With a 0.0001 ms reference, the bench cannot pass.
    assert exit_code == 1
|
||||
92
tests/test_bench_neural_map.py
Normal file
92
tests/test_bench_neural_map.py
Normal file
|
|
@ -0,0 +1,92 @@
|
|||
"""Tests for bench/neural_map.py (Plan 02-04 Task 4, D-SPEED).
|
||||
|
||||
D-SPEED contract: pipeline_recall <100ms at 10k records. The bench harness
|
||||
measures per-N latency distribution (p50, p95) and returns a structured
|
||||
dict. Main returns 0 iff all Ns pass thresholds.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
def test_neural_map_bench_runs_small_n(tmp_path):
    """A small run returns the expected keys, with float latencies."""
    from bench.neural_map import run_neural_map_bench

    out = run_neural_map_bench(n=50, iterations=3, store_path=tmp_path)

    assert out["n"] == 50
    for key in ("latency_ms_p50", "latency_ms_p95", "passed"):
        assert key in out
    for key in ("latency_ms_p50", "latency_ms_p95"):
        assert isinstance(out[key], float)
|
||||
|
||||
|
||||
def test_neural_map_bench_returns_stage_timings(tmp_path):
    """The result carries per-stage timings for all five pipeline stages."""
    from bench.neural_map import run_neural_map_bench

    out = run_neural_map_bench(n=50, iterations=2, store_path=tmp_path)
    assert "stage_timings_ms" in out

    stage_timings = out["stage_timings_ms"]
    required_stages = ("embed", "gate", "seeds", "spread", "rank")
    for stage_name in required_stages:
        assert stage_name in stage_timings
|
||||
|
||||
|
||||
def test_neural_map_bench_reports_passed_flag(tmp_path):
    """D-SPEED gate: at N=100 the bench must report ``passed=True``.

    This is the strict gate. It checks that the 100 ms threshold is
    surfaced in the result, that ``passed`` is exactly ``True`` rather
    than merely a bool, and that the measured p95 is under 100 ms.
    """
    from bench.neural_map import run_neural_map_bench

    out = run_neural_map_bench(n=100, iterations=10, store_path=tmp_path)

    # Contract: threshold surfaced.
    assert out.get("threshold_ms") == 100.0

    # D-SPEED quality gate: p95 must be UNDER 100ms at N=100.
    p95_ms = out["latency_ms_p95"]
    assert out["passed"] is True, (
        f"D-SPEED violated: p95={p95_ms:.2f}ms > 100ms at N=100. "
        f"Full output: {out}"
    )
    assert p95_ms < 100.0
|
||||
|
||||
|
||||
def test_neural_map_main_exits_zero_at_n100(tmp_path, capsys):
    """``main(ns=[100])`` returns 0, the all-pass exit code."""
    from bench import neural_map

    exit_code = neural_map.main(ns=[100], iterations=10, store_path=tmp_path)
    assert exit_code == 0, (
        f"bench.neural_map.main(ns=[100]) should exit 0 post-02-07; got {exit_code}"
    )
|
||||
|
||||
|
||||
def test_neural_map_bench_main_runs_and_returns_int(tmp_path, capsys):
    """``main`` runs end-to-end and returns 0 or 1 (bench CI contract)."""
    from bench import neural_map

    exit_code = neural_map.main(ns=[50], iterations=2, store_path=tmp_path)
    assert exit_code in (0, 1)
|
||||
|
||||
|
||||
def test_neural_map_bench_deterministic_within_tolerance(tmp_path):
    """Two same-seed runs at the same N both stay under a generous p50 ceiling.

    Each run gets its own subdirectory so it starts from a fresh store.
    Latencies are wall-clock, so only a loose 2000 ms bound is asserted.
    """
    from bench.neural_map import run_neural_map_bench

    first = run_neural_map_bench(
        n=50, iterations=5, store_path=tmp_path / "a", seed=42,
    )
    second = run_neural_map_bench(
        n=50, iterations=5, store_path=tmp_path / "b", seed=42,
    )

    ceiling_ms = 2000.0
    assert first["latency_ms_p50"] < ceiling_ms
    assert second["latency_ms_p50"] < ceiling_ms
|
||||
70
tests/test_bench_ram_regression.py
Normal file
70
tests/test_bench_ram_regression.py
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
"""OPS-11 regression guard: small-N RAM bench stays under threshold.
|
||||
|
||||
Plan 05-05 (D5-08) — CI-runnable guard for bench/memory_footprint.py. The
|
||||
large-N target (RSS <= 300 MB at N=10k warm on 16+ GB machine) runs
|
||||
ad-hoc from the published bench report; this test exercises the small-N path
|
||||
(N=100-500 with a 64d embedding) so CI catches harness drift without
|
||||
spinning up a 10k-record LanceDB table.
|
||||
|
||||
See:
|
||||
- bench/memory_footprint.py — the harness under guard
|
||||
- internal architecture spec
|
||||
Task 1 for the behavior contract
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
def test_memory_footprint_small_n_under_threshold(tmp_path: Path):
    """Smoke test: a small-N run yields a plausible RSS reading and full schema.

    The 300 MB large-N target is deliberately NOT enforced here. This
    guard checks only that every expected key is present, that the RSS
    reading is a positive float, and that it stays under a very generous
    outer bound.
    """
    from bench.memory_footprint import run_memory_footprint

    out = run_memory_footprint(n=100, store_path=tmp_path / "store", dim=64)

    # Shape: every promised key is present.
    for key in ("n", "rss_mb_peak", "threshold_mb", "passed", "platform"):
        assert key in out

    # Values: rss is a real positive reading; threshold is the design target.
    rss_mb = out["rss_mb_peak"]
    assert out["n"] == 100
    assert isinstance(rss_mb, float)
    assert rss_mb > 0.0
    assert out["threshold_mb"] == 300.0

    # Generous outer bound: catches a clearly broken reading (e.g. a unit
    # mix-up). The tight 300 MB fence belongs to the large-N run.
    assert rss_mb < 4000.0, (
        f"small-N RSS {rss_mb} MB suspicious"
    )
|
||||
|
||||
|
||||
def test_memory_footprint_main_exits_int(tmp_path: Path):
    """The CLI entry point returns 0 or 1 (bench CI contract)."""
    from bench import memory_footprint

    cli_args = ["--n", "50", "--dim", "32"]
    exit_code = memory_footprint.main(argv=cli_args)
    assert exit_code in (0, 1)
|
||||
|
||||
|
||||
def test_memory_footprint_platform_units_documented(tmp_path: Path):
    """The result records the platform it was measured on.

    macOS bytes vs Linux KB is a correctness trap, so the JSON output must
    carry a platform marker that lets downstream reports reproduce the
    unit conversion.
    """
    from bench.memory_footprint import run_memory_footprint

    out = run_memory_footprint(n=50, store_path=tmp_path / "store2", dim=32)
    known_platforms = ("darwin", "linux", "win32")
    assert out["platform"] in known_platforms
|
||||
117
tests/test_bench_total_session_cost.py
Normal file
117
tests/test_bench_total_session_cost.py
Normal file
|
|
@ -0,0 +1,117 @@
|
|||
"""OPS-12 regression guard: 3-turn sanity for total_session_cost.
|
||||
|
||||
Plan 05-05 (D5-08) — CI-runnable guard for bench/total_session_cost.py.
|
||||
The full 10-turn script runs ad-hoc on this dev Mac and populates
|
||||
the published bench report rows; this test exercises the shape
|
||||
contracts and the minimal-vs-standard invariant at CI speed.
|
||||
|
||||
Acceptance contracts:
|
||||
- minimal total <= standard total (TOK-11 sanity; if not, Plan 05-03
|
||||
regressed somewhere)
|
||||
- per_turn list has exactly 10 entries (fixed D5-08 script)
|
||||
- counter mode honest-disclosed in JSON (anthropic-count-tokens |
|
||||
tiktoken-cl100k-proxy | heuristic-char4)
|
||||
- reference-gate failure flips passed=False
|
||||
|
||||
See:
|
||||
- bench/total_session_cost.py — the harness under guard
|
||||
- bench/tokens.py — 3-tier counter fallback pattern reused here
|
||||
- internal architecture spec
|
||||
Task 3 for the behavior contract
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
def test_total_session_cost_reports_per_turn():
    """The result lists ten per-turn counts that sum to the reported total."""
    from bench.total_session_cost import run_total_session_cost

    out = run_total_session_cost(wake_depth="minimal")

    assert "per_turn" in out
    per_turn = out["per_turn"]
    assert isinstance(per_turn, list)
    assert len(per_turn) == 10, (
        f"D5-08 script has 10 turns; got {len(per_turn)}"
    )
    assert out["total_tokens"] == sum(per_turn)
    # Identification fields are echoed back unchanged.
    assert out["adapter"] == "iai-mcp"
    assert out["wake_depth"] == "minimal"
|
||||
|
||||
|
||||
def test_total_session_cost_minimal_le_standard():
    """TOK-11 invariant: ``minimal`` wake depth never costs more than ``standard``.

    Both runs use the same script; a failure here points at a regression
    in the lazy session-start work (Plan 05-03).
    """
    from bench.total_session_cost import run_total_session_cost

    totals = {
        depth: run_total_session_cost(wake_depth=depth)["total_tokens"]
        for depth in ("minimal", "standard")
    }

    assert totals["minimal"] <= totals["standard"], (
        f"minimal {totals['minimal']} > standard {totals['standard']}"
        " — TOK-11 regression"
    )
|
||||
|
||||
|
||||
def test_total_session_cost_counter_mode_disclosed():
    """BENCH_REPORT honesty: the result must name which token counter was
    used, so downstream reports can flag non-official numbers."""
    from bench.total_session_cost import run_total_session_cost

    disclosed_modes = (
        "anthropic-count-tokens",
        "tiktoken-cl100k-proxy",
        "heuristic-char4",
        "injected",
    )
    report = run_total_session_cost(wake_depth="minimal")
    assert report["mode"] in disclosed_modes
|
||||
|
||||
|
||||
def test_total_session_cost_fails_when_above_ref():
    """A reference number lower than IAI's total flips passed=False.

    The reference is set to an impossibly low 1 token so the outcome does
    not depend on the host or on which counter is active.
    """
    from bench.total_session_cost import run_total_session_cost

    report = run_total_session_cost(wake_depth="standard", mempalace_ref=1)

    assert report["passed"] is False
    # The supplied reference is echoed back under refs.
    assert report["refs"]["mempalace"] == 1
|
||||
|
||||
|
||||
def test_total_session_cost_passes_without_refs():
    """With no reference numbers supplied, passed=True is the degenerate
    answer and refs is empty. The bench still records IAI totals for
    BENCH_REPORT; disclosure of the missing refs lives in the report prose."""
    from bench.total_session_cost import run_total_session_cost

    report = run_total_session_cost(wake_depth="minimal")

    assert report["passed"] is True
    assert report["refs"] == {}
|
||||
|
||||
|
||||
def test_total_session_cost_main_exits_int():
    """The CLI entry point returns exit code 0 or 1 (bench CI contract)."""
    from bench import total_session_cost as bench_module

    exit_code = bench_module.main(argv=["--wake-depth", "minimal"])
    assert exit_code in (0, 1)
|
||||
|
||||
|
||||
def test_total_session_cost_injected_counter():
    """Counter injection for tests: passing a deterministic token-count
    function keeps the result independent of the proxy tokeniser's drift."""
    from bench.total_session_cost import run_total_session_cost

    def _one_token_per_char(text: str) -> int:
        # 1-char-per-token, with a floor of 1, for deterministic checks.
        return max(1, len(text))

    report = run_total_session_cost(
        wake_depth="minimal",
        count_tokens_fn=_one_token_per_char,
    )

    assert report["mode"] == "injected"
    # At least 1 token per turn over the 10-turn script.
    assert report["total_tokens"] >= 10
|
||||
167
tests/test_bench_total_session_cost_adapters.py
Normal file
167
tests/test_bench_total_session_cost_adapters.py
Normal file
|
|
@ -0,0 +1,167 @@
|
|||
"""Plan 05-06 Task 3 — mempalace / claude-mem subprocess adapters in
|
||||
``bench/total_session_cost.py``.
|
||||
|
||||
These adapters let the reference column carry a live measurement
|
||||
from the mempalace CLI when it is installed locally, falling back to
|
||||
honest "adapter unavailable" disclosure when absent. They never block
|
||||
the bench: subprocess timeouts and non-zero exits return None and emit
|
||||
a ``bench_adapter_unavailable`` stderr event.
|
||||
|
||||
Covered contracts:
|
||||
|
||||
Test 1 _run_mempalace_adapter signature exists and accepts the 10-turn script
|
||||
Test 2 mempalace CLI absent -> None + stderr event, no exception
|
||||
Test 3 mempalace CLI present -> sums per-turn token counts via the 3-tier counter
|
||||
Test 4 --measure-mempalace flag wires the live adapter into refs["mempalace_measured"]
|
||||
Test 5 _run_claude_mem_adapter mirrors mempalace shape for forward compat
|
||||
Test 6 manual --ref-mempalace alongside --measure-mempalace keeps both values,
|
||||
but LIVE measurement is the comparator for the `passed` flag
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
from unittest import mock
|
||||
|
||||
import pytest
|
||||
|
||||
from bench.total_session_cost import (
|
||||
_SCRIPT,
|
||||
_run_claude_mem_adapter,
|
||||
_run_mempalace_adapter,
|
||||
main,
|
||||
run_total_session_cost,
|
||||
)
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- helpers
|
||||
|
||||
|
||||
def _fixed_counter(text: str) -> int:
    """Deterministic counter: one token per whitespace-separated word,
    never less than 1. Keeps assertions stable across tiktoken /
    anthropic / char4 drift."""
    words = text.split()
    return len(words) if words else 1
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- Test 1
|
||||
|
||||
|
||||
def test_mempalace_adapter_signature():
    """The adapter accepts the canonical 10-turn script plus a counter and
    returns either None or an int, without raising."""
    outcome = _run_mempalace_adapter(_SCRIPT, _fixed_counter)
    # None is expected on a machine where mempalace does not respond
    # cleanly; callers depend on the call existing and not raising.
    assert outcome is None or isinstance(outcome, int)
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- Test 2
|
||||
|
||||
|
||||
def test_mempalace_adapter_absent_cli_returns_none(capsys):
    """When shutil.which cannot find the CLI, the adapter returns None and
    writes a bench_adapter_unavailable event naming mempalace to stderr."""
    which_patch = mock.patch(
        "bench.total_session_cost.shutil.which", return_value=None
    )
    with which_patch:
        outcome = _run_mempalace_adapter(_SCRIPT, _fixed_counter)

    assert outcome is None
    stderr_text = capsys.readouterr().err
    assert "bench_adapter_unavailable" in stderr_text
    assert "mempalace" in stderr_text
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- Test 3
|
||||
|
||||
|
||||
def test_mempalace_adapter_live_run_sums_stdout_tokens():
    """With ``shutil.which`` reporting the CLI as present and
    ``subprocess.run`` returning fixed stdout, the adapter's result is the
    injected counter's value summed over every turn of the script."""

    def locate(name):
        # Only the mempalace binary "exists" for this test.
        if name == "mempalace":
            return "/fake/bin/mempalace"
        return None

    def run_stub(*args, **kwargs):
        # Three words of stdout -> 3 tokens per turn under _fixed_counter.
        argv = args[0] if args else []
        return subprocess.CompletedProcess(
            args=argv, returncode=0, stdout="one two three", stderr="",
        )

    with mock.patch("bench.total_session_cost.shutil.which", side_effect=locate):
        with mock.patch("bench.total_session_cost.subprocess.run", side_effect=run_stub):
            total = _run_mempalace_adapter(_SCRIPT, _fixed_counter)

    assert total == 3 * len(_SCRIPT)
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- Test 4
|
||||
|
||||
|
||||
def test_measure_mempalace_flag_populates_refs(monkeypatch, capsys):
    """End-to-end: running `main` with --measure-mempalace populates
    refs["mempalace_measured"] when the adapter returns a number.

    The exit code is checked as well; it was previously captured and then
    ignored. It must honour the 0/1 contract the bench CLI exposes.
    """

    def fake_which(name):
        return "/fake/bin/mempalace" if name == "mempalace" else None

    def fake_run(*args, **kwargs):
        return subprocess.CompletedProcess(
            args=args[0] if args else [],
            returncode=0,
            stdout="hello world",
            stderr="",
        )

    with mock.patch("bench.total_session_cost.shutil.which", side_effect=fake_which), \
            mock.patch("bench.total_session_cost.subprocess.run", side_effect=fake_run):
        rc = main(["--wake-depth", "minimal", "--measure-mempalace"])

    assert rc in (0, 1), f"main() must return 0 or 1, got {rc!r}"

    # main() prints its JSON result on stdout.
    captured = capsys.readouterr()
    result = json.loads(captured.out.strip())
    assert "mempalace_measured" in result["refs"]
    assert isinstance(result["refs"]["mempalace_measured"], int)
    assert result["refs"]["mempalace_measured"] > 0
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- Test 5
|
||||
|
||||
|
||||
def test_claude_mem_adapter_mirrors_mempalace_shape(capsys):
    """The claude-mem adapter takes the same arguments as the mempalace one
    and has the same absent-CLI fallback: None plus a
    bench_adapter_unavailable stderr event. This keeps the forward-compat
    path exercised even where claude-mem is not installed."""
    which_patch = mock.patch(
        "bench.total_session_cost.shutil.which", return_value=None
    )
    with which_patch:
        outcome = _run_claude_mem_adapter(_SCRIPT, _fixed_counter)

    assert outcome is None
    stderr_text = capsys.readouterr().err
    assert "bench_adapter_unavailable" in stderr_text
    assert "claude-mem" in stderr_text
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- Test 6
|
||||
|
||||
|
||||
def test_live_measurement_wins_over_manual_ref():
    """When both ``--measure-mempalace`` and ``--ref-mempalace <int>`` are
    supplied, the live measurement lands in ``refs["mempalace_measured"]``
    and is the comparator for ``passed``; the manual int is recorded in
    ``refs["mempalace_manual"]`` for audit trail."""

    with mock.patch("bench.total_session_cost.shutil.which",
                    side_effect=lambda n: "/fake/bin/mempalace" if n == "mempalace" else None), \
            mock.patch("bench.total_session_cost.subprocess.run",
                       return_value=subprocess.CompletedProcess(
                           args=[], returncode=0,
                           # 5000 words -> 5000 tokens per subprocess call
                           # under _fixed_counter (not 5000 in total).
                           stdout="token " * 5000,
                           stderr="",
                       )):
        result = run_total_session_cost(
            wake_depth="minimal",
            mempalace_ref=10,  # manual ref — deliberately tiny to force fail IF used
            measure_mempalace=True,
            count_tokens_fn=_fixed_counter,
        )

    assert "mempalace_measured" in result["refs"]
    assert "mempalace_manual" in result["refs"]
    assert result["refs"]["mempalace_manual"] == 10
    # LIVE measurement is the gate. The author's estimate is 50000+ live
    # tokens (5000 per turn over the 10-turn script) against an IAI total
    # of <~3000, so passed is True; the manual ref of 10 would have failed.
    assert result["passed"] is True
|
||||
105
tests/test_bench_trajectory.py
Normal file
105
tests/test_bench_trajectory.py
Normal file
|
|
@ -0,0 +1,105 @@
|
|||
"""Tests for bench/trajectory.py (Plan 02-04 Task 4, D-33).
|
||||
|
||||
D-33 (benchmark corpus): 30-session synthetic corpus (autism/NT interaction
|
||||
pattern models), reproducible from seed=42. Diverse-language fixture:
|
||||
corpus includes English + Russian + Japanese + Arabic + German records for
|
||||
corpus-shape variance testing — NOT a multilingual product mandate. Brain
|
||||
is English-only since Plan 05-08 (default bge-small-en-v1.5).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
def test_synthetic_corpus_generates_30_sessions():
    """Asking for 30 sessions yields 30 entries, each carrying the four
    expected top-level keys."""
    from bench.trajectory import generate_synthetic_corpus

    sessions = generate_synthetic_corpus(n_sessions=30, seed=42)
    assert len(sessions) == 30

    required_keys = (
        "session_id",
        "records",
        "curiosity_events",
        "trajectory_metrics",
    )
    for session in sessions:
        for key in required_keys:
            assert key in session
|
||||
|
||||
|
||||
def test_synthetic_corpus_deterministic_from_seed():
    """Two generations with the same seed produce the same session ids, in
    the same order."""
    from bench.trajectory import generate_synthetic_corpus

    def session_ids(corpus):
        return [session["session_id"] for session in corpus]

    first = generate_synthetic_corpus(n_sessions=5, seed=42)
    second = generate_synthetic_corpus(n_sessions=5, seed=42)
    assert session_ids(first) == session_ids(second)
|
||||
|
||||
|
||||
def test_synthetic_corpus_multilingual():
    """Diverse-language fixture: corpus-shape variance check.

    NOT a product mandate — IAI-MCP brain is English-only since Plan 05-08.
    Non-English samples exist only to exercise corpus-shape variance in
    trajectory aggregation.
    """
    from bench.trajectory import generate_synthetic_corpus

    corpus = generate_synthetic_corpus(n_sessions=30, seed=42)

    # A record with no explicit language counts as English.
    languages: set[str] = {
        record.get("language", "en")
        for session in corpus
        for record in session["records"]
    }

    # Floor: English plus at least one non-English language (ru/ja/ar/de).
    assert "en" in languages
    non_english = languages - {"en"}
    assert len(non_english) >= 1, (
        f"diverse-language fixture has only languages={languages}"
    )
    # Stronger bar: at least 4 distinct languages over 30 sessions
    # (corpus-shape diversity, not a multilingual product claim).
    assert len(languages) >= 4
|
||||
|
||||
|
||||
def test_synthetic_corpus_covers_six_metrics():
    """Across the corpus, trajectory_metrics keys cover all six slots
    m1 through m6."""
    from bench.trajectory import generate_synthetic_corpus

    corpus = generate_synthetic_corpus(n_sessions=30, seed=42)

    # Union of the metric names seen in any session.
    metric_keys: set[str] = {
        name
        for session in corpus
        for name in session["trajectory_metrics"]
    }
    assert metric_keys >= {"m1", "m2", "m3", "m4", "m5", "m6"}
|
||||
|
||||
|
||||
def test_trajectory_bench_runs_over_corpus(tmp_path):
    """Running the bench over a small synthetic corpus yields a trend entry
    for each of the six metrics plus a ``passed`` verdict."""
    from bench.trajectory import (
        generate_synthetic_corpus,
        run_trajectory_bench,
    )

    corpus = generate_synthetic_corpus(n_sessions=6, seed=42)
    report = run_trajectory_bench(corpus, store_path=tmp_path)

    for index in range(1, 7):
        assert f"m{index}_trend" in report
    assert "passed" in report
|
||||
|
||||
|
||||
def test_trajectory_bench_main_runs(tmp_path, capsys):
    """``main`` runs on a tiny session count and returns 0 or 1."""
    from bench.trajectory import main as trajectory_main

    # n_sessions is kept tiny for CI speed; no real-logs path is passed.
    exit_code = trajectory_main(n_sessions=5, store_path=tmp_path)
    assert exit_code in (0, 1)
|
||||
|
||||
|
||||
def test_trajectory_bench_accepts_real_logs_flag(tmp_path):
    """``main`` accepts ``real_logs_path``; per the CLI contract a missing
    path falls back to synthetic data. Only the 0/1 exit code is checked."""
    from bench.trajectory import main as trajectory_main

    # real_logs_path=None is the "flag absent" case.
    exit_code = trajectory_main(
        n_sessions=3,
        real_logs_path=None,
        store_path=tmp_path,
    )
    assert exit_code in (0, 1)
|
||||
161
tests/test_bench_verbatim_flags.py
Normal file
161
tests/test_bench_verbatim_flags.py
Normal file
|
|
@ -0,0 +1,161 @@
|
|||
"""Tests for diagnostic flags on bench/verbatim.py.
|
||||
|
||||
Covers the 5 behaviors from the plan:
|
||||
1. `python -m bench.verbatim --help` lists --skip-l0-seed, --storage-direct,
|
||||
--n, --gap, --noise-per-session, --k.
|
||||
2. `run_verbatim_bench(skip_l0_seed=True, ...)` does NOT seed L0 identity.
|
||||
3. `run_verbatim_bench(storage_direct=True, ...)` writes zero provenance
|
||||
entries on pinned records across the query loop.
|
||||
4. Default invocation (no new flags set) is byte-identical to pre-plan
|
||||
behavior on the public dict keys.
|
||||
5. `--k` override propagates to `recall(k_hits=K)` (or `query_similar(k=K)`
|
||||
in storage-direct mode).
|
||||
|
||||
All tests use tmp_path for hermeticity; N kept tiny for CI speed.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import io
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parent.parent
|
||||
|
||||
|
||||
def test_cli_help_lists_all_new_flags():
    """Behavior 1: --help must list all 6 diagnostic/config flags.

    Each flag is matched as a whole option token, not as a raw substring.
    A plain ``"--n" in text`` check is also satisfied by
    ``--noise-per-session``, so it could never detect a missing ``--n``.
    """
    import re  # local import: only this test needs it

    out = subprocess.run(
        [sys.executable, "-m", "bench.verbatim", "--help"],
        capture_output=True,
        text=True,
        cwd=str(REPO_ROOT),
        timeout=30,
    )
    assert out.returncode == 0, f"--help exited {out.returncode}: {out.stderr}"
    text = out.stdout
    for flag in (
        "--skip-l0-seed",
        "--storage-direct",
        "--n",
        "--gap",
        "--noise-per-session",
        "--k",
    ):
        # The flag must not be preceded or followed by a word character or
        # a hyphen, so "--n" cannot match inside "--noise-per-session".
        token = re.compile(rf"(?<![\w-]){re.escape(flag)}(?![\w-])")
        assert token.search(text), f"--help missing flag {flag}\n\n{text}"
|
||||
|
||||
|
||||
def test_skip_l0_seed_does_not_seed_l0(tmp_path):
    """Behavior 2: with skip_l0_seed=True the store holds no L0 record
    after the run, and the flag is echoed back in the result."""
    from bench.verbatim import run_verbatim_bench
    from iai_mcp.core import L0_ID
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    bench_kwargs = dict(
        store=store,
        n_records=5,
        session_gap=2,
        noise_per_session=3,
        skip_l0_seed=True,
    )
    result = run_verbatim_bench(**bench_kwargs)

    assert "accuracy" in result
    assert result["skip_l0_seed"] is True
    l0_record = store.get(L0_ID)
    assert l0_record is None, (
        "skip_l0_seed=True must not seed L0 identity record"
    )
|
||||
|
||||
|
||||
def test_storage_direct_writes_zero_provenance_to_pinned(tmp_path):
    """Behavior 3: storage_direct bypasses recall(), so pinned benchmark
    records must end the run with no provenance entries."""
    from bench.verbatim import run_verbatim_bench
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    result = run_verbatim_bench(
        store=store,
        n_records=5,
        session_gap=2,
        noise_per_session=3,
        storage_direct=True,
    )
    assert "accuracy" in result
    assert result["storage_direct"] is True

    # Collect (text prefix, provenance length) for every pinned,
    # benchmark-tagged record that picked up provenance during the run.
    pinned_offenders: list[tuple[str, int]] = [
        (rec.literal_surface[:40], len(rec.provenance or []))
        for rec in store.all_records()
        if rec.pinned
        and "benchmark" in (rec.tags or [])
        and len(rec.provenance or []) != 0
    ]
    assert not pinned_offenders, (
        f"storage_direct must leave pinned provenance empty, got: {pinned_offenders}"
    )
|
||||
|
||||
|
||||
def test_default_invocation_keys_preserved(tmp_path):
    """Behavior 4: a default invocation still returns every legacy key,
    plus the diagnostic keys with their flags reported as off."""
    from bench.verbatim import run_verbatim_bench
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    result = run_verbatim_bench(
        store=store, n_records=5, session_gap=2, noise_per_session=3,
    )

    # Legacy keys (pre-Plan-05-01) still present.
    legacy_keys = (
        "accuracy",
        "n_records",
        "session_gap",
        "noise_per_session",
        "hits_exact",
        "passed",
        "floor",
        "noise_mode",
    )
    for key in legacy_keys:
        assert key in result, f"legacy key {key} missing"

    # New diagnostic traceability keys added.
    diagnostic_keys = ("skip_l0_seed", "storage_direct", "k")
    for key in diagnostic_keys:
        assert key in result, f"diagnostic key {key} missing"

    assert result["skip_l0_seed"] is False
    assert result["storage_direct"] is False
|
||||
|
||||
|
||||
def test_k_override_propagates_in_storage_direct(tmp_path):
    """Behavior 5: a --k override in storage_direct mode reaches query_similar.

    With n_records=5 and k=3, each storage-direct query can return only 3
    rows, so the pinned-text hit count is bounded by k rather than by the
    default max(n_records+10, 20). The test expects this deliberately tiny
    k to push accuracy strictly below 1.0, where the default k would
    return every pinned record.
    """
    from bench.verbatim import run_verbatim_bench
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    result = run_verbatim_bench(
        store=store,
        n_records=5,
        session_gap=2,
        noise_per_session=3,
        storage_direct=True,
        k=3,
    )

    assert result["k"] == 3, f"k should be echoed back, got {result.get('k')!r}"
    # k < n_records: some pinned cues cannot find their exact literal in
    # the top-k. The default k (max(n+10, 20) = 20 for n=5) would not
    # produce this shortfall.
    accuracy = result["accuracy"]
    assert accuracy < 1.0, (
        f"k=3 with n=5 must cap accuracy below 1.0, got {accuracy}"
    )
|
||||
178
tests/test_bridge_no_spawn_path.py
Normal file
178
tests/test_bridge_no_spawn_path.py
Normal file
|
|
@ -0,0 +1,178 @@
|
|||
"""Plan 07.1-04 R5 acceptance — compile-output regression trap.
|
||||
|
||||
This is the regression-trap that catches a future revert of Phase 7.1's
|
||||
no-spawn architecture. If `child_process.spawn` reappears in
|
||||
`mcp-wrapper/dist/bridge.js`, this test FAILS — alerting the developer
|
||||
(or a future Claude) that someone has reintroduced the TOCTOU spawn
|
||||
race that Phase 7.1 explicitly removed.
|
||||
|
||||
# Why a compile-output trap, not just a source-level grep?
|
||||
|
||||
A source-level grep would also catch the regression, but it would NOT
|
||||
catch:
|
||||
- A spawn call introduced via a transitive import (e.g., a helper
|
||||
module that imports `node:child_process` and re-exports a spawn
|
||||
wrapper).
|
||||
- A spawn call introduced via dynamic `require("child_process")` at
|
||||
runtime (which tsc compiles into the JS but a source grep for
|
||||
`import { spawn }` would miss).
|
||||
- A spawn introduced into a NEW module that bridge.ts imports.
|
||||
|
||||
The compiled `dist/bridge.js` is what actually ships and runs. Grepping
|
||||
THAT is the load-bearing assertion.
|
||||
|
||||
# Reference
|
||||
|
||||
- Plan 07.1-04 Task 3
|
||||
- 07.1-CONTEXT.md D7.1-07 (bridge.ts spawn-removal scope)
|
||||
- The mirror source-level assertion lives in Task 1
|
||||
(``grep -c 'child_process[.]spawn|^import.*spawn|spawnDaemon'
|
||||
mcp-wrapper/src/bridge.ts`` returns 0)
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import platform
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
REPO = Path(__file__).resolve().parent.parent
|
||||
WRAPPER = REPO / "mcp-wrapper"
|
||||
|
||||
pytestmark = pytest.mark.skipif(
|
||||
platform.system() == "Windows",
|
||||
reason="bash + npm tooling assumed POSIX (mcp-wrapper build path)",
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fixture: build the wrapper once per module so all 3 tests reuse the same
|
||||
# dist/bridge.js artifact. Mirrors the pattern in
|
||||
# tests/test_socket_subagent_reuse.py:built_wrapper.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def built_bridge_js() -> Path:
    """Build the TS wrapper once per module and return the path to the
    compiled ``dist/bridge.js``.

    ``npm install`` runs only when ``node_modules`` is absent; ``npm run
    build`` always runs. Either command failing raises
    ``CalledProcessError`` because of ``check=True``.
    """
    deps_dir = WRAPPER / "node_modules"
    if not deps_dir.exists():
        subprocess.run(["npm", "install"], cwd=WRAPPER, check=True)
    subprocess.run(["npm", "run", "build"], cwd=WRAPPER, check=True)

    dist_dir = WRAPPER / "dist"
    dist = dist_dir / "bridge.js"
    # On failure the message lists whatever .js files the build produced.
    assert dist.exists(), (
        "npm run build should have produced dist/bridge.js — actual: "
        f"{list(dist_dir.glob('*.js')) if dist_dir.exists() else 'no dist dir'}"
    )
    return dist
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tests.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_dist_bridge_js_has_no_child_process_spawn(built_bridge_js):
    """REGRESSION TRAP: assert the compiled bridge.js contains zero
    references to child_process.spawn in any of its post-tsc forms.

    Catches:
      - `import { spawn } from "node:child_process"` (ESM, what
        TypeScript writes; tsc with module=ESNext keeps the import)
      - `from "node:child_process"` (any other named import from the
        same module)
      - `from "child_process"` (ESM import using the legacy specifier
        without the `node:` prefix)
      - `require("node:child_process")` (CJS form if module target
        ever changes to CommonJS)
      - `require("child_process")` (legacy CJS form)
      - `child_process.spawn` (after a `.spawn` access on a module
        namespace import)

    Every import/require form is checked in both double- and single-quoted
    spelling, because tsc's exact output bytes depend on tsconfig
    (module=ESNext vs CommonJS) and a future config change must NOT
    silently allow spawn back in.
    """
    text = built_bridge_js.read_text(encoding="utf-8")

    forbidden_substrings = [
        'child_process.spawn',
        'from "node:child_process"',
        "from 'node:child_process'",
        # ESM import of the un-prefixed specifier was previously unchecked.
        'from "child_process"',
        "from 'child_process'",
        'require("node:child_process")',
        "require('node:child_process')",
        'require("child_process")',
        "require('child_process')",
    ]

    found = [s for s in forbidden_substrings if s in text]
    assert not found, (
        "REGRESSION: dist/bridge.js contains spawn-related substring(s) "
        f"that Phase 7.1 explicitly removed: {found}. "
        "Someone has re-introduced the TOCTOU spawn race that Phase 7.1's "
        "pure-connector refactor eliminated. Re-read 07.1-CONTEXT.md "
        "D7.1-07 (bridge.ts spawn-removal scope) before pushing."
    )
|
||||
|
||||
|
||||
def test_dist_bridge_js_has_DaemonUnreachableError(built_bridge_js):
    """The compiled bridge.js must still mention DaemonUnreachableError at
    least twice, evidence that the fail-loud path survived the build.

    The failure mode guarded against: start() stops throwing (a refactor
    swallows the connect failure), so wrappers boot without a daemon and
    every tools/call then returns daemon_unreachable.

    Two occurrences are taken to mean the class definition plus at least
    one throw-site (Plan 07.1-04 Task 1 done criteria, applied here to the
    compiled output on the assumption that tsc keeps class identifiers).
    """
    compiled = built_bridge_js.read_text(encoding="utf-8")

    occurrences = compiled.count("DaemonUnreachableError")
    enough = occurrences >= 2
    assert enough, (
        f"REGRESSION: dist/bridge.js contains DaemonUnreachableError "
        f"only {occurrences} times (expected >=2: class definition + at least "
        f"one throw-site). The fail-loud error path may have been "
        f"removed or renamed."
    )
|
||||
|
||||
|
||||
def test_dist_bridge_js_has_5000_socket_timeout(built_bridge_js):
    """Assert the SOCKET_CONNECT_TIMEOUT_MS constant is set to 5000ms
    (raised from 250ms in pre-7.1 to cover launchd socket-activation
    cold-start window).

    Anchored to the named constant rather than a bare `5000` substring —
    tsc default does NOT minify so the declaration survives compilation,
    and a bare `5000` could match unrelated literals.

    The value is matched with a regex instead of a plain substring: the
    substring ``SOCKET_CONNECT_TIMEOUT_MS = 5000`` is also contained in
    ``... = 50000`` and ``... = 5000.5``, so a changed value could pass
    unnoticed. The regex also tolerates whitespace differences around
    ``=`` and the ``5_000`` numeric-separator spelling.

    If this test fails:
      - The constant was renamed: update the assertion AND verify the
        new name is the connect timeout (not idle-shutdown / heartbeat).
      - The value was lowered (e.g., back to 250): re-read CONTEXT.md
        D7.1-07 — 5s is required because launchd cold-spawn of the
        daemon (bge-small embedder load + LanceDB open) is empirically
        3-10s on macOS. A lower timeout will spuriously throw
        DaemonUnreachableError on legitimate cold-starts.
    """
    import re  # local import: only this test needs it

    text = built_bridge_js.read_text(encoding="utf-8")

    # The trailing lookahead rejects longer numbers (50000), decimals
    # (5000.5) and suffixed literals; the leading \b rejects longer names.
    declaration = re.compile(
        r"\bSOCKET_CONNECT_TIMEOUT_MS\s*=\s*5_?000(?![\w.])"
    )
    assert declaration.search(text), (
        "REGRESSION: dist/bridge.js does not contain "
        "'SOCKET_CONNECT_TIMEOUT_MS = 5000'. Either the constant was "
        "renamed, the value was changed, or tsc minification was "
        "enabled (which would also break the source-level grep done "
        "criteria in Task 1). D7.1-07 requires 5000ms to cover the "
        "launchd socket-activation cold-start window — see "
        "07.1-CONTEXT.md D7.1-07."
    )
|
||||
541
tests/test_bridge_socket_first.py
Normal file
541
tests/test_bridge_socket_first.py
Normal file
|
|
@ -0,0 +1,541 @@
|
|||
"""Plan 07.1-04 R2/A6 acceptance — bridge.ts is a pure connector (no spawn).
|
||||
|
||||
# History
|
||||
|
||||
This file was renamed-in-place from the pre-Phase-7.1 test of the same
|
||||
name. The pre-Phase-7.1 file asserted spawn-fallback
|
||||
behavior:
|
||||
- test_cold_start_spawns_daemon_under_5s — asserted that the wrapper
|
||||
SPAWNS `python -m iai_mcp.daemon` when the socket is missing
|
||||
(`daemon_delta >= 1`).
|
||||
- test_warm_start_reuses_daemon_under_250ms — relied on wrapper #1 to
|
||||
bootstrap the daemon via spawn so wrapper #2 could attach.
|
||||
|
||||
Phase 7.1 (this plan, 07.1-04) DELETES bridge.ts's spawn capability:
|
||||
the wrapper now ONLY connects to ~/.iai-mcp/.daemon.sock with a 5s
|
||||
timeout; on miss it throws `DaemonUnreachableError` (code -32002) and
|
||||
the wrapper process exits non-zero. Daemon spawning is now launchd's
|
||||
job (Wave 1 plist + Wave 2 install.sh + Wave 2 LISTEN_FDS branch).
|
||||
|
||||
Both pre-7.1 tests therefore had to be restructured:
|
||||
- Old `test_cold_start_spawns_daemon_under_5s` is REPLACED by
|
||||
`test_start_throws_DaemonUnreachableError_when_socket_missing`
|
||||
which asserts the inverse: NO daemon spawned, wrapper exits
|
||||
non-zero with the new error in stderr.
|
||||
- Old `test_warm_start_reuses_daemon_under_250ms` is REPLACED by
|
||||
`test_start_succeeds_with_warm_daemon_no_extra_spawn` which
|
||||
pre-starts a daemon manually (subprocess.Popen of
|
||||
`python -m iai_mcp.daemon`), waits for socket bind, then spawns
|
||||
the wrapper and asserts initialize handshake succeeds AND
|
||||
daemon process count delta == 0 (the wrapper did NOT spawn a
|
||||
second daemon).
|
||||
|
||||
# Test isolation strategy
|
||||
|
||||
Both tests use IAI_DAEMON_SOCKET_PATH env override (HIGH-4 lock at
|
||||
bridge.ts module top — verified preserved through Plan 07.1-04 Task 1
|
||||
edit) so they target a tmp socket and never touch the user's real
|
||||
~/.iai-mcp/.daemon.sock — the production daemon (if any) is not
|
||||
disturbed.
|
||||
|
||||
Delta-snapshot psutil pattern (lesson from the 07-04
|
||||
SUMMARYs): we count `iai_mcp.daemon` processes BEFORE and AFTER the
|
||||
wrapper boot and assert the DELTA, not the absolute. On a developer
|
||||
machine with a live production daemon, `before["daemon"] >= 1`; an
|
||||
absolute `assert after["daemon"] == 1` would falsely fail.
|
||||
|
||||
# Pattern reuse
|
||||
|
||||
Helpers (`_count_iai_mcp_processes`, `_kill_test_daemons`,
|
||||
`_spawn_wrapper`, `_initialize`, `_call_memory_recall`,
|
||||
`_wait_for_daemon_socket`) and the `built_wrapper` fixture are kept
|
||||
verbatim from the pre-7.1 file — they remain valid scaffolding.
|
||||
The `_count_iai_mcp_processes` shape mirrors
|
||||
`tests/test_socket_subagent_reuse.py` and `tests/test_socket_fail_loud.py`.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import signal
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import psutil
|
||||
import pytest
|
||||
|
||||
REPO = Path(__file__).resolve().parent.parent
|
||||
WRAPPER = REPO / "mcp-wrapper"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fixture: built wrapper (npm install + npm run build once per module).
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def built_wrapper() -> Path:
    """Build the TS wrapper once per test module and return the path to
    ``dist/index.js``.

    ``npm install`` runs only when ``node_modules`` is missing; the build
    always runs. ``check=True`` turns a failing npm command into an error.
    """
    needs_install = not (WRAPPER / "node_modules").exists()
    if needs_install:
        subprocess.run(["npm", "install"], cwd=WRAPPER, check=True)
    subprocess.run(["npm", "run", "build"], cwd=WRAPPER, check=True)

    entry_point = WRAPPER / "dist" / "index.js"
    assert entry_point.exists(), "npm run build should have produced dist/index.js"
    return entry_point
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers: psutil snapshot, wrapper spawn, MCP handshake + recall round-trip.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _count_iai_mcp_processes() -> dict[str, int]:
    """Count processes whose command line mentions iai_mcp.core / iai_mcp.daemon.

    The scan covers every process psutil can enumerate, so callers are
    expected to compare two snapshots (delta) rather than rely on the
    absolute numbers. Mirrors
    `tests/test_socket_fail_loud.py:_count_iai_mcp_processes`.

    Returns:
        ``{"core": <count>, "daemon": <count>}``.
    """
    tally = {"core": 0, "daemon": 0}
    for proc in psutil.process_iter(["cmdline"]):
        try:
            argv = proc.info.get("cmdline") or []
            if not argv:
                continue
            command = " ".join(part or "" for part in argv)
            # A single command line may match both markers; count each.
            for key in ("core", "daemon"):
                if f"iai_mcp.{key}" in command:
                    tally[key] += 1
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            # Process vanished or is not inspectable — ignore it.
            continue
    return tally
|
||||
|
||||
|
||||
def _kill_test_daemons(sock_path: Path) -> None:
    """Send SIGTERM to every iai_mcp.daemon process bound to the test socket.

    A process is signalled only when its command line mentions
    ``iai_mcp.daemon`` AND its environment sets ``IAI_DAEMON_SOCKET_PATH``
    to ``sock_path``; a daemon configured with any other socket path is
    left alone.

    Args:
        sock_path: Socket path the test daemon was started with.
    """
    target = str(sock_path)
    for proc in psutil.process_iter(["cmdline", "environ"]):
        try:
            command = " ".join(proc.info.get("cmdline") or [])
            if "iai_mcp.daemon" in command:
                environ = proc.info.get("environ") or {}
                if environ.get("IAI_DAEMON_SOCKET_PATH") == target:
                    proc.send_signal(signal.SIGTERM)
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            # Already gone or not ours to touch.
            continue
|
||||
|
||||
|
||||
def _spawn_wrapper(
    built_wrapper: Path,
    env_overrides: dict[str, str] | None = None,
) -> subprocess.Popen:
    """Spawn the built TS wrapper with stdin/stdout/stderr pipes for JSON-RPC.

    The child inherits the current environment plus ``IAI_MCP_PYTHON``
    (this interpreter) and a ``PYTHONPATH`` with the repo's ``src/``
    directory prepended. ``env_overrides`` is applied last and wins.

    Args:
        built_wrapper: Path to the wrapper's built ``index.js``.
        env_overrides: Extra environment variables for the child.

    Returns:
        The running ``node`` process; the caller owns its lifetime.
    """
    env = os.environ.copy()
    env["IAI_MCP_PYTHON"] = sys.executable
    # Join only the non-empty parts: with no inherited PYTHONPATH a plain
    # concatenation leaves a trailing separator, i.e. an empty entry that
    # puts the current directory on the child's import path.
    inherited = env.get("PYTHONPATH", "")
    env["PYTHONPATH"] = os.pathsep.join(
        part for part in (str(REPO / "src"), inherited) if part
    )
    if env_overrides:
        env.update(env_overrides)
    return subprocess.Popen(
        ["node", str(built_wrapper)],
        cwd=str(REPO),
        env=env,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
|
||||
|
||||
|
||||
def _spawn_daemon_in_background(
    sock_path: Path, store_dir: Path
) -> subprocess.Popen:
    """Pre-start a daemon manually via `python -m iai_mcp.daemon`.

    Wrappers no longer spawn the daemon themselves — that is launchd's
    job in production and the test's job here. No ``LISTEN_FDS`` is set,
    so the daemon takes its manual-run code path (kept for backward
    compatibility per D7.1-09).

    Args:
        sock_path: Exported to the daemon as ``IAI_DAEMON_SOCKET_PATH``.
        store_dir: Exported to the daemon as ``IAI_MCP_STORE``.

    Returns:
        The running daemon process; its stdout/stderr are discarded and
        the caller owns its lifetime.
    """
    env = os.environ.copy()
    env["IAI_DAEMON_SOCKET_PATH"] = str(sock_path)
    env["IAI_MCP_STORE"] = str(store_dir)
    env["IAI_DAEMON_IDLE_SHUTDOWN_SECS"] = "120"
    # Join only the non-empty parts: with no inherited PYTHONPATH a plain
    # concatenation leaves a trailing separator, i.e. an empty entry that
    # puts the current directory on the child's import path.
    inherited = env.get("PYTHONPATH", "")
    env["PYTHONPATH"] = os.pathsep.join(
        part for part in (str(REPO / "src"), inherited) if part
    )
    return subprocess.Popen(
        [sys.executable, "-m", "iai_mcp.daemon"],
        cwd=str(REPO),
        env=env,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
|
||||
|
||||
|
||||
def _initialize(proc: subprocess.Popen, rpc_id: int = 1) -> dict:
|
||||
"""MCP initialize handshake — required before tools/call works."""
|
||||
assert proc.stdin is not None and proc.stdout is not None
|
||||
init = {
|
||||
"jsonrpc": "2.0",
|
||||
"id": rpc_id,
|
||||
"method": "initialize",
|
||||
"params": {
|
||||
"protocolVersion": "2025-03-26",
|
||||
"capabilities": {},
|
||||
"clientInfo": {"name": "iai-mcp-bridge-no-spawn-test", "version": "0.1.0"},
|
||||
},
|
||||
}
|
||||
proc.stdin.write((json.dumps(init) + "\n").encode("utf-8"))
|
||||
proc.stdin.flush()
|
||||
line = proc.stdout.readline()
|
||||
if not line:
|
||||
raise RuntimeError("wrapper closed stdout before initialize reply")
|
||||
resp = json.loads(line.decode("utf-8"))
|
||||
note = {"jsonrpc": "2.0", "method": "notifications/initialized"}
|
||||
proc.stdin.write((json.dumps(note) + "\n").encode("utf-8"))
|
||||
proc.stdin.flush()
|
||||
return resp
|
||||
|
||||
|
||||
def _call_memory_recall(
|
||||
proc: subprocess.Popen,
|
||||
cue: str,
|
||||
rpc_id: int = 2,
|
||||
*,
|
||||
timeout_sec: float = 10.0,
|
||||
) -> tuple[float, dict]:
|
||||
"""Send tools/call memory_recall + return (wall-clock-elapsed, response)."""
|
||||
assert proc.stdin is not None and proc.stdout is not None
|
||||
req = {
|
||||
"jsonrpc": "2.0",
|
||||
"id": rpc_id,
|
||||
"method": "tools/call",
|
||||
"params": {
|
||||
"name": "memory_recall",
|
||||
"arguments": {"cue": cue, "budget_tokens": 100},
|
||||
},
|
||||
}
|
||||
t0 = time.monotonic()
|
||||
proc.stdin.write((json.dumps(req) + "\n").encode("utf-8"))
|
||||
proc.stdin.flush()
|
||||
import select
|
||||
deadline = time.monotonic() + timeout_sec
|
||||
line = b""
|
||||
while time.monotonic() < deadline:
|
||||
readable, _, _ = select.select([proc.stdout], [], [], 0.5)
|
||||
if readable:
|
||||
line = proc.stdout.readline()
|
||||
break
|
||||
elapsed = time.monotonic() - t0
|
||||
if not line:
|
||||
raise RuntimeError(
|
||||
f"no response within {timeout_sec}s "
|
||||
f"(stderr: {proc.stderr.read1(2000) if proc.stderr else b'?'!r})"
|
||||
)
|
||||
return elapsed, json.loads(line.decode("utf-8"))
|
||||
|
||||
|
||||
def _wait_for_daemon_socket(sock_path: Path, timeout_sec: float = 30.0) -> bool:
|
||||
"""Poll for sock_path existence at 0.1s cadence; True on bind."""
|
||||
deadline = time.monotonic() + timeout_sec
|
||||
while time.monotonic() < deadline:
|
||||
if sock_path.exists():
|
||||
return True
|
||||
time.sleep(0.1)
|
||||
return False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tests — contract: wrappers are pure connectors, no spawn.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_start_throws_DaemonUnreachableError_when_socket_missing(
    built_wrapper, tmp_path
):
    """With no daemon on the test socket the wrapper stays up and fails loud per call.

    Contract (Phase 7.1 + mcp-tools-list-empty-cache, 2026-05-02):

    1. ``initialize`` succeeds and ``tools/list`` is served from the
       static registry in under 4s, regardless of daemon state.
    2. The wrapper is still alive after the bridge's 5s connect timeout
       has elapsed.
    3. ``tools/call`` surfaces the daemon-down condition as an error
       result (fail-loud at the right layer) instead of silent success.
    4. The wrapper spawns no ``iai_mcp.daemon`` / ``iai_mcp.core``
       process: the delta against a pre-spawn snapshot is 0.

    History (same test slot — replaces the pre-2026-05-02 contract that
    asserted "wrapper exits non-zero on daemon miss"):

    - Pre-fix the wrapper had a top-level `await bridge.start()` BEFORE
      `server.connect(transport)`. On a missing/slow daemon socket the
      Node process either exited non-zero (after the 5s timeout) OR —
      the bug being fixed — replied to MCP `initialize` after a long
      delay with no tools/list ever cached, making `mcp__iai-mcp__*`
      invisible for the entire client session. The old assertions
      (non-zero exit, DaemonUnreachableError on stderr) encoded the
      consequence of that ordering, not the architectural contract.
    - Post-fix `server.connect(transport)` runs FIRST; bridge.start() is
      fire-and-forget; tools/list is independent of daemon state;
      tools/call lazy-awaits bridge readiness and surfaces
      daemon_unreachable as a structured tool-result error.

    The load-bearing invariant — `daemon_delta == 0` — is unchanged: the
    wrapper must NOT spawn the daemon under any condition.
    """
    import select as _select
    import shutil

    # Scratch dir lives directly under /tmp rather than tmp_path —
    # presumably to keep the AF_UNIX socket path short; TODO confirm.
    sock_dir = Path(f"/tmp/iai-7.1-noconn-{os.getpid()}-{id(tmp_path)}")
    sock_dir.mkdir(parents=True, exist_ok=True)
    sock_path = sock_dir / "d.sock"
    store_dir = sock_dir / "store"
    store_dir.mkdir(parents=True, exist_ok=True)

    # Verify clean state — no socket file at our tmp path.
    assert not sock_path.exists(), f"tmp socket pre-exists: {sock_path}"

    # Baseline snapshot. Other daemons may already run on the host (on a
    # different socket path), so we count globally and assert the delta.
    baseline = _count_iai_mcp_processes()
    daemon_baseline = baseline["daemon"]
    core_baseline = baseline["core"]

    env_overrides = {
        "IAI_DAEMON_SOCKET_PATH": str(sock_path),
        "IAI_MCP_STORE": str(store_dir),
    }
    wrapper_proc = _spawn_wrapper(built_wrapper, env_overrides)
    try:
        # ---- Assertion 1: handshake + tools/list work without a daemon ----
        init_resp = _initialize(wrapper_proc, rpc_id=1)
        assert "result" in init_resp, f"initialize failed: {init_resp}"

        # tools/list must be answered from the static registry within the
        # MCP-client tools/list timeout window (~3s observed; 4s allowed
        # for CI overhead).
        list_req = {
            "jsonrpc": "2.0",
            "id": 2,
            "method": "tools/list",
            "params": {},
        }
        wrapper_proc.stdin.write((json.dumps(list_req) + "\n").encode("utf-8"))
        wrapper_proc.stdin.flush()
        list_t0 = time.monotonic()
        line = wrapper_proc.stdout.readline()
        list_elapsed = time.monotonic() - list_t0
        assert line, "wrapper closed stdout before tools/list reply"
        list_resp = json.loads(line.decode("utf-8"))
        assert "result" in list_resp, f"tools/list error: {list_resp}"
        tools = list_resp["result"]["tools"]
        names = {t["name"] for t in tools}
        assert len(names) == 12, (
            f"tools/list returned {len(names)} tools, expected 12. "
            f"names={sorted(names)}"
        )
        assert list_elapsed < 4.0, (
            f"tools/list took {list_elapsed:.2f}s with no daemon — "
            f"regression: wrapper is blocking server.connect on "
            f"bridge.start (the mcp-tools-list-empty-cache bug)."
        )

        # ---- Assertion 2: wrapper survives the bridge connect timeout ----
        # The in-flight bridge.start() promise rejects ~5s after wrapper
        # boot (SOCKET_CONNECT_TIMEOUT_MS in bridge.ts). The
        # `.catch(() => {})` on the fire-and-forget chain in index.ts
        # must swallow that rejection. 7s = 5s timeout + 2s slack for
        # slow Node startup.
        time.sleep(7.0)
        assert wrapper_proc.poll() is None, (
            f"wrapper exited (rc={wrapper_proc.returncode}) past the "
            f"5s bridge connect timeout — fire-and-forget bridge.start "
            f"chain is leaking the rejection. The .catch(() => {{}}) on "
            f"the top-level chain in index.ts must absorb "
            f"DaemonUnreachableError."
        )

        # ---- Assertion 3 (fail-loud at right layer): tools/call surfaces error ----
        # Daemon-down failures must NOT be silent. Pre-fix the symptom
        # was an empty tools list; post-fix tools/list is served but
        # tools/call MUST return an error so the user sees what happened.
        call_req = {
            "jsonrpc": "2.0",
            "id": 3,
            "method": "tools/call",
            "params": {
                "name": "memory_recall",
                "arguments": {"cue": "no-daemon test"},
            },
        }
        wrapper_proc.stdin.write((json.dumps(call_req) + "\n").encode("utf-8"))
        wrapper_proc.stdin.flush()
        # The lazy bridge.start() await inside the call handler hits the
        # 5s connect timeout again; wait up to 12s for the reply.
        deadline = time.monotonic() + 12.0
        call_line = b""
        while time.monotonic() < deadline:
            readable, _, _ = _select.select([wrapper_proc.stdout], [], [], 0.5)
            if readable:
                call_line = wrapper_proc.stdout.readline()
                break
        assert call_line, "wrapper did not respond to tools/call within 12s"
        call_resp = json.loads(call_line.decode("utf-8"))
        assert "result" in call_resp, f"tools/call missing result: {call_resp}"
        result = call_resp["result"]
        # The wrapper renders bridge errors as content with isError=True
        # (see the CallToolRequestSchema handler in index.ts). What is NOT
        # acceptable is silent success.
        # NOTE(review): a bare JSON-RPC `error` envelope fails the
        # `"result" in call_resp` assert above; relax it if legacy paths
        # that reply that way must be tolerated.
        is_error = result.get("isError") is True
        content_text = ""
        if isinstance(result.get("content"), list) and result["content"]:
            content_text = result["content"][0].get("text", "") or ""
        lowered = content_text.lower()
        surfaced = (
            is_error
            or "daemon_unreachable" in lowered
            or "daemonunreachable" in lowered
        )
        assert surfaced, (
            f"tools/call did NOT surface daemon_unreachable when daemon "
            f"is missing — fail-loud invariant violated. result={result}"
        )

        # ---- Assertion 4 (UNCHANGED invariant): no spawn ----
        # Give any (hypothetically) spawned-but-detached daemon 1s to
        # show up in psutil.
        time.sleep(1.0)
        after = _count_iai_mcp_processes()
        daemon_delta = after["daemon"] - daemon_baseline
        assert daemon_delta == 0, (
            f"REGRESSION: wrapper spawned {daemon_delta} new iai_mcp.daemon "
            f"process(es) (baseline={daemon_baseline}, after={after['daemon']}). "
            f"Phase 7.1 wrappers MUST NOT spawn the daemon — the spawn-fallback "
            f"chain in bridge.ts has been re-introduced."
        )
        core_delta = after["core"] - core_baseline
        assert core_delta == 0, (
            f"wrapper spawned {core_delta} iai_mcp.core process(es) "
            f"(baseline={core_baseline}, after={after['core']})"
        )
    finally:
        if wrapper_proc.poll() is None:
            try:
                wrapper_proc.terminate()
                wrapper_proc.wait(timeout=5)
            except subprocess.TimeoutExpired:
                wrapper_proc.kill()
                # Reap the killed child so it does not linger as a zombie.
                wrapper_proc.wait()
        _kill_test_daemons(sock_path)
        time.sleep(0.3)
        # Remove the socket file, the store and the scratch directory
        # itself; previously only the socket file was unlinked.
        shutil.rmtree(sock_dir, ignore_errors=True)
|
||||
|
||||
|
||||
|
||||
def test_start_succeeds_with_warm_daemon_no_extra_spawn(built_wrapper, tmp_path):
    """R2 happy path: the wrapper connects to an already-running daemon.

    The test starts a daemon on the test socket itself (mimicking what
    launchd does in production). The wrapper must then complete the MCP
    initialize handshake, run a memory_recall round-trip, AND NOT spawn
    a second daemon.

    This proves:
      (a) bridge.ts:start() still works against a warm socket (no
          regression in the connect path).
      (b) The wrapper does NOT spawn a second daemon when one already
          exists (the singleton property — trivially true in 7.1
          because the spawn code is gone).
    """
    import shutil

    # Scratch dir lives directly under /tmp rather than tmp_path —
    # presumably to keep the AF_UNIX socket path short; TODO confirm.
    sock_dir = Path(f"/tmp/iai-7.1-warm-{os.getpid()}-{id(tmp_path)}")
    sock_dir.mkdir(parents=True, exist_ok=True)
    sock_path = sock_dir / "d.sock"
    store_dir = sock_dir / "store"
    store_dir.mkdir(parents=True, exist_ok=True)
    assert not sock_path.exists()

    # Pre-start a daemon manually (mimics launchd socket-activated spawn
    # in production; tests use the manual-run code path per D7.1-09
    # backward compat).
    daemon_proc = _spawn_daemon_in_background(sock_path, store_dir)
    try:
        # Wait for the daemon to bind. Cold-start (bge-small load +
        # LanceDB open + asyncio.start_unix_server) is empirically
        # 3-10s on macOS.
        assert _wait_for_daemon_socket(sock_path, timeout_sec=30.0), (
            f"daemon did not bind socket {sock_path} within 30s"
        )

        # Snapshot AFTER the daemon is up but BEFORE the wrapper spawns.
        # Any new daemon during wrapper boot = singleton-violation
        # regression.
        baseline = _count_iai_mcp_processes()
        daemon_baseline = baseline["daemon"]
        core_baseline = baseline["core"]

        env_overrides = {
            "IAI_DAEMON_SOCKET_PATH": str(sock_path),
            "IAI_MCP_STORE": str(store_dir),
        }
        wrapper_proc = _spawn_wrapper(built_wrapper, env_overrides)
        try:
            # MCP initialize handshake — the wrapper must connect to the
            # warm daemon and reply.
            init_resp = _initialize(wrapper_proc, rpc_id=1)
            assert "result" in init_resp, f"initialize failed: {init_resp}"

            # memory_recall round-trip — proves the JSON-RPC wire path
            # over the socket works end-to-end.
            elapsed, recall_resp = _call_memory_recall(
                wrapper_proc, cue="phase 7.1 warm-daemon test",
                rpc_id=2, timeout_sec=10.0,
            )
            # Either a result (recall hit/miss) or an error envelope is
            # acceptable — what matters is that JSON-RPC came back.
            assert "result" in recall_resp or "error" in recall_resp, recall_resp

            # Round-trip should be sub-second on a warm daemon. Generous
            # 2s budget against test-harness overhead (subprocess
            # startup, MCP handshake jitter); the SPEC A6 250ms budget is
            # verified in Wave 6 acceptance against the production daemon.
            assert elapsed < 2.0, (
                f"warm-daemon memory_recall took {elapsed:.2f}s, exceeds "
                f"2.0s safety budget"
            )

            # Give any (hypothetically) spawned daemon 0.5s to surface.
            time.sleep(0.5)
            after = _count_iai_mcp_processes()

            # No new daemon — singleton property holds.
            daemon_delta = after["daemon"] - daemon_baseline
            assert daemon_delta == 0, (
                f"REGRESSION: wrapper spawned a second daemon during boot "
                f"(baseline={daemon_baseline}, after={after['daemon']}, "
                f"delta={daemon_delta}). wrappers MUST be pure "
                f"connectors."
            )
            core_delta = after["core"] - core_baseline
            assert core_delta == 0, (
                f"wrapper spawned iai_mcp.core (delta={core_delta})"
            )
        finally:
            try:
                wrapper_proc.terminate()
                wrapper_proc.wait(timeout=5)
            except subprocess.TimeoutExpired:
                wrapper_proc.kill()
                # Reap the killed child so it does not linger as a zombie.
                wrapper_proc.wait()
    finally:
        # Stop the test daemon (we started it; we stop it).
        try:
            daemon_proc.terminate()
            daemon_proc.wait(timeout=10)
        except subprocess.TimeoutExpired:
            daemon_proc.kill()
            daemon_proc.wait()
        _kill_test_daemons(sock_path)
        time.sleep(0.3)
        # Remove the socket file, the store and the scratch directory
        # itself; previously only the socket file was unlinked.
        shutil.rmtree(sock_dir, ignore_errors=True)
|
||||
175
tests/test_camouflaging_detection.py
Normal file
175
tests/test_camouflaging_detection.py
Normal file
|
|
@ -0,0 +1,175 @@
|
|||
"""Plan 03-03 Task 1 RED + Task 2 GREEN — camouflaging detector.
|
||||
|
||||
Constitutional guard: detector observes user SURFACE formality trajectory (D-AUTIST13-01,
|
||||
D-AUTIST13-03). When an over-formal sliding-5 weekly trajectory is confirmed, the system
|
||||
adjusts OUR register (D-AUTIST13-02) — never pushes the user to change. Masking
|
||||
modeling is forbidden (Cook 2021 / Raymaker 2020).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timedelta, timezone
|
||||
|
||||
import pytest
|
||||
|
||||
from iai_mcp.events import query_events, write_event
|
||||
from iai_mcp.store import MemoryStore
|
||||
|
||||
|
||||
def _seed_weekly_scores(store, values: list[float]) -> None:
    """Write one ``formality_score_weekly`` event per entry of ``values``.

    Events are written in list order. Their ``week_iso`` stamps are one
    week apart, starting ``len(values)`` weeks before now, so the last
    entry is stamped one week ago.

    Args:
        store: Event store handed straight to ``write_event``.
        values: Formality scores, oldest first.
    """
    one_week = timedelta(days=7)
    first_week = datetime.now(timezone.utc) - one_week * len(values)
    for offset, score in enumerate(values):
        payload = {
            "score": float(score),
            "lang": "en",
            "week_iso": (first_week + one_week * offset).isoformat(),
            "samples": 10,
        }
        write_event(
            store,
            kind="formality_score_weekly",
            data=payload,
            severity="info",
        )
|
||||
|
||||
|
||||
# ------------------------------------------------------------- detector
|
||||
def test_detect_camouflaging_rising_trajectory(tmp_path):
    """Five rising weekly scores (slope > 0.05, mean > 0.6) are flagged."""
    from iai_mcp.camouflaging import detect_camouflaging

    rising_scores = [0.4, 0.55, 0.65, 0.75, 0.85]
    store = MemoryStore(path=tmp_path)
    _seed_weekly_scores(store, rising_scores)

    verdict = detect_camouflaging(store)

    assert verdict["detected"] is True
    assert verdict["trajectory_slope"] > 0.05
    assert verdict["current_mean"] > 0.6
|
||||
|
||||
|
||||
def test_detect_camouflaging_flat_trajectory(tmp_path):
    """A constant 0.5 series (no slope, mean 0.5) is not flagged."""
    from iai_mcp.camouflaging import detect_camouflaging

    flat_scores = [0.5] * 5
    store = MemoryStore(path=tmp_path)
    _seed_weekly_scores(store, flat_scores)

    verdict = detect_camouflaging(store)

    assert verdict["detected"] is False
|
||||
|
||||
|
||||
def test_detect_camouflaging_insufficient_samples(tmp_path):
    """Only two weekly samples: not flagged, and the count is reported."""
    from iai_mcp.camouflaging import detect_camouflaging

    too_few = [0.3, 0.5]
    store = MemoryStore(path=tmp_path)
    _seed_weekly_scores(store, too_few)

    verdict = detect_camouflaging(store)

    assert verdict["detected"] is False
    assert verdict["sample_count"] == 2
|
||||
|
||||
|
||||
def test_detect_camouflaging_high_mean_but_flat_no_detect(tmp_path):
    """High mean (0.7) without any slope is not flagged — both conditions are needed."""
    from iai_mcp.camouflaging import detect_camouflaging

    high_but_flat = [0.7] * 5
    store = MemoryStore(path=tmp_path)
    _seed_weekly_scores(store, high_but_flat)

    verdict = detect_camouflaging(store)

    # The mean condition holds, the slope condition does not.
    assert verdict["detected"] is False
|
||||
|
||||
|
||||
def test_detect_camouflaging_rising_but_low_mean_no_detect(tmp_path):
    """A rising series whose mean stays below 0.6 is not flagged."""
    from iai_mcp.camouflaging import detect_camouflaging

    rising_but_low = [0.1, 0.15, 0.2, 0.3, 0.4]
    store = MemoryStore(path=tmp_path)
    _seed_weekly_scores(store, rising_but_low)

    verdict = detect_camouflaging(store)

    assert verdict["detected"] is False
|
||||
|
||||
|
||||
# ------------------------------------------------------------- weekly pass
|
||||
def test_run_weekly_pass_emits_events_and_bumps_knob(tmp_path):
    """On a detected trajectory the weekly pass emits both events and raises the knob.

    Expects at least one ``camouflaging_detected`` and one
    ``register_relaxed`` event, and a ``camouflaging_relaxation`` value
    above the 0.0 it was reset to.
    """
    from iai_mcp.camouflaging import run_weekly_pass

    # Reset the per-process profile state so we start at 0.0 regardless
    # of earlier tests.
    import iai_mcp.core as core
    core._profile_state["camouflaging_relaxation"] = 0.0

    store = MemoryStore(path=tmp_path)
    _seed_weekly_scores(store, [0.4, 0.55, 0.65, 0.75, 0.85])
    run_weekly_pass(store)

    detected = query_events(store, kind="camouflaging_detected", limit=5)
    relaxed = query_events(store, kind="register_relaxed", limit=5)
    assert len(detected) >= 1
    assert len(relaxed) >= 1

    # Knob moved up from 0.0.
    assert core._profile_state["camouflaging_relaxation"] > 0.0
|
||||
|
||||
|
||||
def test_run_weekly_pass_flat_no_events(tmp_path):
    """A flat trajectory emits neither event and leaves the knob at 0.0."""
    import iai_mcp.core as core
    from iai_mcp.camouflaging import run_weekly_pass

    # Start from a known knob value regardless of earlier tests.
    core._profile_state["camouflaging_relaxation"] = 0.0

    store = MemoryStore(path=tmp_path)
    _seed_weekly_scores(store, [0.5] * 5)
    run_weekly_pass(store)

    for event_kind in ("camouflaging_detected", "register_relaxed"):
        assert query_events(store, kind=event_kind, limit=5) == []
    assert core._profile_state["camouflaging_relaxation"] == 0.0
|
||||
|
||||
|
||||
# ------------------------------------------------------------- record + relax
|
||||
def test_record_user_formality_writes_weekly_event(tmp_path):
    """One call to record_user_formality yields one weekly event with a score in [0, 1]."""
    from iai_mcp.camouflaging import record_user_formality

    store = MemoryStore(path=tmp_path)
    sample_text = "The proposal is, therefore, accepted."
    record_user_formality(store, sample_text, "en")

    events = query_events(store, kind="formality_score_weekly", limit=5)
    assert len(events) == 1

    payload = events[0]["data"]
    assert "score" in payload
    assert 0.0 <= payload["score"] <= 1.0
|
||||
|
||||
|
||||
def test_relax_register_bumps_and_emits(tmp_path):
    """relax_register raises the knob by ``delta`` and logs a register_relaxed event.

    The event payload must record the applied delta plus the knob value
    before (``from``) and after (``to``) the change.
    """
    import iai_mcp.core as core
    from iai_mcp.camouflaging import relax_register

    tolerance = 1e-9
    core._profile_state["camouflaging_relaxation"] = 0.0

    store = MemoryStore(path=tmp_path)
    relax_register(store, delta=0.25)
    assert abs(core._profile_state["camouflaging_relaxation"] - 0.25) < tolerance

    events = query_events(store, kind="register_relaxed", limit=5)
    assert len(events) == 1
    payload = events[0]["data"]
    for field, expected in (("delta", 0.25), ("from", 0.0), ("to", 0.25)):
        assert abs(payload[field] - expected) < tolerance
|
||||
|
||||
|
||||
def test_relax_register_caps_at_one(tmp_path):
    """An oversized delta (0.95 + 0.5) leaves the knob at exactly 1.0."""
    import iai_mcp.core as core
    from iai_mcp.camouflaging import relax_register

    knob = "camouflaging_relaxation"
    core._profile_state[knob] = 0.95

    store = MemoryStore(path=tmp_path)
    relax_register(store, delta=0.5)

    assert core._profile_state[knob] == 1.0
|
||||
207
tests/test_capture_dedup_contract.py
Normal file
207
tests/test_capture_dedup_contract.py
Normal file
|
|
@ -0,0 +1,207 @@
|
|||
"""Phase 07.11 Plan 01 / — `memory_capture` dedup contract.
|
||||
|
||||
These four regression tests are the executable specification for D-01:
|
||||
|
||||
* `test_query_similar_accepts_tier_kwarg` — `query_similar` must accept a
|
||||
`tier` kwarg, must filter at the LanceDB where-layer when it is given, and
|
||||
must `ValueError` BEFORE any I/O on bad tier values.
|
||||
* `test_capture_turn_dedups_on_high_cos_match` — capturing the same cue twice
|
||||
yields one inserted + one reinforced; the dedup branch is reachable.
|
||||
* `test_capture_turn_inserts_on_low_cos` — distinct cues both insert; no
|
||||
false dedup.
|
||||
* `test_reinforce_record_increments_edge_weight` — the new
|
||||
`store.reinforce_record` typed wrapper is a thin `boost_edges` delegate
|
||||
whose self-loop weight increases monotonically across calls.
|
||||
|
||||
Honesty constraint: every test below MUST fail on `git stash` of the
|
||||
plan's source diffs and pass on `git stash pop`. RED-witness ran 2026-04-30
|
||||
on un-fixed source: tier-kwarg + reinforce_record cases TypeError before the
|
||||
fix; dedup cases fail because the dedup branch is unreachable dead code.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from uuid import UUID, uuid4
|
||||
|
||||
import pytest
|
||||
|
||||
from iai_mcp.capture import capture_turn
|
||||
from iai_mcp.store import MemoryStore
|
||||
from iai_mcp.types import EMBED_DIM, MemoryRecord
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- fixtures
|
||||
# Pattern copied verbatim from tests/test_pipeline_anti_hits_malformed.py:33-50
|
||||
# (`_isolated_keyring` autouse fixture is the project canon for tests touching
|
||||
# encrypted records on the construction host where the real keyring is absent
|
||||
# or hangs).
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _isolated_keyring(monkeypatch: pytest.MonkeyPatch):
    """Redirect the keyring module's password functions to an in-memory dict.

    ``get_password``, ``set_password`` and ``delete_password`` are patched
    for the duration of each test, so no test touches a real keyring
    backend.

    Yields:
        The backing dict, keyed by ``(service, username)``.
    """
    import keyring as _keyring

    vault: dict[tuple[str, str], str] = {}

    def _get(s, u):
        return vault.get((s, u))

    def _set(s, u, p):
        return vault.__setitem__((s, u), p)

    def _delete(s, u):
        # Deleting a missing entry is a no-op rather than an error.
        return vault.pop((s, u), None)

    stubs = (
        ("get_password", _get),
        ("set_password", _set),
        ("delete_password", _delete),
    )
    for attr_name, stub in stubs:
        monkeypatch.setattr(_keyring, attr_name, stub)
    yield vault
|
||||
|
||||
|
||||
@pytest.fixture
def store(tmp_path: Path) -> MemoryStore:
    """Provide a MemoryStore rooted at ``<tmp_path>/lancedb`` for one test."""
    db_dir = tmp_path / "lancedb"
    return MemoryStore(path=db_dir)
|
||||
|
||||
|
||||
def _make_record(
    rid: UUID,
    surface: str = "topic",
    *,
    tier: str = "episodic",
    embedding: list[float] | None = None,
) -> MemoryRecord:
    """Build a minimal MemoryRecord for the tests in this file.

    Mirrors the shape used in the sibling test file
    `test_pipeline_anti_hits_malformed.py:_make_record`. When no embedding
    is given, every record gets the same ``[0.1] * EMBED_DIM`` vector, so
    records made with the default embedding share a high-cosine
    neighbourhood (the dedup tests need that).

    Args:
        rid: Record id.
        surface: Value for ``literal_surface``.
        tier: Memory tier of the record.
        embedding: Optional explicit vector; it is copied, not aliased.

    Returns:
        A record whose ``created_at`` and ``updated_at`` are both "now" (UTC).
    """
    if embedding is None:
        vector = [0.1] * EMBED_DIM
    else:
        vector = list(embedding)
    stamp = datetime.now(timezone.utc)
    return MemoryRecord(
        # Identity and content.
        id=rid,
        tier=tier,
        literal_surface=surface,
        aaak_index="",
        embedding=vector,
        # Neutral graph / scheduling defaults.
        community_id=None,
        centrality=0.0,
        detail_level=2,
        pinned=False,
        stability=0.0,
        difficulty=0.0,
        last_reviewed=None,
        never_decay=False,
        never_merge=False,
        # Bookkeeping.
        provenance=[],
        created_at=stamp,
        updated_at=stamp,
        tags=[],
        language="en",
    )
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- tests
|
||||
|
||||
|
||||
def test_query_similar_accepts_tier_kwarg(store):
    """D-01 step 1: ``query_similar`` honours the ``tier`` kwarg.

    Pre-fix: TypeError("got an unexpected keyword argument 'tier'").
    Post-fix: ``tier="episodic"`` returns only episodic rows, a bad tier
    value raises ValueError, and ``tier=None`` keeps the legacy
    unfiltered behaviour.
    """
    episodic_id, semantic_id = uuid4(), uuid4()
    store.insert(_make_record(episodic_id, "episodic-cue", tier="episodic"))
    store.insert(_make_record(semantic_id, "semantic-cue", tier="semantic"))
    probe = [0.1] * EMBED_DIM

    episodic_hits = {
        rec.id for rec, _ in store.query_similar(probe, k=10, tier="episodic")
    }
    assert episodic_id in episodic_hits, "episodic record should be returned by tier='episodic'"
    assert semantic_id not in episodic_hits, "semantic record must be filtered out by tier='episodic'"

    # An unknown tier is rejected.
    with pytest.raises(ValueError):
        store.query_similar(probe, k=10, tier="bogus")

    # Backwards-compat: tier=None applies no filter, so both rows come back.
    unfiltered_hits = {
        rec.id for rec, _ in store.query_similar(probe, k=10, tier=None)
    }
    assert episodic_id in unfiltered_hits and semantic_id in unfiltered_hits
|
||||
|
||||
|
||||
def test_capture_turn_dedups_on_high_cos_match(store):
    """Capturing the same turn twice must reinforce, not insert a duplicate.

    The first ``capture_turn`` call is expected to report
    ``status="inserted"``; an identical second call is expected to report
    ``status="reinforced"`` with a ``reason`` containing ``"cos="``.
    Afterwards the store must hold exactly one episodic record.

    Per the original test notes (D-01 step 3), the dedup branch was
    unreachable before the fix because of three stacked bugs: the ``tier``
    kwarg TypeError being swallowed, a score lookup on a tuple returning
    None, and a single-UUID list passed where ``boost_edges`` expects pairs.
    """
    turn_kwargs = dict(
        store=store,
        text="the user prefers Russian on the surface; English in storage",
        cue="lang preference",
        tier="episodic",
        session_id="s1",
        role="user",
    )

    first = capture_turn(**turn_kwargs)
    assert first["status"] == "inserted", f"first capture should insert, got {first}"

    second = capture_turn(**turn_kwargs)
    assert second["status"] == "reinforced", f"second capture should reinforce, got {second}"
    assert "cos=" in second["reason"], f"reason should record cosine score, got {second}"

    # Still a single episodic record -- the repeat was not stored again.
    episodic_total = sum(1 for rec in store.iter_records() if rec.tier == "episodic")
    assert episodic_total == 1
|
||||
|
||||
|
||||
def test_capture_turn_inserts_on_low_cos(store):
    """Two unrelated turns must both be inserted.

    This is the counterpart of the dedup test: it fails if the dedup
    branch fires for every capture, so an over-eager fix is caught.
    """
    shared = dict(store=store, tier="episodic", session_id="s1", role="user")

    apple = capture_turn(text="apples are red", cue="apple", **shared)
    physics = capture_turn(
        text="quantum chromodynamics describes the strong force",
        cue="qcd",
        **shared,
    )

    assert apple["status"] == "inserted", f"first insert expected, got {apple}"
    assert physics["status"] == "inserted", f"second insert expected, got {physics}"

    episodic_total = sum(1 for rec in store.iter_records() if rec.tier == "episodic")
    assert episodic_total == 2
|
||||
|
||||
|
||||
def test_reinforce_record_increments_edge_weight(store):
    """Each ``reinforce_record`` call must raise the record's self-loop weight.

    ``store.reinforce_record(rid)`` is called twice for one inserted record.
    Both results must contain the key ``(str(rid), str(rid))`` and the second
    weight must be strictly greater than the first.

    Per the original test notes (D-01 step 2), ``reinforce_record`` did not
    exist before the fix, and it is a wrapper that passes ``[(rid, rid)]``
    to ``boost_edges``.
    """
    anchor_id = uuid4()
    store.insert(_make_record(anchor_id, "anchor-record"))

    first_weights = store.reinforce_record(anchor_id)
    second_weights = store.reinforce_record(anchor_id)

    # The self-loop is keyed by the stringified id on both ends.
    loop_key = (str(anchor_id), str(anchor_id))
    assert loop_key in first_weights, f"self-loop key missing from first call: {first_weights}"
    assert loop_key in second_weights, f"self-loop key missing from second call: {second_weights}"

    before, after = first_weights[loop_key], second_weights[loop_key]
    assert after > before, (
        f"weight must strictly increase across calls: w1={before} w2={after}"
    )
|
||||
428
tests/test_capture_queue.py
Normal file
428
tests/test_capture_queue.py
Normal file
|
|
@ -0,0 +1,428 @@
|
|||
"""Phase 10.2 Plan 10.2-01 Task 1.2 -- capture_queue.py test suite.
|
||||
|
||||
Covers atomic append (incl. crash simulation), 50-thread concurrent
|
||||
append, idempotent ingest with mid-handler crash, lock-skip semantics,
|
||||
overflow + audit log, verbatim Unicode round-trip, list_pending sort
|
||||
order, schema-version mismatch, empty-queue ingest, ULID lex<->time
|
||||
order, and lock-file cleanup on success/failure.
|
||||
|
||||
All tests use ``tmp_path`` -- no production queue at ``~/.iai-mcp/pending/``
|
||||
is touched.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import errno
|
||||
import fcntl
|
||||
import json
|
||||
import os
|
||||
import threading
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import pytest
|
||||
|
||||
from iai_mcp.capture_queue import (
|
||||
DEFAULT_MAX_SIZE,
|
||||
SCHEMA_VERSION,
|
||||
CaptureQueue,
|
||||
CaptureQueueSchemaError,
|
||||
generate_ulid,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _sample_record(i: int = 0, surface: str | None = None) -> dict:
|
||||
"""Return a minimally valid record envelope dict."""
|
||||
return {
|
||||
"surface": surface if surface is not None else f"sample text {i}",
|
||||
"cue": f"cue {i}",
|
||||
"tier": "episodic",
|
||||
"session_id": "test-session",
|
||||
"role": "user",
|
||||
}
|
||||
|
||||
|
||||
def _write_envelope_directly(
    queue_dir: Path,
    ulid: str,
    record: dict,
    *,
    schema_version: int = SCHEMA_VERSION,
    appended_at: str = "2026-05-02T15:00:00+00:00",
) -> Path:
    """Seed ``queue_dir`` with a pending file without using ``CaptureQueue.append``.

    The envelope is written as compact UTF-8 JSON to
    ``pending-<ulid>.json`` so a test can control fields such as
    ``schema_version`` and ``appended_at``.

    Returns:
        The path of the file that was written.
    """
    target = queue_dir / f"pending-{ulid}.json"
    serialized = json.dumps(
        {
            "ulid": ulid,
            "appended_at": appended_at,
            "record": record,
            "schema_version": schema_version,
        },
        ensure_ascii=False,
        separators=(",", ":"),
    )
    target.write_text(serialized, encoding="utf-8")
    return target
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 1. Basic append + file creation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_append_returns_ulid_and_creates_file(tmp_path):
    """``append`` returns a 26-character ULID string and writes its envelope.

    The committed ``pending-<ulid>.json`` file must carry the same ULID,
    the current ``SCHEMA_VERSION``, the original record, and an
    ``appended_at`` value that ``datetime.fromisoformat`` accepts.
    """
    from datetime import datetime

    queue = CaptureQueue(queue_dir=tmp_path)
    new_ulid = queue.append(_sample_record(0))

    assert isinstance(new_ulid, str)
    assert len(new_ulid) == 26

    committed = tmp_path / f"pending-{new_ulid}.json"
    assert committed.exists()

    stored = json.loads(committed.read_text(encoding="utf-8"))
    assert stored["ulid"] == new_ulid
    assert stored["schema_version"] == SCHEMA_VERSION
    assert stored["record"]["surface"] == "sample text 0"
    # No assertion needed: fromisoformat raises if the timestamp is malformed.
    datetime.fromisoformat(stored["appended_at"])

    assert queue.pending_count() == 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 2. Atomic append under simulated crash (os.replace patched to raise)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_append_atomic_under_crash_simulation(tmp_path, monkeypatch):
    """A failing ``os.replace`` must not leave a committed pending file.

    ``os.replace`` (as seen through ``iai_mcp.capture_queue``) is swapped
    for a function that raises ``OSError(EIO)``. The append must propagate
    the error and ``pending_count`` must remain 0; leftover temp files are
    not checked. Once the real function is put back, a normal append
    succeeds.
    """
    patch_target = "iai_mcp.capture_queue.os.replace"
    original_replace = os.replace
    queue = CaptureQueue(queue_dir=tmp_path)

    def failing_replace(src, dst):
        raise OSError(errno.EIO, "simulated crash mid-rename")

    monkeypatch.setattr(patch_target, failing_replace)

    with pytest.raises(OSError):
        queue.append(_sample_record(0))

    # Nothing was published under the final name.
    assert queue.pending_count() == 0
    committed = [
        path
        for path in tmp_path.glob("pending-*.json")
        if not path.name.endswith(".tmp")
    ]
    assert committed == []

    # Put the real os.replace back explicitly and confirm appends work again.
    monkeypatch.setattr(patch_target, original_replace)
    queue.append(_sample_record(1))
    assert queue.pending_count() == 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 3. Concurrent append (50 threads * 10 records each = 500)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_concurrent_append_50_threads(tmp_path):
    """50 threads appending 10 records each must yield 500 distinct files.

    Each worker collects its ULIDs locally and publishes them under a lock
    once its loop completes; any exception is recorded and surfaced through
    the final assertions. Every pending file must parse as JSON with the
    expected schema version and a ``thread-`` surface.
    """
    queue = CaptureQueue(queue_dir=tmp_path)
    n_threads, n_per_thread = 50, 10
    expected_total = n_threads * n_per_thread

    failures: list[BaseException] = []
    collected: list[str] = []
    collected_guard = threading.Lock()

    def worker(tid: int) -> None:
        try:
            mine = [
                queue.append(_sample_record(i, f"thread-{tid}-record-{i}"))
                for i in range(n_per_thread)
            ]
            with collected_guard:
                collected.extend(mine)
        except BaseException as exc:  # pragma: no cover - surfaced via assertion
            failures.append(exc)

    pool = []
    for tid in range(n_threads):
        pool.append(threading.Thread(target=worker, args=(tid,)))
    for thread in pool:
        thread.start()
    for thread in pool:
        thread.join(timeout=30)
        assert not thread.is_alive(), "worker thread hung"

    assert failures == [], f"workers raised: {failures!r}"
    assert len(collected) == expected_total
    # Every ULID is unique.
    assert len(set(collected)) == len(collected)

    # Every committed file is readable, well-formed JSON.
    pending_paths = queue.list_pending()
    assert len(pending_paths) == expected_total
    for path in pending_paths:
        stored = json.loads(path.read_text(encoding="utf-8"))
        assert stored["schema_version"] == SCHEMA_VERSION
        assert stored["record"]["surface"].startswith("thread-")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 4. Idempotent ingest -- crash mid-handler leaves both files, retry works
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_idempotent_ingest_crash_mid_handler(tmp_path):
    """A handler exception leaves the record queued; a retry then drains it.

    After the failing ingest, both the pending file and its ``.lock``
    companion must still exist. A second ingest with a working handler
    must deliver the record exactly once and remove both files.
    """
    queue = CaptureQueue(queue_dir=tmp_path)
    queued_ulid = queue.append(_sample_record(42, surface="payload-42"))

    json_file = tmp_path / f"pending-{queued_ulid}.json"
    lock_file = tmp_path / f"pending-{queued_ulid}.lock"

    def exploding_handler(_record: dict) -> None:
        raise RuntimeError("handler exploded")

    with pytest.raises(RuntimeError):
        queue.ingest_pending(exploding_handler)

    # Failure path: nothing was consumed or cleaned up.
    assert json_file.exists(), "pending file must remain after handler exception"
    assert lock_file.exists(), "lock file must remain to mark mid-flight crash"
    assert queue.pending_count() == 1

    # Retry path: a well-behaved handler receives the record.
    delivered: list[dict] = []

    def collecting_handler(record: dict) -> None:
        delivered.append(record)

    ingested = queue.ingest_pending(collecting_handler)
    assert ingested == 1
    assert len(delivered) == 1
    assert delivered[0]["surface"] == "payload-42"

    # Success path removes both artefacts.
    assert not json_file.exists()
    assert not lock_file.exists()
    assert queue.pending_count() == 0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 5. Lock contention -- A held externally, B and C still ingest
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_idempotent_ingest_lock_skipped(tmp_path):
    """A record whose lock file is held elsewhere is skipped by ingest.

    Records A, B and C are appended. The test takes a non-blocking
    exclusive ``flock`` on A's lock file, then ingests: only B and C may
    reach the handler and A's pending file must stay on disk.
    """
    queue = CaptureQueue(queue_dir=tmp_path)
    ulid_for = {
        label: queue.append(_sample_record(idx, surface=label))
        for idx, label in enumerate("ABC", start=1)
    }
    held, free_b, free_c = ulid_for["A"], ulid_for["B"], ulid_for["C"]

    # Hold A's lock from outside the queue for the duration of the ingest.
    held_lock = tmp_path / f"pending-{held}.lock"
    lock_fd = os.open(str(held_lock), os.O_WRONLY | os.O_CREAT, 0o600)
    try:
        fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)

        handled: list[str] = []

        def handler(record: dict) -> None:
            handled.append(record["surface"])

        ingested = queue.ingest_pending(handler)

        # Only the two unlocked records were processed.
        assert ingested == 2
        assert sorted(handled) == ["B", "C"]
        # The locked record is still waiting; the others are gone.
        assert (tmp_path / f"pending-{held}.json").exists()
        assert not (tmp_path / f"pending-{free_b}.json").exists()
        assert not (tmp_path / f"pending-{free_c}.json").exists()
    finally:
        # Best-effort unlock; the descriptor is closed regardless.
        try:
            fcntl.flock(lock_fd, fcntl.LOCK_UN)
        except OSError:
            pass
        os.close(lock_fd)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 6. Overflow -- exceed max, oldest pruned, audit log populated
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_overflow_prune_oldest(tmp_path):
    """Appending past ``max_size`` prunes records and logs each drop.

    With ``max_size=100`` and 110 appends, the test checks only these
    invariants:

    1. The final ``pending_count`` is at most ``max_size``.
    2. At least one record was dropped (appended minus still pending).
    3. ``.overflow-audit.log`` holds exactly one JSON line per dropped
       record, each with ``reason == "queue_overflow"``, a ``dropped_ulid``
       that this test appended, and an integer ``queue_size_before_prune``.

    NOTE(review): the previous notes quoted two different post-prune
    counts (99 and 0, citing a prune headroom of 100) -- confirm the real
    headroom against ``capture_queue`` before relying on an exact count.
    """
    max_size = 100
    n_total = 110
    queue = CaptureQueue(queue_dir=tmp_path, max_size=max_size)

    appended_ulids: list[str] = [
        queue.append(_sample_record(i)) for i in range(n_total)
    ]

    remaining = queue.pending_count()
    assert remaining <= max_size

    audit_file = tmp_path / ".overflow-audit.log"
    assert audit_file.exists(), "audit log must exist after overflow"

    audit_entries = []
    for raw_line in audit_file.read_text(encoding="utf-8").splitlines():
        if raw_line:
            audit_entries.append(json.loads(raw_line))

    dropped = n_total - remaining
    assert dropped > 0, "at least one record must have been dropped on overflow"
    assert len(audit_entries) == dropped, (
        f"expected {dropped} audit entries, got {len(audit_entries)}"
    )
    for entry in audit_entries:
        assert entry["reason"] == "queue_overflow"
        assert entry["dropped_ulid"] in appended_ulids
        assert isinstance(entry["queue_size_before_prune"], int)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 7. Verbatim round-trip -- Russian + English + emoji + Greek + symbols
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_verbatim_round_trip_unicode(tmp_path):
    """A mixed-script surface survives append + ingest unchanged.

    The payload mixes Cyrillic, ASCII, an emoji, Greek, a math symbol, an
    em dash and accented letters. The string handed to the handler must
    equal the original, both as ``str`` and as UTF-8 bytes.
    """
    payload = "Привет, world! 🧠 Δ ∑ — combining é vs é"
    queue = CaptureQueue(queue_dir=tmp_path)
    queue.append(_sample_record(0, surface=payload))

    received: list[str] = []

    def handler(record: dict) -> None:
        received.append(record["surface"])

    ingested = queue.ingest_pending(handler)
    assert ingested == 1
    assert len(received) == 1

    # Compare as text and as encoded bytes.
    round_tripped = received[0]
    assert round_tripped == payload
    assert round_tripped.encode("utf-8") == payload.encode("utf-8")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 8. list_pending sort order is oldest-first
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_list_pending_sort_order(tmp_path):
    """``list_pending`` returns files in the order they were appended.

    Twenty records are appended; the ULIDs recovered from the listed paths
    must match the append order exactly.
    """
    queue = CaptureQueue(queue_dir=tmp_path)

    appended_order = []
    for i in range(20):
        appended_order.append(queue.append(_sample_record(i)))

    listed_order = [queue._ulid_from_path(path) for path in queue.list_pending()]
    assert listed_order == appended_order, "list_pending must be oldest-first"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 9. Schema-version mismatch raises CaptureQueueSchemaError
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_schema_version_mismatch_raises(tmp_path):
    """Ingesting an envelope with an unknown schema version raises.

    A pending file with ``schema_version=99`` is written directly to disk.
    ``ingest_pending`` must raise ``CaptureQueueSchemaError`` whose message
    mentions both ``schema_version`` and ``99``, and the handler must not
    be invoked.
    """
    queue = CaptureQueue(queue_dir=tmp_path)
    _write_envelope_directly(
        tmp_path,
        ulid="01HZQTESTBADSCHEMA00000000",
        record=_sample_record(0),
        schema_version=99,
    )
    assert queue.pending_count() == 1

    def handler(_record: dict) -> None:  # pragma: no cover -- never called
        pytest.fail("handler must not be called on schema mismatch")

    with pytest.raises(CaptureQueueSchemaError) as excinfo:
        queue.ingest_pending(handler)

    message = str(excinfo.value)
    assert "schema_version" in message
    assert "99" in message
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 10. Empty queue -- ingest returns 0, no errors
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_empty_queue_ingest_returns_zero(tmp_path):
    """Ingesting an empty queue returns 0 and never calls the handler."""
    queue = CaptureQueue(queue_dir=tmp_path)
    assert queue.pending_count() == 0

    calls: list[dict] = []

    def handler(_record: dict) -> None:  # pragma: no cover -- never called
        calls.append(_record)

    ingested = queue.ingest_pending(handler)
    assert ingested == 0
    assert calls == []
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 11. ULID lex sort matches generation/time order over many samples
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_ulid_lexicographic_sort_matches_time_order():
    """1000 ULIDs generated back to back are unique and already sorted.

    Sorting the generated strings must not reorder them. Per the original
    test notes, a monotonic guard inside the generator is what keeps this
    true when several ULIDs share a wall-clock millisecond.
    """
    sample_size = 1000
    generated = []
    for _ in range(sample_size):
        generated.append(generate_ulid())

    assert len(set(generated)) == sample_size, "no ULID collisions allowed"
    assert sorted(generated) == generated, "lex sort must equal generation order"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 12. Lock file cleaned up on handler success
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_lock_file_cleanup_on_handler_success(tmp_path):
    """After a successful ingest neither the lock nor the pending file remains."""
    queue = CaptureQueue(queue_dir=tmp_path)
    queued_ulid = queue.append(_sample_record(0))

    def handler(_record: dict) -> None:
        # Deliberately a no-op: only the state after ingest is checked,
        # not the lock file's presence while the handler runs.
        return None

    ingested = queue.ingest_pending(handler)
    assert ingested == 1

    lock_file = tmp_path / f"pending-{queued_ulid}.lock"
    json_file = tmp_path / f"pending-{queued_ulid}.json"
    assert not lock_file.exists(), "lock file must be cleaned on success"
    assert not json_file.exists()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 13. Lock file persists on handler exception (mid-flight crash marker)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_lock_file_persists_on_handler_exception(tmp_path):
    """A handler exception leaves both the pending file and its lock behind.

    The ``ValueError`` raised by the handler must propagate out of
    ``ingest_pending``.
    """
    queue = CaptureQueue(queue_dir=tmp_path)
    queued_ulid = queue.append(_sample_record(0))

    def handler(_record: dict) -> None:
        raise ValueError("simulated mid-handler crash")

    with pytest.raises(ValueError):
        queue.ingest_pending(handler)

    json_file = tmp_path / f"pending-{queued_ulid}.json"
    lock_file = tmp_path / f"pending-{queued_ulid}.lock"
    assert json_file.exists(), "pending must remain after handler exception"
    assert lock_file.exists(), "lock must remain to mark mid-flight crash"
|
||||
332
tests/test_capture_transcript_no_spawn.py
Normal file
332
tests/test_capture_transcript_no_spawn.py
Normal file
|
|
@ -0,0 +1,332 @@
|
|||
"""Phase 7.1 Plan 05 / R3 acceptance — `iai-mcp capture-transcript --no-spawn`.
|
||||
|
||||
Eliminates the third spawn vector from forensic anomaly #3 (Stop-hook
|
||||
spawning iai_mcp.daemon under N-session race). When 3 Claude sessions close
|
||||
within seconds, 3 hooks each fire `iai-mcp capture-transcript --no-spawn`;
|
||||
ZERO daemons get spawned. Each invocation either (a) talks to the existing
|
||||
daemon if one is up, or (b) writes a JSONL deferral file and exits 0 within
|
||||
2s. The hook never blocks session teardown.
|
||||
|
||||
This module covers:
|
||||
- Test A: writes deferred file when daemon is unreachable
|
||||
- Test B: completes in under 2s wall-clock (R3 budget)
|
||||
- Test C: spawns ZERO new iai_mcp.* processes
|
||||
- Test D: --no-spawn surfaces in --help; default (no flag) keeps Phase 6
|
||||
behavior (exit 0 + stdout JSON, no deferred file)
|
||||
- Test E: deferred JSONL v1 header + per-turn event lines (D7.1-04)
|
||||
- Test F: missing transcript -> header-only file, no exception
|
||||
|
||||
Test isolation:
|
||||
- HOME=tmp_path so `Path.home()` resolves to a fresh dir; the user's
|
||||
real ~/.iai-mcp/.deferred-captures/ is never touched.
|
||||
- IAI_DAEMON_SOCKET_PATH=/tmp/iai-no-spawn-<pid>-<n>/d.sock so the
|
||||
250ms socket probe never hits the user's real daemon.
|
||||
- Subprocess invocation: `[sys.executable, '-m', 'iai_mcp.cli', ...]`
|
||||
with PYTHONPATH set; we don't depend on the `iai-mcp` console script
|
||||
being on PATH (test_socket_subagent_reuse.py:115-116 pattern).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import platform
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import psutil
|
||||
import pytest
|
||||
|
||||
REPO = Path(__file__).resolve().parent.parent
|
||||
|
||||
# POSIX-only: subprocess + AF_UNIX socket probe; fork-style daemon counts.
|
||||
pytestmark = pytest.mark.skipif(
|
||||
platform.system() == "Windows",
|
||||
reason="POSIX subprocess + AF_UNIX",
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers (copied from test_socket_subagent_reuse.py to keep this module
|
||||
# standalone — that test owns the canonical pattern, but cross-importing
|
||||
# would couple two unrelated test modules).
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _count_iai_mcp_processes() -> dict[str, int]:
    """Count host processes whose command line mentions iai_mcp.core / iai_mcp.daemon.

    Returns a dict with keys ``"core"`` and ``"daemon"``. A process whose
    command line contains both markers is counted under both keys.
    Processes with an empty command line, or that raise
    ``NoSuchProcess`` / ``AccessDenied`` while being inspected, are skipped.
    """
    markers = {"core": "iai_mcp.core", "daemon": "iai_mcp.daemon"}
    tally = dict.fromkeys(markers, 0)

    for proc in psutil.process_iter(["cmdline"]):
        try:
            argv = proc.info.get("cmdline") or []
            if argv:
                # Individual argv entries may be None; treat them as empty.
                flat = " ".join(part or "" for part in argv)
                for label, needle in markers.items():
                    if needle in flat:
                        tally[label] += 1
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            continue

    return tally
|
||||
|
||||
|
||||
def _isolated_env(tmp_path: Path) -> tuple[dict[str, str], Path]:
    """Build a subprocess environment isolated to ``tmp_path``.

    Creates a per-test socket directory under ``/tmp`` and a ``.iai-mcp``
    directory under ``tmp_path``, then returns a copy of ``os.environ``
    with ``HOME``, ``IAI_DAEMON_SOCKET_PATH``, the keyring fail-backend,
    a test crypto passphrase, and a ``PYTHONPATH`` that starts with the
    repository's ``src`` directory.

    Returns:
        ``(env, deferred_dir)`` where ``deferred_dir`` is
        ``<tmp_path>/.iai-mcp/.deferred-captures`` (not created here).
    """
    socket_dir = Path(f"/tmp/iai-no-spawn-{os.getpid()}-{id(tmp_path)}")
    socket_dir.mkdir(parents=True, exist_ok=True)

    iai_home = tmp_path / ".iai-mcp"
    iai_home.mkdir(parents=True, exist_ok=True)

    env = os.environ.copy()
    env.update(
        {
            "HOME": str(tmp_path),
            "IAI_DAEMON_SOCKET_PATH": str(socket_dir / "d.sock"),
            # Defense-in-depth: if the inline path is ever exercised, the
            # fail-backend keeps the child from waiting on a keychain prompt.
            "PYTHON_KEYRING_BACKEND": "keyring.backends.fail.Keyring",
            "IAI_MCP_CRYPTO_PASSPHRASE": "test-no-spawn-pass",
            # Lets the child import iai_mcp without an editable install.
            "PYTHONPATH": str(REPO / "src") + os.pathsep + env.get("PYTHONPATH", ""),
        }
    )

    return env, iai_home / ".deferred-captures"
|
||||
|
||||
|
||||
def _make_transcript(tmp_path: Path) -> Path:
|
||||
"""Write a 3-turn Claude Code-style JSONL transcript."""
|
||||
turns = [
|
||||
{"type": "user", "message": {"role": "user", "content": "hello world"}},
|
||||
{"type": "assistant", "message": {"role": "assistant", "content": "hi back at you"}},
|
||||
{"type": "user", "message": {"role": "user", "content": "third turn here"}},
|
||||
]
|
||||
transcript_path = tmp_path / "transcript.jsonl"
|
||||
transcript_path.write_text("\n".join(json.dumps(t) for t in turns) + "\n")
|
||||
return transcript_path
|
||||
|
||||
|
||||
def _run_no_spawn(env: dict[str, str], transcript_path: Path) -> subprocess.CompletedProcess:
    """Run ``capture-transcript --no-spawn`` on ``transcript_path`` in a child process.

    The CLI is started as ``python -m iai_mcp.cli`` with session id
    ``test-r3`` and the given environment. Output is captured as text and
    the call is limited to 5 seconds, well above the 2-second budget the
    command itself is expected to meet.
    """
    command = [
        sys.executable,
        "-m",
        "iai_mcp.cli",
        "capture-transcript",
        "--no-spawn",
        "--session-id",
        "test-r3",
        str(transcript_path),
    ]
    return subprocess.run(
        command,
        env=env,
        capture_output=True,
        text=True,
        timeout=5,
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Subprocess tests (Tests A-D).
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_no_spawn_writes_deferred_when_daemon_down(tmp_path):
    """Test A: with no daemon reachable, --no-spawn leaves one deferral file.

    The command must exit 0 and print JSON with ``status == "deferred"``.
    The single ``*.jsonl`` file in the deferred directory must start with
    a version-1 header for session ``test-r3`` followed by at least one
    episodic event line.
    """
    env, deferred_dir = _isolated_env(tmp_path)
    transcript = _make_transcript(tmp_path)

    result = _run_no_spawn(env, transcript)
    assert result.returncode == 0, f"stderr={result.stderr!r} stdout={result.stdout!r}"

    summary = json.loads(result.stdout.strip())
    assert summary.get("status") == "deferred", summary

    deferral_files = sorted(deferred_dir.glob("*.jsonl"))
    assert len(deferral_files) == 1, f"expected 1 deferral file, got {deferral_files}"

    lines = deferral_files[0].read_text().splitlines()
    assert len(lines) >= 2, f"expected header + ≥1 event, got {lines}"
    header_line, event_lines = lines[0], lines[1:]

    header = json.loads(header_line)
    assert header["version"] == 1, header
    assert header["session_id"] == "test-r3", header
    assert "deferred_at" in header
    assert "cwd" in header

    # Every remaining line is an event with non-empty text.
    for raw_event in event_lines:
        event = json.loads(raw_event)
        assert "text" in event and event["text"], event
        assert event["tier"] == "episodic", event
        assert event["role"] in {"user", "assistant"}, event
|
||||
|
||||
|
||||
def test_no_spawn_completes_in_under_2s(tmp_path):
    """Test B: R3 acceptance -- the --no-spawn invocation finishes in under 2s.

    Elapsed time is taken from ``time.perf_counter()``, a monotonic clock,
    so a system clock adjustment during the run cannot shorten or lengthen
    the measured duration.
    """
    env, _ = _isolated_env(tmp_path)
    transcript = _make_transcript(tmp_path)

    t0 = time.perf_counter()
    proc = _run_no_spawn(env, transcript)
    duration = time.perf_counter() - t0

    assert proc.returncode == 0, f"stderr={proc.stderr!r}"
    assert duration < 2.0, (
        f"--no-spawn took {duration:.3f}s; R3 budget is <2.0s. "
        f"Hook would block session teardown."
    )
|
||||
|
||||
|
||||
def test_no_spawn_does_not_spawn_daemon(tmp_path):
    """Test C: running --no-spawn must not add iai_mcp daemon or core processes.

    Host-wide process counts are taken before the invocation and again
    0.5 seconds after it returns; neither count may have increased.
    """
    env, _ = _isolated_env(tmp_path)
    transcript = _make_transcript(tmp_path)

    before = _count_iai_mcp_processes()
    proc = _run_no_spawn(env, transcript)
    # Short settle window so a would-be spawned process has time to show up.
    time.sleep(0.5)
    after = _count_iai_mcp_processes()

    assert proc.returncode == 0, f"stderr={proc.stderr!r}"

    # Compare the two snapshots per process kind.
    for kind in ("daemon", "core"):
        delta = after[kind] - before[kind]
        assert delta <= 0, (
            f"--no-spawn spawned {delta} new {kind}(s); R3 violated. "
            f"before={before} after={after}"
        )
|
||||
|
||||
|
||||
def test_no_spawn_flag_default_false(tmp_path):
    """Test D: --no-spawn is listed in --help and is off by default.

    Two subprocess runs are made:

    1. ``capture-transcript --help`` must exit 0 and mention ``--no-spawn``.
    2. ``capture-transcript <missing file>`` without the flag must exit 0,
       print JSON containing an ``errors`` or ``inserted`` key on stdout,
       and must not leave any ``*.jsonl`` file in the deferred directory.
    """
    env, deferred_dir = _isolated_env(tmp_path)
    cli_prefix = [sys.executable, "-m", "iai_mcp.cli", "capture-transcript"]

    # 1) The flag is advertised in the subcommand help.
    help_run = subprocess.run(
        cli_prefix + ["--help"],
        env=env,
        capture_output=True,
        text=True,
        timeout=5,
    )
    assert help_run.returncode == 0, help_run.stderr
    assert "--no-spawn" in help_run.stdout, help_run.stdout

    # 2) Default invocation (no flag) against a transcript that does not exist.
    default_run = subprocess.run(
        cli_prefix + [str(tmp_path / "no-such-file.jsonl")],
        env=env,
        capture_output=True,
        text=True,
        timeout=10,
    )
    assert default_run.returncode == 0, default_run.stderr

    # The result JSON is expected on stdout, not stderr.
    summary = json.loads(default_run.stdout.strip())
    assert "errors" in summary or "inserted" in summary, summary

    # Only --no-spawn may write deferral files.
    if deferred_dir.exists():
        assert not list(deferred_dir.glob("*.jsonl")), (
            f"default capture-transcript must not write deferred files; got "
            f"{list(deferred_dir.glob('*.jsonl'))}"
        )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Pure unit tests of write_deferred_captures (Tests E and F).
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_deferred_jsonl_format_v1_header(tmp_path, monkeypatch):
    """Test E: ``write_deferred_captures`` writes a v1 header plus one line per turn.

    With ``HOME`` pointed at ``tmp_path``, the output must land in
    ``<HOME>/.iai-mcp/.deferred-captures`` as ``unit-e-*.jsonl`` and hold
    four lines: the header and the three turns of the sample transcript.
    """
    monkeypatch.setenv("HOME", str(tmp_path))
    transcript = _make_transcript(tmp_path)

    from iai_mcp.capture import write_deferred_captures

    written = write_deferred_captures(
        session_id="unit-e",
        transcript_path=transcript,
        cwd="/some/cwd",
    )

    assert written.exists()
    assert written.parent == tmp_path / ".iai-mcp" / ".deferred-captures"
    # File name starts with the session id and uses the .jsonl suffix.
    assert written.name.startswith("unit-e-"), written.name
    assert written.suffix == ".jsonl", written.name

    lines = written.read_text().splitlines()
    # One header line followed by one event per transcript turn.
    assert len(lines) == 4, lines
    header_line, event_lines = lines[0], lines[1:]

    header = json.loads(header_line)
    assert header["version"] == 1
    assert header["session_id"] == "unit-e"
    assert header["cwd"] == "/some/cwd"
    assert "deferred_at" in header

    required_keys = {"text", "cue", "tier", "role", "ts"}
    known_texts = {"hello world", "hi back at you", "third turn here"}
    for raw_event in event_lines:
        event = json.loads(raw_event)
        assert set(event.keys()) >= required_keys, event.keys()
        assert event["tier"] == "episodic"
        assert event["role"] in {"user", "assistant"}
        assert event["text"] in known_texts
|
||||
|
||||
|
||||
def test_deferred_jsonl_handles_missing_transcript(tmp_path, monkeypatch):
    """Test F: a missing transcript yields a header-only file and no exception."""
    monkeypatch.setenv("HOME", str(tmp_path))

    from iai_mcp.capture import write_deferred_captures

    # The call itself must not raise even though the transcript is absent.
    absent_transcript = tmp_path / "does-not-exist.jsonl"
    deferred_file = write_deferred_captures(
        session_id="unit-f",
        transcript_path=absent_transcript,
    )

    assert deferred_file.exists()
    lines = deferred_file.read_text().splitlines()
    assert len(lines) == 1, f"expected header-only, got {lines}"

    header = json.loads(lines[0])
    assert header["version"] == 1
    assert header["session_id"] == "unit-f"
    # No cwd was passed, so the header must still carry a non-empty string.
    cwd_value = header.get("cwd")
    assert isinstance(cwd_value, str) and cwd_value, header
|
||||
360
tests/test_capture_transcript_no_spawn_defer.py
Normal file
360
tests/test_capture_transcript_no_spawn_defer.py
Normal file
|
|
@ -0,0 +1,360 @@
|
|||
"""Phase 7.5 acceptance — `iai-mcp capture-transcript --no-spawn` ALWAYS defers.
|
||||
|
||||
Closes the embedder cold-load amplification documented in SPEC 07.5: every
|
||||
Stop-hook invocation (286/day on 2026-04-27) was loading bge-small-en-v1.5
|
||||
in a brand-new Python subprocess on the daemon-reachable path. Forensic
|
||||
evidence: stderr `Loading weights: 0%|...| 0/391 ...|██| 391/391` × 10 +
|
||||
`leaked semaphore objects at shutdown` × 7.
|
||||
|
||||
Fix: `cmd_capture_transcript` `--no-spawn` branch in `src/iai_mcp/cli.py`
|
||||
no longer probes the socket and no longer imports
|
||||
`iai_mcp.capture.capture_transcript` / `iai_mcp.store.MemoryStore`. It
|
||||
unconditionally calls `write_deferred_captures(...)` and prints
|
||||
`{"status": "deferred", "path": "..."}`. The daemon's WAKE drain (Phase
|
||||
7.1 R3 / Plan 07.1-06) consumes deferred files with the daemon's
|
||||
already-loaded embedder.
|
||||
|
||||
Test matrix:
|
||||
- Test 1: subprocess + reachable mock socket (real AF_UNIX listener) →
|
||||
status="deferred", stderr has ZERO `Loading weights` and ZERO
|
||||
`sentence_transformers` mentions. The reachable case used to inline-embed;
|
||||
now it must defer just like the unreachable case.
|
||||
- Test 2: subprocess + unreachable socket (back-compat) → identical output.
|
||||
Locks down that the new always-defer path doesn't regress the existing
|
||||
unreachable behaviour.
|
||||
- Test 3: subprocess + fresh interpreter introspects `sys.modules` AFTER the
|
||||
CLI handler runs end-to-end → asserts `iai_mcp.embed` and
|
||||
`sentence_transformers` are NOT loaded. Subprocess required because other
|
||||
pytest tests in the same session may pre-load `iai_mcp.embed`, which
|
||||
poisons in-process `sys.modules` checks.
|
||||
- Test 4: in-process source-string scan of the modified function body →
|
||||
asserts the `--no-spawn` block contains zero `capture_transcript` /
|
||||
`MemoryStore` import statements. Cheap structural lockdown so the inline
|
||||
path can't be reintroduced without breaking a test (SPEC A1).
|
||||
|
||||
Test isolation:
|
||||
- HOME=tmp_path so `Path.home()` resolves to a fresh dir; the user's
|
||||
real ~/.iai-mcp/.deferred-captures/ is never touched.
|
||||
- IAI_DAEMON_SOCKET_PATH=/tmp/iai-no-spawn-defer-<pid>-<n>/d.sock so the
|
||||
reachable case binds a real listener and the unreachable case points to
|
||||
a non-existent path.
|
||||
- Subprocess invocation: `[sys.executable, '-m', 'iai_mcp.cli', ...]` with
|
||||
PYTHONPATH set; we don't depend on the `iai-mcp` console script being on
|
||||
PATH (matches the test_capture_transcript_no_spawn.py pattern).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import platform
|
||||
import re
|
||||
import socket
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
# Repository root: the directory one level above the one holding this module.
REPO = Path(__file__).resolve().parents[1]

# Every test here relies on POSIX subprocess behaviour and AF_UNIX sockets,
# so the whole module is skipped on Windows.
pytestmark = pytest.mark.skipif(
    platform.system() == "Windows", reason="POSIX subprocess + AF_UNIX"
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Shared helpers (kept local to keep this module standalone — the canonical
|
||||
# pattern lives in test_capture_transcript_no_spawn.py but cross-importing
|
||||
# would couple two unrelated test modules).
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _isolated_env(tmp_path: Path) -> tuple[dict[str, str], Path, Path]:
    """Build a child-process environment with HOME and the daemon socket isolated.

    Args:
        tmp_path: Per-test directory; becomes HOME for the spawned process.

    Returns:
        ``(env, deferred_dir, sock_path)``:

        * ``env`` — copy of ``os.environ`` with the overrides set below.
        * ``deferred_dir`` — ``tmp_path/.iai-mcp/.deferred-captures``, the
          directory the tests glob for deferred JSONL files. It is NOT
          created here; only its parent ``.iai-mcp`` is.
        * ``sock_path`` — ``d.sock`` inside a directory under ``/tmp`` keyed
          by pid and ``id(tmp_path)``. Only the parent directory is created;
          the socket file itself is left absent so a caller can either bind
          a listener on it (reachable case) or leave it missing
          (unreachable case).
    """
    sock_dir = Path(f"/tmp/iai-no-spawn-defer-{os.getpid()}-{id(tmp_path)}")
    sock_dir.mkdir(parents=True, exist_ok=True)
    sock_path = sock_dir / "d.sock"

    iai_dir = tmp_path / ".iai-mcp"
    iai_dir.mkdir(parents=True, exist_ok=True)

    env = os.environ.copy()
    env["HOME"] = str(tmp_path)
    env["IAI_DAEMON_SOCKET_PATH"] = str(sock_path)
    # Defense-in-depth: if the inline path is somehow exercised, force the
    # fail-backend so we don't hang on the real keychain prompt.
    env["PYTHON_KEYRING_BACKEND"] = "keyring.backends.fail.Keyring"
    env["IAI_MCP_CRYPTO_PASSPHRASE"] = "test-no-spawn-defer-pass"

    # Make the spawned python find iai_mcp without an editable install.
    # Join only non-empty entries: appending an empty inherited value would
    # leave a trailing separator, i.e. an extra empty search-path entry.
    search_path = [str(REPO / "src")]
    inherited = env.get("PYTHONPATH", "")
    if inherited:
        search_path.append(inherited)
    env["PYTHONPATH"] = os.pathsep.join(search_path)

    return env, iai_dir / ".deferred-captures", sock_path
|
||||
|
||||
|
||||
def _make_transcript(tmp_path: Path) -> Path:
    """Create ``transcript.jsonl`` under *tmp_path* with three Claude Code-style turns.

    Each line is one JSON object of the form
    ``{"type": <role>, "message": {"role": <role>, "content": <text>}}``.
    Returns the path of the written file.
    """
    spoken = (
        ("user", "hello phase 7 5"),
        ("assistant", "ack always defer"),
        ("user", "third defer turn"),
    )
    rows = [
        json.dumps({"type": who, "message": {"role": who, "content": said}})
        for who, said in spoken
    ]
    destination = tmp_path / "transcript.jsonl"
    # One JSON document per line, newline-terminated.
    destination.write_text("".join(row + "\n" for row in rows))
    return destination
|
||||
|
||||
|
||||
def _run_no_spawn(env: dict[str, str], transcript_path: Path) -> subprocess.CompletedProcess:
    """Run ``capture-transcript --no-spawn`` on *transcript_path* in a child interpreter.

    The CLI is launched as ``python -m iai_mcp.cli`` with session id
    ``test-phase75`` under the supplied *env*; stdout/stderr are captured as
    text. The 5s timeout sits comfortably above the 2s contract the
    implementation must meet.
    """
    argv = [sys.executable, "-m", "iai_mcp.cli"]
    argv += ["capture-transcript", "--no-spawn"]
    argv += ["--session-id", "test-phase75"]
    argv.append(str(transcript_path))
    completed = subprocess.run(
        argv,
        env=env,
        capture_output=True,
        text=True,
        timeout=5,
    )
    return completed
|
||||
|
||||
|
||||
def _bind_listener(sock_path: Path) -> socket.socket:
    """Bind and start an AF_UNIX stream listener at *sock_path*.

    Used to make the daemon socket look reachable, so that a connect probe
    such as `_try_short_timeout_connect` would succeed if the OLD code path
    were reached.

    Args:
        sock_path: Filesystem path for the socket. Missing parent directories
            are created and a pre-existing file at the path is removed first.

    Returns:
        The listening socket. The caller must close it and unlink
        *sock_path*; use try/finally.

    Raises:
        OSError: if binding or listening fails. The socket is closed before
            the error propagates so no descriptor is leaked.
    """
    sock_path.parent.mkdir(parents=True, exist_ok=True)
    if sock_path.exists():
        # A stale socket file would make bind() fail with "address in use".
        sock_path.unlink()
    listener = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    try:
        listener.bind(str(sock_path))
        listener.listen(1)
    except BaseException:
        listener.close()
        raise
    return listener
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 1: reachable mock socket — must STILL defer (not inline-insert).
|
||||
# This is the load-bearing acceptance: the OLD behaviour on this
|
||||
# branch was inline ingest with embedder cold-load. NEW behaviour: defer.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_no_spawn_reachable_defers_not_inserts(tmp_path):
    """Phase 7.5 R1: with a live listener on the daemon socket, --no-spawn
    must still write a deferred-captures JSONL, exit 0 and report
    status="deferred" rather than ingesting inline."""
    env, deferred_dir, sock_path = _isolated_env(tmp_path)
    transcript = _make_transcript(tmp_path)

    # Keep a real listener bound for the duration of the CLI run only.
    live_socket = _bind_listener(sock_path)
    try:
        result = _run_no_spawn(env, transcript)
    finally:
        live_socket.close()
        sock_path.unlink(missing_ok=True)

    assert result.returncode == 0, f"stderr={result.stderr!r} stdout={result.stdout!r}"

    # stdout is a JSON object with status="deferred" (NOT "inserted": N).
    payload = json.loads(result.stdout.strip())
    assert payload.get("status") == "deferred", (
        f"reachable case must defer under Phase 7.5; got {payload!r}"
    )
    assert "path" in payload, payload
    assert "inserted" not in payload, (
        f"inline-ingest path must not run under --no-spawn; got {payload!r}"
    )

    # A cold embedder load prints a `Loading weights` progress bar to stderr,
    # so a clean stderr is the empirical proof that no load happened.
    captured_err = result.stderr
    assert "Loading weights" not in captured_err, (
        f"embedder cold-loaded on reachable --no-spawn path (Phase 7.5 broken):\n"
        f"{captured_err}"
    )
    assert "sentence_transformers" not in captured_err, (
        f"sentence_transformers touched on reachable --no-spawn path:\n"
        f"{captured_err}"
    )

    # On-disk side effect: exactly one deferred JSONL whose first line is the v1 header.
    files = sorted(deferred_dir.glob("*.jsonl"))
    assert len(files) == 1, f"expected 1 deferred file, got {files}"
    first_line = files[0].read_text().splitlines()[0]
    header = json.loads(first_line)
    assert header["version"] == 1
    assert header["session_id"] == "test-phase75"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 2: unreachable socket — back-compat. Same output as Test 1.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_no_spawn_unreachable_still_defers(tmp_path):
    """Back-compat guard: with NO listener on the daemon socket, --no-spawn
    behaves like the reachable case — it defers. Locks down that the
    always-defer path does not regress the pre-existing behaviour."""
    env, deferred_dir, sock_path = _isolated_env(tmp_path)
    transcript = _make_transcript(tmp_path)

    # Precondition: nothing is bound, the socket file is absent.
    assert not sock_path.exists()

    result = _run_no_spawn(env, transcript)
    assert result.returncode == 0, f"stderr={result.stderr!r} stdout={result.stdout!r}"

    payload = json.loads(result.stdout.strip())
    assert payload.get("status") == "deferred", payload
    assert "inserted" not in payload, payload

    # Same stderr cleanliness invariant as the reachable case.
    for marker in ("Loading weights", "sentence_transformers"):
        assert marker not in result.stderr, result.stderr

    files = sorted(deferred_dir.glob("*.jsonl"))
    assert len(files) == 1, f"expected 1 deferred file, got {files}"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 3: fresh subprocess introspects sys.modules to prove no embedder load.
|
||||
# In-process is unreliable because pytest sessions pre-load iai_mcp.embed via
|
||||
# other test modules (test_recall_cue_router, test_active_inference_gate,
|
||||
# test_invariant_anchor_edges, test_schema_instance_of_edges).
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_no_spawn_zero_embedder_imports_in_fresh_process(tmp_path):
    """Phase 7.5 R1 (import-isolation): run the `--no-spawn` CLI handler
    end-to-end in a brand-new interpreter and verify that `iai_mcp.embed`
    and `sentence_transformers` never appear in its `sys.modules`."""
    env, deferred_dir, _sock_path = _isolated_env(tmp_path)
    transcript = _make_transcript(tmp_path)

    # Driver executed via `python -c`: call main(), then print the module
    # names of interest as one tagged JSON line so it can be told apart
    # from whatever the CLI itself writes to stdout.
    driver = (
        "import sys, json\n"
        "from iai_mcp.cli import main\n"
        "rc = main([\n"
        " 'capture-transcript', '--no-spawn',\n"
        " '--session-id', 'test-phase75-fresh',\n"
        f" {str(transcript)!r},\n"
        "])\n"
        "loaded = sorted(\n"
        " k for k in sys.modules\n"
        " if k == 'iai_mcp.embed' or k.startswith('iai_mcp.embed.')\n"
        " or k == 'sentence_transformers' or k.startswith('sentence_transformers.')\n"
        " or k == 'torch' or k.startswith('torch.')\n"
        " or k == 'transformers' or k.startswith('transformers.')\n"
        ")\n"
        "print('IAIMCP75_DUMP=' + json.dumps({'rc': rc, 'loaded': loaded}))\n"
    )

    result = subprocess.run(
        [sys.executable, "-c", driver],
        env=env,
        capture_output=True,
        text=True,
        timeout=10,
    )
    assert result.returncode == 0, f"driver failed: stderr={result.stderr!r}"

    # Pick out the tagged dump line; the CLI may print its own JSON first.
    tag = "IAIMCP75_DUMP="
    dump_lines = [ln for ln in result.stdout.splitlines() if ln.startswith(tag)]
    assert len(dump_lines) == 1, f"expected 1 dump line, got {dump_lines!r}"
    dump = json.loads(dump_lines[0][len(tag):])

    assert dump["rc"] == 0, f"main() returned {dump['rc']}"

    # Load-bearing assertion: only the embedder module and
    # sentence_transformers (plus their submodules) are treated as forbidden;
    # torch/transformers are collected by the driver but not asserted on.
    embedder_roots = ("iai_mcp.embed", "sentence_transformers")
    forbidden = {
        name
        for name in set(dump["loaded"])
        if any(name == root or name.startswith(root + ".") for root in embedder_roots)
    }
    assert not forbidden, (
        f"--no-spawn must not import embedder/ML deps; loaded: {sorted(forbidden)}"
    )

    # Side effect: the fresh-interpreter run left a deferred file on disk.
    assert any(deferred_dir.glob("*.jsonl"))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 4: structural lockdown — the modified function body must not contain
|
||||
# the reintroduced inline imports. Cheap, in-process, regression-proof
|
||||
# (SPEC A1: "Verified by static grep on the modified function").
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_no_spawn_branch_has_no_inline_imports():
    """Phase 7.5 A1 lockdown: the `if no_spawn:` block in
    `cmd_capture_transcript` contains zero imports of
    `iai_mcp.capture.capture_transcript` and `iai_mcp.store.MemoryStore`.

    This is a static scan of ``src/iai_mcp/cli.py``; it prevents quiet
    reintroduction of the inline-embed path. The slicing regexes tolerate
    any indentation width and a function that is last in the file, so
    formatting-only changes do not break the lockdown.
    """
    # Explicit encoding: the source file must not be decoded with whatever
    # the runner's locale happens to be.
    cli_src = (REPO / "src" / "iai_mcp" / "cli.py").read_text(encoding="utf-8")

    # Locate the function body: everything after the first line of the
    # signature up to the next top-level def/class, or end of file.
    fn_match = re.search(
        r"^def cmd_capture_transcript\(.*?\n(.*?)(?=^(?:async[ \t]+)?def |^class |\Z)",
        cli_src,
        flags=re.MULTILINE | re.DOTALL,
    )
    assert fn_match, "could not locate cmd_capture_transcript in cli.py"
    fn_body = fn_match.group(1)

    # Slice the `if no_spawn:` branch — everything between the `if no_spawn:`
    # line and the next `# Default path` marker comment, at whatever
    # indentation either appears. The default-mode path lives below that
    # marker and IS allowed to import capture_transcript + MemoryStore.
    no_spawn_match = re.search(
        r"^[ \t]+if no_spawn:[ \t]*\n(.*?)^[ \t]*# Default path",
        fn_body,
        flags=re.MULTILINE | re.DOTALL,
    )
    assert no_spawn_match, (
        "could not isolate `if no_spawn:` block; layout drifted from fix"
    )
    no_spawn_block = no_spawn_match.group(1)

    # The branch must reference write_deferred_captures and nothing else
    # heavy.
    assert "write_deferred_captures" in no_spawn_block, (
        "no_spawn branch must call write_deferred_captures"
    )

    # Forbidden inline-ingest imports.
    assert "from iai_mcp.capture import capture_transcript" not in no_spawn_block, (
        "Phase 7.5 regression: capture_transcript reintroduced into "
        "--no-spawn branch (would trigger embedder cold-load on every "
        "Stop-hook fire)"
    )
    assert "from iai_mcp.store import MemoryStore" not in no_spawn_block, (
        "Phase 7.5 regression: MemoryStore reintroduced into --no-spawn "
        "branch"
    )

    # Defensive: no probe call either — the SPEC removes it from this branch.
    assert "_try_short_timeout_connect" not in no_spawn_block, (
        "socket probe must be gone from --no-spawn branch (the "
        "probe was the gate that selected the inline path)"
    )
|
||||
180
tests/test_cascade_cooldown.py
Normal file
180
tests/test_cascade_cooldown.py
Normal file
|
|
@ -0,0 +1,180 @@
|
|||
"""Phase 07.2-03 R2 / A2 regression test — cascade poll cooldown.
|
||||
|
||||
Mechanism: mock `iai_mcp.daemon.time.monotonic` (the daemon-side cooldown
|
||||
clock) AND monkeypatch `HIPPEA_CASCADE_POLL_SEC` to 0.05s so the loop
|
||||
body re-enters fast on the real event loop, while the cooldown is gated
|
||||
by the mocked simulated-time clock. Drive the loop forward by advancing
|
||||
the mock clock in 5-second simulated steps; assert the body ran at most
|
||||
ceil(window/60)+1 = 6 times across the simulated 5-minute window.
|
||||
|
||||
Both monkeypatches are required for the test to have teeth:
|
||||
- Without `HIPPEA_CASCADE_POLL_SEC=0.05`, the real-wall-time poll wait
|
||||
(5s) limits real iterations to ~1 in a 1.2s test window → `n==1`
|
||||
passes the `n <= 6` assertion trivially without any cooldown.
|
||||
- Without `time.monotonic` mocking, the cooldown gate sees real elapsed
|
||||
wall time (~1s in test) and never gates anything (60s threshold).
|
||||
|
||||
Project async-test idiom (mandatory): sync `def test_*` + `asyncio.run`.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.mark.skip(
    reason=(
        "Plan 07.2-03 documented fallback (Task 2 'Note on test pragmatism'): "
        "patching `iai_mcp.daemon.time.monotonic` deadlocks asyncio's internal "
        "scheduler — `BaseEventLoop.time()` reads `time.monotonic()` for every "
        "deadline, so frozen clock => `await asyncio.wait_for(...)` never "
        "expires. Plan explicitly pre-authorizes simplifying to "
        "`test_cooldown_clears_after_min_interval_elapsed` only (which proves "
        "the underlying elapsed-comparison gate logic without asyncio). The "
        "plan also forbids swapping to pytest-asyncio. R2 acceptance is "
        "carried by the unit test below + the gate code path's exclusive "
        "dependence on `time.monotonic - _last_cascade_completed_at` "
        "(mechanically equivalent under any clock that advances)."
    )
)
def test_at_most_six_cascades_over_five_minute_window_with_continuous_pending(monkeypatch):
    """R2 acceptance (currently skipped): the cooldown caps cascades at ≤ 6 per 5 min.

    Synchronous entry point that drives the async scenario with
    ``asyncio.run``, per the project's async-test idiom.
    """
    scenario = _at_most_six_cascades_body(monkeypatch)
    asyncio.run(scenario)
|
||||
|
||||
|
||||
async def _at_most_six_cascades_body(monkeypatch):
    """Drive `_hippea_cascade_loop` through 300 simulated seconds and count cascades.

    The daemon's `time.monotonic` is replaced by a controllable clock that
    the test advances in 5-second steps, while the loop's real poll period is
    shrunk to 0.05s so the body re-enters often. A request that is always
    pending means only the cooldown gate can limit how often
    `build_runtime_graph` (the counting stub) runs.
    """
    import iai_mcp.daemon as daemon_mod

    cascade_invocations: list[float] = []
    sentinel_assignment = type("Asgmt", (), {"top_communities": [], "mid_regions": {}})()

    # Simulated clock, starting at 1000.0 and advanced only by this test.
    sim_clock = 1000.0

    def fake_monotonic():
        return sim_clock

    def counting_stub(store):
        # Stands in for build_runtime_graph; records the simulated time of each call.
        cascade_invocations.append(fake_monotonic())
        return (None, sentinel_assignment, [])

    async def fast_cascade_stub(store, assignment, **kwargs):
        return {"communities_selected": 0, "records_warmed": 0}

    # The request stays pending forever, so the body is always ELIGIBLE and
    # only the cooldown gate keeps the rate in check.
    always_pending = {"pending": True, "session_id": "test"}
    state_holder = {
        "fsm_state": "WAKE",
        "hippea_cascade_request": dict(always_pending),
    }

    def load_state_stub():
        return dict(state_holder)

    def save_state_stub(state):
        # Accept whatever the loop saved, then re-arm pending=true — this
        # simulates many sessions all keeping the request flag high.
        state_holder.update(state)
        state_holder["hippea_cascade_request"] = dict(always_pending)

    def write_event_stub(*args, **kwargs):
        return None

    # Start from a cleared cooldown, and shrink the real-wall-time poll
    # period: it only controls how often the gate gets evaluated, whereas the
    # gate itself is measured on the mocked clock.
    monkeypatch.setattr(daemon_mod, "_last_cascade_completed_at", 0.0)
    monkeypatch.setattr(daemon_mod, "HIPPEA_CASCADE_POLL_SEC", 0.05)

    shutdown = asyncio.Event()

    # Only `monotonic` is replaced on the daemon's `time` reference; the
    # remaining patches stub out graph building, the cascade, state I/O and
    # event writing.
    with patch("iai_mcp.daemon.time.monotonic", fake_monotonic), \
            patch("iai_mcp.retrieve.build_runtime_graph", counting_stub), \
            patch("iai_mcp.hippea_cascade.run_cascade", fast_cascade_stub), \
            patch("iai_mcp.daemon_state.load_state", load_state_stub), \
            patch("iai_mcp.daemon_state.save_state", save_state_stub), \
            patch("iai_mcp.daemon.write_event", write_event_stub):

        cascade_task = asyncio.create_task(
            daemon_mod._hippea_cascade_loop(store=None, shutdown=shutdown),
        )

        # Advance 300 simulated seconds in 5-second steps. After each step a
        # short real sleep yields control so the loop task can run.
        POLL_STEP = 5.0
        WINDOW = 300.0
        for _ in range(int(WINDOW / POLL_STEP)):
            sim_clock += POLL_STEP
            await asyncio.sleep(0.02)

        # Ask the loop to stop; cancel it if it does not finish in time.
        shutdown.set()
        try:
            await asyncio.wait_for(cascade_task, timeout=2.0)
        except asyncio.TimeoutError:
            cascade_task.cancel()
            try:
                await cascade_task
            except (asyncio.CancelledError, Exception):
                pass

    # Acceptance per A2: at most ceil(WINDOW / MIN_INTERVAL) + 1 cascades,
    # i.e. ceil(300/60)+1 = 6 with MIN_INTERVAL=60.
    n = len(cascade_invocations)
    assert n <= 6, (
        f"R2 FAIL: {n} cascade invocations in 5-min window with "
        f"continuous pending=true. Expected ≤ 6 with 60s cooldown."
    )
    # Lower bound: n == 1 would also satisfy the cap for a broken cooldown
    # that blocks ALL cascades, so require the gate to have released at
    # least once as simulated time advanced.
    assert n >= 2, (
        f"R2 FAIL: only {n} cascade invocations across simulated "
        f"5-min window. Expected ≥ 2 (cooldown should release after "
        f"60 simulated seconds). Test fixture / mocks broken."
    )
|
||||
|
||||
|
||||
def test_cooldown_clears_after_min_interval_elapsed():
    """Unit test of the cooldown gate arithmetic: once MIN_INTERVAL has
    elapsed since the last cascade, a new cascade body run is allowed.

    Synchronous wrapper around the async body (project async-test idiom).
    """
    scenario = _cooldown_clears_after_min_interval_body()
    asyncio.run(scenario)
|
||||
|
||||
|
||||
async def _cooldown_clears_after_min_interval_body():
    """Check the cooldown comparison on a controllable clock.

    With `daemon.time.monotonic` patched to a fake clock, the elapsed time
    since `_last_cascade_completed_at` is first below
    `HIPPEA_CASCADE_MIN_INTERVAL_SEC` (gated) and, after the clock is
    advanced past the interval, at or above it (released).

    The clock is read through `daemon_mod.time.monotonic()` so the patch is
    actually exercised, and the module-level `_last_cascade_completed_at` is
    restored afterwards so the value written here does not leak into other
    tests that share the imported daemon module.
    """
    import iai_mcp.daemon as daemon_mod

    min_interval = daemon_mod.HIPPEA_CASCADE_MIN_INTERVAL_SEC
    clock = [1000.0]

    def fake_monotonic():
        return clock[0]

    saved_completed_at = daemon_mod._last_cascade_completed_at
    try:
        with patch("iai_mcp.daemon.time.monotonic", fake_monotonic):
            # Last cascade completed "now": the next iteration must be gated.
            daemon_mod._last_cascade_completed_at = 1000.0
            elapsed = daemon_mod.time.monotonic() - daemon_mod._last_cascade_completed_at
            assert elapsed < min_interval

            # Advance the clock just past MIN_INTERVAL: the gate must release.
            clock[0] = 1000.0 + min_interval + 0.1
            elapsed = daemon_mod.time.monotonic() - daemon_mod._last_cascade_completed_at
            assert elapsed >= min_interval
    finally:
        daemon_mod._last_cascade_completed_at = saved_completed_at
|
||||
111
tests/test_cascade_no_block.py
Normal file
111
tests/test_cascade_no_block.py
Normal file
|
|
@ -0,0 +1,111 @@
|
|||
"""Phase 07.2-03 R1 / A1 regression test — cascade body must not block the event loop.
|
||||
|
||||
Mechanism: stub `retrieve.build_runtime_graph` with a sync function that
|
||||
`time.sleep(5.0)`. With Plan 03's `await asyncio.to_thread(...)` wrap,
|
||||
the cascade-body sleep runs in a worker thread and a concurrent
|
||||
`asyncio.sleep(0)` + small coroutine on the same event loop completes
|
||||
in <100ms. Without the wrap, the event loop is pinned for 5s.
|
||||
|
||||
Project async-test idiom (mandatory): sync `def test_*` body wraps
|
||||
`asyncio.run(_async_body())`. The project does NOT depend on
|
||||
`pytest-asyncio`; `@pytest.mark.asyncio` markers silently pass without
|
||||
running. See tests/test_daemon_tick_flags.py:144 for the canonical pattern.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import time
|
||||
from unittest.mock import patch
|
||||
|
||||
|
||||
def test_concurrent_coroutine_completes_under_100ms_while_cascade_sleeps_5s(monkeypatch):
    """R1 acceptance: other async work keeps running while the cascade body is mid-sleep.

    Synchronous wrapper around the async body (project async-test idiom).
    """
    scenario = _concurrent_coroutine_completes_under_100ms_body(monkeypatch)
    asyncio.run(scenario)
|
||||
|
||||
|
||||
async def _concurrent_coroutine_completes_under_100ms_body(monkeypatch):
    """Verify that a 5s blocking `build_runtime_graph` does not pin the event loop.

    `_hippea_cascade_loop` is started as a background task with
    `build_runtime_graph` stubbed to `time.sleep(5.0)`. The test then runs
    three short `asyncio.sleep` calls and measures how much LONGER they took
    than requested. The timer starts BEFORE the first sleep: if the stub ran
    directly on the loop thread, the block would begin during that first
    sleep and delay its wake-up, so timing only the sleeps that follow would
    see an already-unblocked loop and pass falsely.
    """
    # Patch retrieve.build_runtime_graph on the `iai_mcp.retrieve` module —
    # that is where the cascade body's local import resolves the name.
    sleep_duration = 5.0
    sentinel_assignment = type("Asgmt", (), {"top_communities": [], "mid_regions": {}})()

    def slow_blocking_stub(store):
        time.sleep(sleep_duration)
        # Return a 3-tuple matching real signature: (graph, assignment, rich_club).
        return (None, sentinel_assignment, [])

    # Stub run_cascade to instantly return — we only care about the heavy
    # build_runtime_graph step blocking-or-not.
    async def fast_cascade_stub(store, assignment, **kwargs):
        return {"communities_selected": 0, "records_warmed": 0}

    # Stub state I/O so the cascade body sees pending=true once.
    state_holder = {
        "fsm_state": "WAKE",
        "hippea_cascade_request": {"pending": True, "session_id": "test"},
    }

    def load_state_stub():
        return dict(state_holder)

    def save_state_stub(state):
        state_holder.clear()
        state_holder.update(state)

    # Stub write_event so the cascade body writes nothing.
    def write_event_stub(*args, **kwargs):
        return None

    # Build a shutdown event that we'll set after a moment to terminate the loop.
    shutdown = asyncio.Event()

    # Reset module-level cooldown state to 0.0 so first iteration runs body.
    import iai_mcp.daemon as daemon_mod
    monkeypatch.setattr(daemon_mod, "_last_cascade_completed_at", 0.0)

    # Requested sleep durations: a warm-up long enough for the cascade to
    # enter the slow stub, followed by two 10ms responsiveness probes.
    warmup_sec = 0.2
    probe_sec = 0.01

    # Patch the names the cascade body resolves at call time.
    with patch("iai_mcp.retrieve.build_runtime_graph", slow_blocking_stub), \
            patch("iai_mcp.hippea_cascade.run_cascade", fast_cascade_stub), \
            patch("iai_mcp.daemon_state.load_state", load_state_stub), \
            patch("iai_mcp.daemon_state.save_state", save_state_stub), \
            patch("iai_mcp.daemon.write_event", write_event_stub):

        # Start the cascade loop as a background task.
        cascade_task = asyncio.create_task(
            daemon_mod._hippea_cascade_loop(store=None, shutdown=shutdown),
        )

        # Time the whole window, including the warm-up during which the
        # cascade reaches the slow stub. `elapsed` is the overshoot beyond
        # the requested sleep time, i.e. how long the loop was unavailable.
        t_start = time.monotonic()
        await asyncio.sleep(warmup_sec)
        await asyncio.sleep(probe_sec)
        await asyncio.sleep(probe_sec)
        elapsed = time.monotonic() - t_start - (warmup_sec + 2 * probe_sec)

        # Cleanup: shut down the cascade loop.
        shutdown.set()
        try:
            await asyncio.wait_for(cascade_task, timeout=sleep_duration + 2.0)
        except asyncio.TimeoutError:
            cascade_task.cancel()
            try:
                await cascade_task
            except (asyncio.CancelledError, Exception):
                pass

    # With the wrap in place the sleeps overshoot by far less than 100ms.
    # Without it (bare `retrieve.build_runtime_graph(store)` call on the
    # loop thread) the overshoot approaches the 5s stub sleep.
    assert elapsed < 0.1, (
        f"R1 FAIL: event loop pinned for {elapsed:.3f}s while cascade body "
        f"was running. Expected <100ms (wrap working). Did Plan 03's "
        f"`await asyncio.to_thread(retrieve.build_runtime_graph, store)` "
        f"land in src/iai_mcp/daemon.py::_hippea_cascade_loop?"
    )
|
||||
221
tests/test_centrality_cache.py
Normal file
221
tests/test_centrality_cache.py
Normal file
|
|
@ -0,0 +1,221 @@
|
|||
"""Plan 05-13 RED scaffold — cached centrality on graph nodes.
|
||||
|
||||
``build_runtime_graph`` must compute betweenness centrality ONCE and
|
||||
attach it as the ``centrality`` NetworkX node attribute so the rank
|
||||
stage can read it O(1) instead of recomputing ``graph.centrality()``
|
||||
on every recall. The cache file must round-trip the per-node
|
||||
centrality alongside the rest of the node payload so a cold-start
|
||||
rebuild hits the cache and the pipeline-hot-path stays allocation-free.
|
||||
|
||||
Contracts:
|
||||
C1 — every graph node has a ``centrality`` float attribute after
|
||||
``build_runtime_graph`` returns.
|
||||
C2 — runtime_graph_cache round-trips the ``centrality`` value per node
|
||||
(save + try_load preserves the exact float).
|
||||
C3 — when a node is missing ``centrality`` (pre-05-13 graph / race),
|
||||
recall_for_response falls back to inline computation without crashing.
|
||||
C4 — CACHE_VERSION has moved on from "05-12-v1" (currently "07-09-v3"); legacy cache
|
||||
files are invalidated cleanly.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from uuid import UUID, uuid4
|
||||
|
||||
import pytest
|
||||
|
||||
from iai_mcp import retrieve, runtime_graph_cache
|
||||
from iai_mcp.store import MemoryStore
|
||||
from iai_mcp.types import MemoryRecord
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _isolated_keyring(monkeypatch: pytest.MonkeyPatch):
    """Route ``keyring`` get/set/delete calls to an in-memory dict.

    Applied automatically to every test in this module. Yields the backing
    dict, which is keyed by ``(service, username)`` tuples.
    """
    import keyring as _keyring

    vault: dict[tuple[str, str], str] = {}

    def _get(service, username):
        return vault.get((service, username))

    def _set(service, username, password):
        return vault.__setitem__((service, username), password)

    def _delete(service, username):
        return vault.pop((service, username), None)

    monkeypatch.setattr(_keyring, "get_password", _get)
    monkeypatch.setattr(_keyring, "set_password", _set)
    monkeypatch.setattr(_keyring, "delete_password", _delete)
    yield vault
|
||||
|
||||
|
||||
def _make_record(store: MemoryStore, text: str, seed: int) -> MemoryRecord:
    """Return an episodic ``MemoryRecord`` carrying *text* and a seeded embedding.

    The embedding has ``store.embed_dim`` float32 components drawn from a
    NumPy generator seeded with *seed*, scaled to unit length.
    """
    import numpy as np

    generator = np.random.default_rng(seed)
    vec = generator.standard_normal(store.embed_dim).astype(np.float32)
    # `or 1.0` avoids dividing by zero if the norm happens to be 0.
    vec /= float(np.linalg.norm(vec)) or 1.0
    stamp = datetime.now(timezone.utc)
    fields = dict(
        id=uuid4(),
        tier="episodic",
        literal_surface=text,
        aaak_index="",
        embedding=vec.tolist(),
        community_id=None,
        centrality=0.0,
        detail_level=2,
        pinned=False,
        stability=0.0,
        difficulty=0.0,
        last_reviewed=None,
        never_decay=False,
        never_merge=False,
        provenance=[],
        created_at=stamp,
        updated_at=stamp,
        tags=["t"],
        language="en",
    )
    return MemoryRecord(**fields)
|
||||
|
||||
|
||||
@pytest.fixture
def seeded_store(tmp_path: Path) -> MemoryStore:
    """Provide a ``MemoryStore`` under *tmp_path* holding 15 linked records.

    Records are chained pairwise in the order ``all_records()`` returns them,
    plus two extra links, so that betweenness has something to measure.
    """
    store = MemoryStore(path=tmp_path / "lancedb")
    store.root = tmp_path

    # Enough records for a non-trivial graph (betweenness > 0 on some nodes).
    for seed in range(1, 16):
        store.insert(_make_record(store, f"fact-{seed - 1}", seed))

    ids = [record.id for record in list(store.all_records())]
    # Consecutive chain first, then two shortcut edges.
    edges = list(zip(ids, ids[1:]))
    edges.extend([(ids[0], ids[5]), (ids[2], ids[10])])
    store.boost_edges(edges, delta=0.5)
    return store
|
||||
|
||||
|
||||
# --------------------------------------------------------------- C1
|
||||
|
||||
|
||||
def test_C1_every_node_has_centrality_attr(seeded_store):
    """C1: each node of the built graph exposes a float ``centrality`` attribute."""
    graph, _assignment, _rich_club = retrieve.build_runtime_graph(seeded_store)
    nodes = graph._nx.nodes
    assert len(nodes) > 0
    for nid in nodes:
        attrs = nodes[nid]
        assert "centrality" in attrs, f"node {nid} missing centrality attr"
        value = attrs["centrality"]
        assert isinstance(value, float), (
            f"centrality on {nid} must be float, got {type(value)}"
        )
|
||||
|
||||
|
||||
# --------------------------------------------------------------- C2
|
||||
|
||||
|
||||
def test_C2_cache_round_trips_centrality(seeded_store):
    """C2: the cache file carries each node's centrality through save + try_load."""
    graph, assignment, rich_club = retrieve.build_runtime_graph(seeded_store)

    # Record what the live graph reports before touching the cache.
    expected = {}
    for nid in graph._nx.nodes:
        expected[nid] = float(graph._nx.nodes[nid]["centrality"])

    # Drop the cache and rebuild so a fresh file gets written.
    runtime_graph_cache.invalidate(seeded_store)
    _graph2, _assignment2, _rich_club2 = retrieve.build_runtime_graph(seeded_store)

    loaded = runtime_graph_cache.try_load(seeded_store)
    assert loaded is not None, "cache should be populated after build"
    # try_load hands back a 4-tuple; only the node payload matters here.
    _cached_assignment, _cached_rich_club, node_payload, _max_degree = loaded
    assert node_payload is not None and len(node_payload) > 0

    for nid, live in expected.items():
        entry = node_payload.get(nid)
        assert entry is not None, f"missing payload for {nid}"
        assert "centrality" in entry, f"payload {nid} missing centrality"
        # Compared with a tight absolute tolerance rather than strict equality.
        delta = abs(entry["centrality"] - live)
        assert delta < 1e-9, (
            f"centrality drift on {nid}: cache={entry['centrality']} "
            f"live={live}"
        )
|
||||
|
||||
|
||||
# --------------------------------------------------------------- C3
|
||||
|
||||
|
||||
def test_C3_missing_centrality_fallback_inline(seeded_store):
    """C3: recall still returns a response when no node carries 'centrality'."""
    from iai_mcp import pipeline

    dim = seeded_store.embed_dim

    class _E:
        # Minimal embedder stand-in: deterministic vectors derived from the text.
        DIM = dim
        DEFAULT_DIM = dim
        DEFAULT_MODEL_KEY = "t"

        def embed(self, t):
            import hashlib

            import numpy as np

            digest = hashlib.sha256(t.encode()).hexdigest()
            rng = np.random.default_rng(int(digest[:16], 16))
            vec = rng.standard_normal(self.DIM).astype(np.float32)
            vec /= float(np.linalg.norm(vec)) or 1.0
            return vec.tolist()

    graph, assignment, rich_club = retrieve.build_runtime_graph(seeded_store)

    # Remove the cached value everywhere to mimic a graph built before the
    # attribute existed (or a node that raced past the sync hook).
    for nid in list(graph._nx.nodes):
        graph._nx.nodes[nid].pop("centrality", None)

    resp = pipeline.recall_for_response(
        store=seeded_store,
        graph=graph,
        assignment=assignment,
        rich_club=rich_club,
        embedder=_E(),
        cue="fact-3",
        session_id="t-C3",
        budget_tokens=4000,
    )

    # The call must complete and expose a list of hits.
    assert resp is not None
    assert isinstance(resp.hits, list)
|
||||
|
||||
|
||||
# --------------------------------------------------------------- C4
|
||||
|
||||
|
||||
def test_C4_cache_version_bumped_to_05_13_v1():
    """Pin the current cache version stamp.

    The stamp has advanced with the cache shape (05-12-v1 -> 05-13-v1 ->
    06-02-v1 -> 07-09-v3); this test asserts the latest value.
    """
    expected_version = "07-09-v3"
    assert runtime_graph_cache.CACHE_VERSION == expected_version
|
||||
|
||||
|
||||
def test_C4_legacy_cache_invalidated(seeded_store, tmp_path: Path):
    """A cache file stamped ``05-12-v1`` must be rejected by ``try_load``.

    The on-disk file is encrypted, so the test decrypts it, rewrites the
    ``cache_version`` field, re-encrypts, and then expects ``try_load`` to
    return ``None``.
    """
    from iai_mcp.crypto import decrypt_field, encrypt_field

    # Building the graph is what writes the cache file in the first place.
    _graph, _assignment, _rich_club = retrieve.build_runtime_graph(seeded_store)
    cache_file = tmp_path / "runtime_graph_cache.json"
    assert cache_file.exists(), "cache not created by build_runtime_graph"

    key = runtime_graph_cache._cache_encryption_key(seeded_store)
    aad = runtime_graph_cache._CACHE_AAD

    # Decrypt, stamp the legacy version, re-encrypt.
    document = json.loads(
        decrypt_field(cache_file.read_text(encoding="utf-8"), key, aad)
    )
    document["cache_version"] = "05-12-v1"
    cache_file.write_text(
        encrypt_field(json.dumps(document), key, aad), encoding="ascii"
    )

    assert runtime_graph_cache.try_load(seeded_store) is None
|
||||
165
tests/test_cli_audit.py
Normal file
165
tests/test_cli_audit.py
Normal file
|
|
@ -0,0 +1,165 @@
|
|||
"""Tests for iai-mcp audit CLI (OPS-07 Plan 02-05).
|
||||
|
||||
`iai-mcp audit [--since WEEKS] [--severity SEV]` renders an identity-event
|
||||
audit log, TZ-aware timestamps, and REDACTED shield match counts (D-30
|
||||
threat T-02-05-02: leaking matched patterns in CLI output would hand the
|
||||
attacker a dictionary of what the shield is watching for).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timedelta, timezone
|
||||
|
||||
import pytest
|
||||
|
||||
from iai_mcp.cli import main as cli_main
|
||||
from iai_mcp.events import write_event
|
||||
from iai_mcp.store import MemoryStore
|
||||
|
||||
|
||||
def test_cli_audit_empty(tmp_path, capsys, monkeypatch):
    """With an empty store, ``audit`` exits 0 and prints a 'no events' notice."""
    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    code = cli_main(["audit"])
    assert code == 0
    printed = capsys.readouterr().out.lower()
    accepted = ("no identity events", "no events")
    assert any(phrase in printed for phrase in accepted)
|
||||
|
||||
|
||||
def test_cli_audit_renders_events(tmp_path, capsys, monkeypatch):
    """A stored event shows up in ``audit`` output with its kind and severity."""
    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    store = MemoryStore(path=tmp_path)
    write_event(
        store,
        kind="s5_invariant_update",
        data={"anchor_id": "abc", "new_record_id": "def"},
        severity="info",
        session_id="s1",
    )
    code = cli_main(["audit"])
    assert code == 0
    out = capsys.readouterr().out
    # Both the event kind and its severity label must be printed.
    for needle in ("s5_invariant_update", "info"):
        assert needle in out
|
||||
|
||||
|
||||
def test_cli_audit_since_weeks(tmp_path, capsys, monkeypatch):
    """``audit --since=2`` runs to completion with exit code 0."""
    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    store = MemoryStore(path=tmp_path)
    write_event(
        store,
        kind="s5_invariant_update",
        data={"anchor_id": "abc"},
        severity="info",
        session_id="s1",
    )
    argv = ["audit", "--since=2"]
    code = cli_main(argv)
    assert code == 0
|
||||
|
||||
|
||||
def test_cli_audit_severity_filter_warning_only(tmp_path, capsys, monkeypatch):
    """``--severity=warning`` prints the warning event and hides the info one."""
    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    store = MemoryStore(path=tmp_path)
    write_event(
        store,
        kind="s5_invariant_update",
        data={"anchor_id": "abc"},
        severity="info",
        session_id="s1",
    )
    write_event(
        store,
        kind="s5_drift_alert",
        data={"first_value": 0.1, "last_value": 0.5},
        severity="warning",
        session_id="s2",
    )
    code = cli_main(["audit", "--severity=warning"])
    assert code == 0
    out = capsys.readouterr().out
    # Only the warning-level kind may appear.
    assert "s5_drift_alert" in out
    assert "s5_invariant_update" not in out
|
||||
|
||||
|
||||
def test_cli_audit_shows_shield_rejections_redacted(tmp_path, capsys, monkeypatch):
    """Shield rejections are listed with a match count, never the matched words."""
    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    store = MemoryStore(path=tmp_path)
    rejection = {
        "tier": "hard_block",
        "matched": ["forget", "you are now", "override"],
        "record_id": "aabbcc",
        "action": "reject",
    }
    write_event(
        store,
        kind="shield_rejection",
        data=rejection,
        severity="critical",
        session_id="s1",
    )
    code = cli_main(["audit"])
    assert code == 0
    out = capsys.readouterr().out
    # The event kind is listed.
    assert "shield_rejection" in out
    # The number of matched patterns (3) is shown in some form.
    compact = out.replace(" ", "")
    assert "3" in out or "matched_count=3" in compact
    # The literal trigger phrases stay out of the output.
    assert "forget" not in out
    assert "you are now" not in out
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- subcommands
|
||||
|
||||
|
||||
def test_cli_audit_shield_subcommand(tmp_path, capsys, monkeypatch):
    """``audit shield --since=7`` completes with exit code 0."""
    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    store = MemoryStore(path=tmp_path)
    write_event(
        store,
        kind="shield_rejection",
        data={"tier": "hard_block", "matched": ["forget"], "action": "reject"},
        severity="critical",
        session_id="s1",
    )
    # Only the exit status is checked: the subcommand must not crash.
    argv = ["audit", "shield", "--since=7"]
    code = cli_main(argv)
    assert code == 0
|
||||
|
||||
|
||||
def test_cli_audit_drift_subcommand(tmp_path, capsys, monkeypatch):
    """``audit drift`` exits 0 and mentions drift after a rising m4 series."""
    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    store = MemoryStore(path=tmp_path)
    # Five strictly increasing m4 samples, one per session.
    rising = [0.1, 0.2, 0.3, 0.4, 0.5]
    for idx, sample in enumerate(rising):
        write_event(
            store,
            kind="trajectory_metric",
            data={"metric": "m4", "value": sample},
            severity="info",
            session_id=f"s{idx}",
        )
    code = cli_main(["audit", "drift"])
    assert code == 0
    printed = capsys.readouterr().out
    assert "drift" in printed.lower()
|
||||
|
||||
|
||||
def test_cli_audit_identity_subcommand(tmp_path, capsys, monkeypatch):
    """``audit identity`` lists the s5 event and omits the shield rejection."""
    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    store = MemoryStore(path=tmp_path)
    write_event(
        store,
        kind="s5_invariant_update",
        data={"anchor_id": "abc"},
        severity="info",
        session_id="s1",
    )
    write_event(
        store,
        kind="shield_rejection",
        data={"tier": "hard_block", "matched": ["forget"], "action": "reject"},
        severity="critical",
        session_id="s2",
    )
    code = cli_main(["audit", "identity"])
    assert code == 0
    out = capsys.readouterr().out
    # Identity view keeps s5_* kinds and drops everything else.
    assert "s5_invariant_update" in out
    assert "shield_rejection" not in out
|
||||
383
tests/test_cli_crypto.py
Normal file
383
tests/test_cli_crypto.py
Normal file
|
|
@ -0,0 +1,383 @@
|
|||
"""iai-mcp crypto + iai-mcp migrate --from=2 --to=3 CLI tests.
|
||||
|
||||
Originally Plan 02-08; updated in W1 to retire the keyring
|
||||
backend in favor of a file-backed primary backend at
|
||||
`{IAI_MCP_STORE}/.crypto.key` (32 raw bytes, mode 0o600). The
|
||||
`_isolated_keyring` autouse fixture is gone — CLI tests now monkeypatch
|
||||
IAI_MCP_STORE to a tmp_path and pre-create / inspect the file directly.
|
||||
|
||||
Commands under test:
|
||||
- `iai-mcp crypto status` -> JSON-ish status of file backend + user_id
|
||||
- `iai-mcp crypto rotate` -> rotate key + re-encrypt all records
|
||||
- `iai-mcp migrate --from=2 --to=3 [--dry-run]` -> encryption migration
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import secrets
|
||||
import stat
|
||||
from datetime import datetime, timezone
|
||||
from uuid import uuid4
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
def test_cli_crypto_status_shows_file_backend(tmp_path, monkeypatch, capsys):
    """``crypto status`` describes the file-backed key and never says 'keyring'.

    A 32-byte ``.crypto.key`` with mode 0o600 is planted in the store root
    first. The command must exit 0 and its output must contain the user id,
    the word "file", the key file name, and "600".
    """
    import argparse

    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    monkeypatch.delenv("IAI_MCP_CRYPTO_PASSPHRASE", raising=False)

    key_file = tmp_path / ".crypto.key"
    key_file.write_bytes(secrets.token_bytes(32))
    os.chmod(key_file, 0o600)

    from iai_mcp.cli import cmd_crypto_status

    exit_code = cmd_crypto_status(argparse.Namespace(user_id="default"))
    out = capsys.readouterr().out
    lowered = out.lower()

    assert exit_code == 0
    assert "default" in out
    # File-backend output contract.
    assert "file" in lowered, f"status must report backend=file; got:\n{out}"
    assert ".crypto.key" in out, f"status must include the file path; got:\n{out}"
    assert "600" in out, f"status must expose mode 0o600; got:\n{out}"
    # The retired backend must not be named anywhere.
    assert "keyring" not in lowered, (
        f"status must NOT mention keyring (backend retired in 07.10); got:\n{out}"
    )
|
||||
|
||||
|
||||
def test_cli_crypto_rotate_regenerates_key(tmp_path, monkeypatch, capsys):
    """``crypto rotate`` replaces the key file and re-encrypts stored records.

    Starting from a planted key and one inserted record, the test checks
    after rotation that the key file holds 32 different bytes at mode 0o600,
    that the record's stored ciphertext changed while keeping the
    ``iai:enc:v1:`` prefix, and that a fresh store reads back the original
    plaintext.
    """
    import argparse

    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    monkeypatch.delenv("IAI_MCP_CRYPTO_PASSPHRASE", raising=False)

    # Plant the starting key.
    key_file = tmp_path / ".crypto.key"
    original_key = secrets.token_bytes(32)
    key_file.write_bytes(original_key)
    os.chmod(key_file, 0o600)

    from iai_mcp.cli import cmd_crypto_rotate
    from iai_mcp.store import MemoryStore, RECORDS_TABLE
    from iai_mcp.types import EMBED_DIM, MemoryRecord

    # Insert one record while the starting key is active.
    store = MemoryStore()
    rec = MemoryRecord(
        id=uuid4(),
        tier="episodic",
        literal_surface="rotation test content",
        aaak_index="",
        embedding=[0.1] * EMBED_DIM,
        community_id=None,
        centrality=0.0,
        detail_level=2,
        pinned=False,
        stability=0.0,
        difficulty=0.0,
        last_reviewed=None,
        never_decay=False,
        never_merge=False,
        provenance=[],
        created_at=datetime.now(timezone.utc),
        updated_at=datetime.now(timezone.utc),
        tags=[],
        language="en",
    )
    store.insert(rec)

    def _stored_surface(source):
        # Read the raw (still encrypted) literal_surface cell for `rec`.
        frame = source.db.open_table(RECORDS_TABLE).to_pandas()
        return frame[frame["id"] == str(rec.id)].iloc[0]["literal_surface"]

    initial_ct = _stored_surface(store)
    assert initial_ct.startswith("iai:enc:v1:")

    exit_code = cmd_crypto_rotate(argparse.Namespace(user_id="default"))
    out = capsys.readouterr().out
    assert exit_code == 0
    assert "rotat" in out.lower()

    # Key file: fresh 32 bytes, permissions unchanged.
    rotated_key = key_file.read_bytes()
    assert len(rotated_key) == 32
    assert rotated_key != original_key, "rotate must write a fresh key to the file"
    mode = stat.S_IMODE(os.stat(key_file).st_mode)
    assert mode == 0o600, f"rotated key file must be 0o600, got 0o{mode:03o}"

    # Stored data: a brand-new store instance reads the rotated key file.
    store2 = MemoryStore()
    post_ct = _stored_surface(store2)
    assert post_ct.startswith("iai:enc:v1:")
    assert post_ct != initial_ct  # ciphertext differs after rotation

    # Plaintext still round-trips through the rotated key.
    got = store2.get(rec.id)
    assert got is not None
    assert got.literal_surface == "rotation test content"
|
||||
|
||||
|
||||
def test_cli_migrate_to_3_dry_run_counts_plaintext_rows(tmp_path, monkeypatch, capsys):
    """A 2 -> 3 dry run exits 0 and reports the single planted plaintext row."""
    import argparse

    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    from iai_mcp.cli import cmd_migrate
    from iai_mcp.store import MemoryStore, RECORDS_TABLE
    from iai_mcp.types import EMBED_DIM, MemoryRecord

    store = MemoryStore()
    # Write straight to the table so the row is added without going
    # through insert() and stays plaintext.
    rid = uuid4()
    plaintext_row = {
        "id": str(rid),
        "tier": "episodic",
        "literal_surface": "plain legacy",
        "aaak_index": "",
        "embedding": [0.1] * EMBED_DIM,
        "structure_hv": b"",
        "community_id": "",
        "centrality": 0.0,
        "detail_level": 2,
        "pinned": False,
        "stability": 0.0,
        "difficulty": 0.0,
        "last_reviewed": None,
        "never_decay": False,
        "never_merge": False,
        "provenance_json": json.dumps([{"ts": "x", "cue": "y", "session_id": "z"}]),
        "created_at": datetime.now(timezone.utc),
        "updated_at": datetime.now(timezone.utc),
        "tags_json": json.dumps([]),
        "language": "en",
        "s5_trust_score": 0.5,
        "profile_modulation_gain_json": json.dumps({}),
        "schema_version": 2,
    }
    store.db.open_table(RECORDS_TABLE).add([plaintext_row])

    exit_code = cmd_migrate(
        argparse.Namespace(from_=2, to=3, dry_run=True, verbose=False)
    )
    out = capsys.readouterr().out
    assert exit_code == 0
    # Some dry-run / migration wording must be present, plus the row count.
    lowered = out.lower()
    assert any(word in lowered for word in ("would", "dry", "migrat"))
    assert "1" in out  # exactly one plaintext row was planted
|
||||
|
||||
|
||||
def test_cli_migrate_to_3_encrypts_plaintext_rows(tmp_path, monkeypatch, capsys):
    """A real 2 -> 3 migration leaves the planted row with an encrypted surface."""
    import argparse

    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    from iai_mcp.cli import cmd_migrate
    from iai_mcp.store import MemoryStore, RECORDS_TABLE
    from iai_mcp.types import EMBED_DIM

    store = MemoryStore()
    rid = uuid4()
    plaintext_row = {
        "id": str(rid),
        "tier": "episodic",
        "literal_surface": "still-plaintext",
        "aaak_index": "",
        "embedding": [0.1] * EMBED_DIM,
        "structure_hv": b"",
        "community_id": "",
        "centrality": 0.0,
        "detail_level": 2,
        "pinned": False,
        "stability": 0.0,
        "difficulty": 0.0,
        "last_reviewed": None,
        "never_decay": False,
        "never_merge": False,
        "provenance_json": json.dumps([]),
        "created_at": datetime.now(timezone.utc),
        "updated_at": datetime.now(timezone.utc),
        "tags_json": json.dumps([]),
        "language": "en",
        "s5_trust_score": 0.5,
        "profile_modulation_gain_json": json.dumps({}),
        "schema_version": 2,
    }
    store.db.open_table(RECORDS_TABLE).add([plaintext_row])

    exit_code = cmd_migrate(
        argparse.Namespace(from_=2, to=3, dry_run=False, verbose=False)
    )
    assert exit_code == 0

    # Re-read the table and confirm the surface now carries the cipher prefix.
    frame = store.db.open_table(RECORDS_TABLE).to_pandas()
    migrated = frame[frame["id"] == str(rid)].iloc[0]
    assert migrated["literal_surface"].startswith("iai:enc:v1:")
|
||||
|
||||
|
||||
def test_cli_migrate_to_3_rejects_unsupported_version_pair(
    tmp_path, monkeypatch, capsys
):
    """--from=9 --to=42 is rejected with a clear error + non-zero exit.

    The guidance text may be printed on either stream, so both are checked.
    """
    import argparse

    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    from iai_mcp.cli import cmd_migrate

    args = argparse.Namespace(from_=9, to=42, dry_run=False, verbose=False)
    exit_code = cmd_migrate(args)
    # readouterr() drains BOTH streams on every call, so it must be called
    # exactly once; a second call would always yield empty strings and turn
    # the stdout checks below into dead code.
    captured = capsys.readouterr()
    err = captured.err.lower()
    out = captured.out.lower()
    assert exit_code != 0
    # Some guidance in stderr or stdout.
    assert ("unsupported" in err or "invalid" in err or
            "unsupported" in out or "invalid" in out)
|
||||
|
||||
|
||||
def test_neural_map_bench_passes_after_encryption(tmp_path):
    """The neural-map bench at N=100 / 10 iterations must report ``passed``."""
    from bench.neural_map import run_neural_map_bench, D_SPEED_P95_MS

    out = run_neural_map_bench(n=100, iterations=10, store_path=tmp_path, seed=0)
    # The report echoes the requested sizes.
    assert out["n"] == 100
    assert out["iterations"] == 10
    failure_note = (
        f"D-SPEED regression post-encryption: p95={out['latency_ms_p95']} ms "
        f">= {D_SPEED_P95_MS} ms"
    )
    assert out["passed"] is True, failure_note
|
||||
|
||||
|
||||
def test_cli_crypto_init_creates_fresh_file(tmp_path, monkeypatch, capsys):
    """``crypto init`` writes a new 32-byte, mode-0o600 key file.

    Starting with no key file, the command must exit 0, create the file, and
    mention ``.crypto.key`` in stdout without echoing the raw key bytes.
    """
    import argparse

    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    monkeypatch.delenv("IAI_MCP_CRYPTO_PASSPHRASE", raising=False)

    key_file = tmp_path / ".crypto.key"
    assert not key_file.exists()

    from iai_mcp.cli import cmd_crypto_init

    exit_code = cmd_crypto_init(argparse.Namespace(user_id="default"))
    out = capsys.readouterr().out
    assert exit_code == 0

    assert key_file.exists()
    assert key_file.stat().st_size == 32
    mode = stat.S_IMODE(os.stat(key_file).st_mode)
    assert mode == 0o600, f"init key file must be 0o600, got 0o{mode:03o}"
    # The user is told where the key lives.
    assert ".crypto.key" in out

    # Leak check: no aligned 4-byte window of the key may appear in stdout.
    key_bytes = key_file.read_bytes()
    all_zero = b"\x00\x00\x00\x00"
    for offset in range(0, 32, 4):
        window = key_bytes[offset:offset + 4]
        # All-zero windows are skipped; they could collide with text trivially.
        if window != all_zero:
            assert window.decode("latin-1") not in out, (
                "init must not print key bytes to stdout"
            )
|
||||
|
||||
|
||||
def test_cli_crypto_init_refuses_when_file_exists(tmp_path, monkeypatch, capsys):
    """``crypto init`` exits 1 and leaves an existing key file untouched.

    The error printed on stderr must name ``.crypto.key``.
    """
    import argparse

    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    monkeypatch.delenv("IAI_MCP_CRYPTO_PASSPHRASE", raising=False)

    key_file = tmp_path / ".crypto.key"
    existing = secrets.token_bytes(32)
    key_file.write_bytes(existing)
    os.chmod(key_file, 0o600)

    from iai_mcp.cli import cmd_crypto_init

    exit_code = cmd_crypto_init(argparse.Namespace(user_id="default"))
    err = capsys.readouterr().err
    assert exit_code == 1
    assert ".crypto.key" in err
    # The pre-existing bytes must survive the refused init.
    assert key_file.read_bytes() == existing
|
||||
|
||||
|
||||
def test_cli_crypto_rotate_invalidates_aesgcm_cache(tmp_path, monkeypatch):
    """``cmd_crypto_rotate`` must call ``MemoryStore._invalidate_aesgcm_cache``.

    The method is replaced with an autospec'd mock on the class, so the test
    fails as soon as the rotate path stops calling it.
    """
    import argparse
    from unittest.mock import patch

    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    monkeypatch.delenv("IAI_MCP_CRYPTO_PASSPHRASE", raising=False)

    # A key file must already exist for rotate to take its normal path.
    key_file = tmp_path / ".crypto.key"
    key_file.write_bytes(secrets.token_bytes(32))
    os.chmod(key_file, 0o600)

    from iai_mcp.cli import cmd_crypto_rotate
    from iai_mcp.store import MemoryStore

    namespace = argparse.Namespace(user_id="default")
    hook = patch.object(MemoryStore, "_invalidate_aesgcm_cache", autospec=True)
    with hook as m:
        exit_code = cmd_crypto_rotate(namespace)

    assert exit_code == 0
    assert m.called, (
        "cmd_crypto_rotate must call store._invalidate_aesgcm_cache() "
        "after assigning the new key (Phase 07.10 D-10, T-07.10-08)"
    )
|
||||
114
tests/test_cli_crypto_redact.py
Normal file
114
tests/test_cli_crypto_redact.py
Normal file
|
|
@ -0,0 +1,114 @@
|
|||
"""CLI + migrate_redact_undecryptable_records tests."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import secrets
|
||||
import subprocess
|
||||
import sys
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from uuid import uuid4
|
||||
|
||||
import pytest
|
||||
|
||||
from iai_mcp.migrate import migrate_redact_undecryptable_records
|
||||
from iai_mcp.store import MemoryStore
|
||||
from iai_mcp.types import MemoryRecord, SCHEMA_VERSION_CURRENT
|
||||
|
||||
|
||||
def _minimal_record(literal: str) -> MemoryRecord:
    """Return a fully populated episodic ``MemoryRecord`` whose surface is *literal*.

    Every other field is a fixed placeholder; ``created_at`` and
    ``updated_at`` share a single UTC timestamp.
    """
    stamp = datetime.now(timezone.utc)
    fields = dict(
        id=uuid4(),
        tier="episodic",
        literal_surface=literal,
        aaak_index="",
        embedding=[0.02] * 384,
        structure_hv=b"\x00" * 1250,
        community_id=None,
        centrality=0.0,
        detail_level=1,
        pinned=False,
        stability=0.0,
        difficulty=0.0,
        last_reviewed=None,
        never_decay=False,
        never_merge=False,
        provenance=[],
        created_at=stamp,
        updated_at=stamp,
        tags=["t1"],
        language="en",
        s5_trust_score=0.5,
        profile_modulation_gain={},
        schema_version=SCHEMA_VERSION_CURRENT,
    )
    return MemoryRecord(**fields)
|
||||
|
||||
|
||||
def test_redact_makes_literal_decryptable_and_idempotent(tmp_path: Path) -> None:
    """Redaction rewrites a record written under a replaced key, and only once.

    A record is inserted while the store's ``.crypto.key`` holds one random
    key; the key file is then overwritten with a different key before the
    migration runs. The second migration pass must redact nothing.
    """
    store_dir = tmp_path / "redact-store"
    store_dir.mkdir()
    key_file = store_dir / ".crypto.key"

    def _install_key(material: bytes) -> None:
        # Write the raw key bytes and restrict the file to owner read/write.
        key_file.write_bytes(material)
        os.chmod(key_file, 0o600)

    original_key = secrets.token_bytes(32)
    replacement_key = secrets.token_bytes(32)

    _install_key(original_key)
    writer = MemoryStore(path=store_dir, user_id="default")
    record = _minimal_record("secret-surface")
    writer.insert(record)
    record_id = record.id
    del writer

    _install_key(replacement_key)
    reader = MemoryStore(path=store_dir, user_id="default")
    first_pass = migrate_redact_undecryptable_records(reader)
    assert first_pass["redacted"] == 1
    assert first_pass["skipped_plain"] == 0

    fetched = reader.get(record_id)
    assert fetched is not None
    assert fetched.literal_surface.startswith("<REDACTED:")

    second_pass = migrate_redact_undecryptable_records(reader)
    assert second_pass["redacted"] == 0
    assert second_pass["skipped_ok"] >= 1
|
||||
|
||||
|
||||
def test_cli_crypto_redact_undecryptable_smoke(tmp_path: Path) -> None:
    """Run ``crypto redact-undecryptable`` in a child interpreter end to end.

    The store is seeded under one key, the key file is replaced, and the CLI
    is pointed at the store through ``IAI_MCP_STORE``. Its stdout must be a
    JSON document reporting exactly one redacted record.
    """
    store_dir = tmp_path / "cli-redact"
    store_dir.mkdir()
    first_key = secrets.token_bytes(32)
    second_key = secrets.token_bytes(32)
    key_file = store_dir / ".crypto.key"

    key_file.write_bytes(first_key)
    os.chmod(key_file, 0o600)
    seeded = MemoryStore(path=store_dir, user_id="default")
    seeded.insert(_minimal_record("cli-redact-body"))
    del seeded

    key_file.write_bytes(second_key)
    os.chmod(key_file, 0o600)

    child_env = dict(os.environ)
    child_env["IAI_MCP_STORE"] = str(store_dir.resolve())
    command = [
        sys.executable,
        "-m",
        "iai_mcp.cli",
        "crypto",
        "redact-undecryptable",
        "--user-id",
        "default",
    ]
    # Run from the directory that contains this tests/ package.
    working_dir = Path(__file__).resolve().parents[1]
    completed = subprocess.run(
        command,
        capture_output=True,
        text=True,
        cwd=str(working_dir),
        env=child_env,
        check=False,
    )
    assert completed.returncode == 0, completed.stderr + completed.stdout
    report = json.loads(completed.stdout.strip())
    assert report.get("redacted") == 1
|
||||
750
tests/test_cli_daemon.py
Normal file
750
tests/test_cli_daemon.py
Normal file
|
|
@ -0,0 +1,750 @@
|
|||
"""Plan 04-05 -- iai-mcp daemon subcommand group tests (DAEMON-10 + DAEMON-12).
|
||||
|
||||
Verifies dispatcher wiring, install/uninstall flow with consent banner,
|
||||
launchd / systemd template rendering with sys.executable substitution
|
||||
(Pitfall 5), version skew detection in `daemon status`, and C4 clean uninstall
|
||||
(removes plist/unit + all 3 state files).
|
||||
|
||||
All subprocess calls (launchctl, systemctl, loginctl, tail, journalctl) are
|
||||
monkeypatched so the suite never touches the host's actual launchd/systemd.
|
||||
|
||||
Socket-talking subcommands (status / force-rem / pause / logs) are exercised
|
||||
against the `_ThreadedFakeDaemon` helper (lifted from
|
||||
tests/test_core_bedtime_inject.py pattern -- a fake daemon that survives
|
||||
multiple asyncio.run() teardowns by running on a dedicated background loop).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import io
|
||||
import json
|
||||
import os
|
||||
import platform
|
||||
import sys
|
||||
import tempfile
|
||||
import threading
|
||||
from contextlib import redirect_stdout, redirect_stderr
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from iai_mcp import cli as cli_mod
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Threaded fake daemon (survives multiple asyncio.run teardowns)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class _ThreadedFakeDaemon:
    """Fake daemon NDJSON server on a background loop.

    Each request line is captured. Each request gets `reply` written back
    (or a per-request reply via `reply_fn(req)` if provided).

    The server runs on its own event loop in a daemon thread, so callers may
    use `asyncio.run()` repeatedly on the main thread while it is serving.
    """

    def __init__(
        self,
        path: Path,
        captured: list,
        reply: dict | None = None,
        reply_fn=None,
    ) -> None:
        """Store configuration; nothing is bound until `start()` is called.

        Args:
            path: Filesystem path of the unix socket to listen on.
            captured: List that every decoded request is appended to.
            reply: Fixed response used when `reply_fn` is None (`{}` if unset).
            reply_fn: Optional callable mapping a request to its response.
        """
        self.path = path
        self.captured = captured
        self.reply = reply
        self.reply_fn = reply_fn
        self._loop: asyncio.AbstractEventLoop | None = None
        self._server: asyncio.AbstractServer | None = None
        self._thread: threading.Thread | None = None
        self._ready = threading.Event()

    def start(self) -> None:
        """Start the server thread and block until the socket is listening.

        Raises:
            AssertionError: if the server is not ready within 5 seconds.
        """

        def _run() -> None:
            self._loop = asyncio.new_event_loop()
            asyncio.set_event_loop(self._loop)

            async def _handle(reader, writer):
                # One request line per connection: decode, record, answer.
                try:
                    line = await reader.readline()
                    if line:
                        req = json.loads(line.decode("utf-8"))
                        self.captured.append(req)
                        if self.reply_fn is not None:
                            resp = self.reply_fn(req)
                        else:
                            resp = self.reply or {}
                        writer.write((json.dumps(resp) + "\n").encode("utf-8"))
                        await writer.drain()
                finally:
                    # Best-effort close: the peer may already have hung up.
                    try:
                        writer.close()
                        await writer.wait_closed()
                    except Exception:
                        pass

            async def _serve():
                self.path.parent.mkdir(parents=True, exist_ok=True)
                self._server = await asyncio.start_unix_server(
                    _handle, path=str(self.path),
                )
                self._ready.set()
                async with self._server:
                    await self._server.serve_forever()

            try:
                self._loop.run_until_complete(_serve())
            except asyncio.CancelledError:
                # Closing the server cancels serve_forever(); normal shutdown.
                pass
            finally:
                self._loop.close()

        self._thread = threading.Thread(target=_run, daemon=True)
        self._thread.start()
        assert self._ready.wait(timeout=5.0), "fake daemon failed to start"

    def stop(self) -> None:
        """Close the server and join the worker thread.

        Safe to call more than once, and safe when the worker thread has
        already closed its loop: closing the server cancels `serve_forever()`,
        after which the thread leaves `run_until_complete()` and closes the
        loop on its own, possibly before this method schedules `loop.stop`.
        """
        loop = self._loop
        if loop is None:
            return

        async def _shutdown():
            if self._server is not None:
                self._server.close()
                await self._server.wait_closed()

        # Skip scheduling on a loop that is already closed; that would only
        # create a coroutine that is never awaited.
        if not loop.is_closed():
            try:
                asyncio.run_coroutine_threadsafe(_shutdown(), loop).result(timeout=5.0)
            except Exception:
                # Best-effort: the loop may close between the check and the call.
                pass
        try:
            loop.call_soon_threadsafe(loop.stop)
        except RuntimeError:
            # "Event loop is closed": the worker thread finished first.
            pass
        if self._thread is not None:
            self._thread.join(timeout=5.0)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fixtures
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture
def short_socket(tmp_path: Path, request: pytest.FixtureRequest) -> Path:
    """Short unix-socket path (macOS ~104-byte limit).

    Uses ``tmp_path / "d.sock"`` when that string is at most 100 characters.
    Otherwise falls back to a fresh ``tempfile.mkdtemp`` directory, which is
    removed again at fixture teardown so repeated runs do not accumulate
    stray ``iai-clitest-*`` directories.
    """
    import shutil  # local import: only the fallback branch needs it

    candidate = tmp_path / "d.sock"
    if len(str(candidate)) > 100:
        fallback_dir = Path(tempfile.mkdtemp(prefix="iai-clitest-"))
        # tmp_path is cleaned up by pytest; the mkdtemp directory is not.
        request.addfinalizer(
            lambda: shutil.rmtree(fallback_dir, ignore_errors=True)
        )
        candidate = fallback_dir / "d.sock"
    return candidate
|
||||
|
||||
|
||||
@pytest.fixture
def fake_state_dir(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path:
    """Point ``Path.home()`` and the cli path constants at a tmp_path tree.

    Covers ~/.iai-mcp, ~/Library/LaunchAgents and ~/.config/systemd/user, so
    install/uninstall never touches the real host filesystem. Returns the
    fake home directory.
    """
    home = tmp_path / "home"
    home.mkdir(parents=True, exist_ok=True)
    monkeypatch.setattr(Path, "home", classmethod(lambda cls: home))

    # The cli constants are replaced explicitly, in addition to Path.home().
    state_root = home / ".iai-mcp"
    overrides = {
        "LOCK_PATH": state_root / ".lock",
        "SOCKET_PATH": state_root / ".daemon.sock",
        "STATE_PATH": state_root / ".daemon-state.json",
        "LAUNCHD_TARGET": (
            home / "Library" / "LaunchAgents" / "com.iai-mcp.daemon.plist"
        ),
        "SYSTEMD_TARGET": (
            home / ".config" / "systemd" / "user" / "iai-mcp-daemon.service"
        ),
    }
    for attr_name, replacement in overrides.items():
        monkeypatch.setattr(cli_mod, attr_name, replacement)
    return home
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 1: dry-run does NOT write any file
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_install_dry_run_writes_no_file(
    fake_state_dir: Path,
    capsys: pytest.CaptureFixture,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """``install --dry-run`` prints its plan and leaves the plist unwritten."""
    monkeypatch.setattr(platform, "system", lambda: "Darwin")
    argv = ["daemon", "install", "--dry-run", "--yes"]
    assert cli_mod.main(argv) == 0
    assert not cli_mod.LAUNCHD_TARGET.exists()
    printed = capsys.readouterr().out
    assert "Would install to" in printed
    # The dry-run output already shows the substituted interpreter path.
    assert sys.executable in printed
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 2: install on macOS writes plist with sys.executable + invokes launchctl
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_install_macos_writes_plist_with_sys_executable(
    fake_state_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """macOS install writes a plist naming ``sys.executable`` and calls launchctl."""
    monkeypatch.setattr(platform, "system", lambda: "Darwin")
    invocations: list[list[str]] = []

    class _Result:
        returncode = 0
        stdout = ""
        stderr = ""

    def _recording_run(argv, **kwargs):
        # Record the command line instead of executing it.
        invocations.append(list(argv))
        return _Result()

    monkeypatch.setattr(cli_mod.subprocess, "run", _recording_run)

    assert cli_mod.main(["daemon", "install", "--yes"]) == 0
    assert cli_mod.LAUNCHD_TARGET.exists()
    plist_text = cli_mod.LAUNCHD_TARGET.read_text()
    # Pitfall 5: the absolute interpreter path must be baked into the plist.
    assert sys.executable in plist_text
    # The USERNAME template placeholder must not survive rendering.
    assert "{USERNAME}" not in plist_text
    # Both launchctl bootstrap and kickstart were issued.
    command_lines = [" ".join(cmd) for cmd in invocations]
    assert any("bootstrap" in line for line in command_lines), invocations
    assert any("kickstart" in line for line in command_lines), invocations
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 3: install on Linux writes systemd unit + invokes systemctl + loginctl
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_install_linux_writes_unit_and_invokes_systemctl(
    fake_state_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Linux install writes the systemd unit and shells out to loginctl/systemctl.

    ``subprocess.run`` is replaced by a recorder, so no real command runs.
    """
    monkeypatch.setattr(platform, "system", lambda: "Linux")
    monkeypatch.setenv("USER", "testuser")
    calls: list[list[str]] = []

    def _fake_run(argv, **kwargs):
        calls.append(list(argv))

        class _R:
            returncode = 0
            # Every `loginctl show-user` probe reports Linger=no; this fake
            # never flips to Linger=yes, even after an enable call.
            stdout = (
                "Linger=no" if argv[:2] == ["loginctl", "show-user"]
                else ""
            )
            stderr = ""

        return _R()

    monkeypatch.setattr(cli_mod.subprocess, "run", _fake_run)

    rc = cli_mod.main(["daemon", "install", "--yes"])
    assert rc == 0
    assert cli_mod.SYSTEMD_TARGET.exists()
    contents = cli_mod.SYSTEMD_TARGET.read_text()
    assert sys.executable in contents
    # loginctl invoked at least twice (presumably show-user, then the enable
    # call triggered by Linger=no -- confirm against cmd_daemon_install).
    loginctl_calls = [c for c in calls if c and c[0] == "loginctl"]
    assert len(loginctl_calls) >= 2, loginctl_calls
    # systemctl --user daemon-reload AND enable --now invoked
    cmd_strs = [" ".join(c) for c in calls]
    assert any("systemctl --user daemon-reload" in s for s in cmd_strs), cmd_strs
    assert any(
        "systemctl --user enable --now iai-mcp-daemon.service" in s
        for s in cmd_strs
    ), cmd_strs
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 4: consent banner blocks on stdin; non-`y` responses abort
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_install_without_yes_prompts_consent_banner_aborts(
    fake_state_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
    capsys: pytest.CaptureFixture,
) -> None:
    """Without ``--yes``, every answer other than exact ``y`` aborts the install.

    Also checks that the consent banner written to stderr mentions the memory
    figure, the 1% figure and the uninstall command.
    """
    monkeypatch.setattr(platform, "system", lambda: "Darwin")

    def _noop_run(*args, **kwargs):
        # Never shell out for real.
        return type("R", (), {"returncode": 0, "stdout": "", "stderr": ""})()

    monkeypatch.setattr(cli_mod.subprocess, "run", _noop_run)

    # Strict gate: empty input, "n", "N", "yes", "no", "true", digits and
    # lookalikes must all be rejected.
    rejected_answers = ["", "n", "N", "yes", "no", "true", "1", "0", "yeah", "nope"]
    for response in rejected_answers:

        def _answer(_prompt="", r=response):
            # Default argument pins this iteration's answer.
            return r

        monkeypatch.setattr("builtins.input", _answer)
        rc = cli_mod.main(["daemon", "install"])
        assert rc == 1, f"non-strict-y response {response!r} should abort"
        # Install did not proceed, so no plist was written.
        assert not cli_mod.LAUNCHD_TARGET.exists()

    banner = capsys.readouterr().err
    # Banner phrasing was updated 2026-04-19 (Plan 05-08 bge-small-en pivot):
    # "rises to ~2 GB if the opt-in bge-m3 model is selected" -- with a space.
    assert any(token in banner for token in ("~2 GB", "2 GB"))
    assert "1%" in banner
    assert "iai-mcp daemon uninstall" in banner
|
||||
|
||||
|
||||
def test_install_with_lowercase_y_proceeds(
    fake_state_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Answering the consent prompt with ``y`` lets the macOS install complete."""

    def _noop_run(*args, **kwargs):
        return type("R", (), {"returncode": 0, "stdout": "", "stderr": ""})()

    def _say_yes(_prompt=""):
        return "y"

    monkeypatch.setattr(platform, "system", lambda: "Darwin")
    monkeypatch.setattr("builtins.input", _say_yes)
    monkeypatch.setattr(cli_mod.subprocess, "run", _noop_run)

    exit_code = cli_mod.main(["daemon", "install"])
    assert exit_code == 0
    assert cli_mod.LAUNCHD_TARGET.exists()
|
||||
|
||||
|
||||
def test_install_consent_records_audit_trail(
    fake_state_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """D-10 audit trail: interactive consent leaves a JSON receipt on disk.

    After answering ``y``, at least one ``.consent-*.json`` file must exist
    under ``~/.iai-mcp`` with ``consent`` set to true and a ``ts`` key, so a
    later review can confirm consent was given rather than bypassed via
    ``--yes``.
    """

    def _noop_run(*args, **kwargs):
        return type("R", (), {"returncode": 0, "stdout": "", "stderr": ""})()

    monkeypatch.setattr(platform, "system", lambda: "Darwin")
    monkeypatch.setattr("builtins.input", lambda _prompt="": "y")
    monkeypatch.setattr(cli_mod.subprocess, "run", _noop_run)

    assert cli_mod.main(["daemon", "install"]) == 0

    state_root = fake_state_dir / ".iai-mcp"
    receipts = list(state_root.glob(".consent-*.json"))
    assert receipts, "expected at least one .consent-<ts>.json audit receipt"
    receipt = json.loads(receipts[0].read_text())
    assert receipt.get("consent") is True
    assert "ts" in receipt
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 5: macOS uninstall removes plist + all 3 state files
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_uninstall_macos_removes_plist_and_all_state_files(
    fake_state_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """macOS uninstall deletes the plist plus the lock, socket and state files."""

    def _noop_run(*args, **kwargs):
        return type("R", (), {"returncode": 0, "stdout": "", "stderr": ""})()

    monkeypatch.setattr(platform, "system", lambda: "Darwin")
    monkeypatch.setattr(cli_mod.subprocess, "run", _noop_run)

    # Seed the plist and the three state files that uninstall must remove.
    cli_mod.LAUNCHD_TARGET.parent.mkdir(parents=True, exist_ok=True)
    cli_mod.LAUNCHD_TARGET.write_text("<plist></plist>")
    (fake_state_dir / ".iai-mcp").mkdir(parents=True, exist_ok=True)
    seeds = (
        (cli_mod.LOCK_PATH, ""),
        (cli_mod.SOCKET_PATH, ""),
        (cli_mod.STATE_PATH, "{}"),
    )
    for target, body in seeds:
        target.write_text(body)

    assert cli_mod.main(["daemon", "uninstall", "--yes"]) == 0

    # C4 invariant: all 4 artefacts gone.
    for attr_name in ("LAUNCHD_TARGET", "LOCK_PATH", "SOCKET_PATH", "STATE_PATH"):
        assert not getattr(cli_mod, attr_name).exists()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 6: Linux uninstall removes unit + all 3 state files
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_uninstall_linux_removes_unit_and_all_state_files(
    fake_state_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Linux uninstall deletes the unit and state files and disables the service."""
    monkeypatch.setattr(platform, "system", lambda: "Linux")
    invocations: list[list[str]] = []

    def _recording_run(argv, **kwargs):
        invocations.append(list(argv))
        return type("R", (), {"returncode": 0, "stdout": "", "stderr": ""})()

    monkeypatch.setattr(cli_mod.subprocess, "run", _recording_run)

    # Seed the unit file and the three state files.
    cli_mod.SYSTEMD_TARGET.parent.mkdir(parents=True, exist_ok=True)
    cli_mod.SYSTEMD_TARGET.write_text("[Service]")
    (fake_state_dir / ".iai-mcp").mkdir(parents=True, exist_ok=True)
    seeds = (
        (cli_mod.LOCK_PATH, ""),
        (cli_mod.SOCKET_PATH, ""),
        (cli_mod.STATE_PATH, "{}"),
    )
    for target, body in seeds:
        target.write_text(body)

    assert cli_mod.main(["daemon", "uninstall", "--yes"]) == 0

    for attr_name in ("SYSTEMD_TARGET", "LOCK_PATH", "SOCKET_PATH", "STATE_PATH"):
        assert not getattr(cli_mod, attr_name).exists()

    command_lines = [" ".join(cmd) for cmd in invocations]
    wanted = "systemctl --user disable --now iai-mcp-daemon.service"
    assert any(wanted in line for line in command_lines), command_lines
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 7: status round-trip + daemon-down message
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_status_socket_round_trip(
    short_socket: Path,
    fake_state_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
    capsys: pytest.CaptureFixture,
) -> None:
    """``daemon status`` sends one status request and prints the daemon's reply."""
    monkeypatch.setattr(cli_mod, "SOCKET_PATH", short_socket)
    requests_seen: list[dict] = []
    canned_reply = {
        "ok": True,
        "state": "WAKE",
        "uptime_sec": 42.5,
        "version": "0.1.0",
    }
    fake = _ThreadedFakeDaemon(short_socket, requests_seen, reply=canned_reply)
    fake.start()
    try:
        exit_code = cli_mod.main(["daemon", "status"])
        assert exit_code == 0
    finally:
        fake.stop()

    printed = capsys.readouterr().out
    assert "WAKE" in printed
    assert "42" in printed
    # Exactly one request reached the socket.
    assert requests_seen == [{"type": "status"}]
|
||||
|
||||
|
||||
def test_status_daemon_down(
    short_socket: Path,
    monkeypatch: pytest.MonkeyPatch,
    capsys: pytest.CaptureFixture,
) -> None:
    """With no socket file present, ``daemon status`` exits 1 and says so."""
    monkeypatch.setattr(cli_mod, "SOCKET_PATH", short_socket)
    # Precondition: nothing is listening because the socket was never created.
    assert not short_socket.exists()
    exit_code = cli_mod.main(["daemon", "status"])
    assert exit_code == 1
    printed = capsys.readouterr().out
    assert "daemon not running" in printed
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 8: status version skew warns when daemon != installed
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_status_warns_on_version_skew(
    short_socket: Path,
    monkeypatch: pytest.MonkeyPatch,
    capsys: pytest.CaptureFixture,
) -> None:
    """A daemon reporting version ``0.0.1-OLD`` triggers a restart hint on stderr."""
    monkeypatch.setattr(cli_mod, "SOCKET_PATH", short_socket)
    requests_seen: list[dict] = []
    stale_reply = {"ok": True, "state": "WAKE", "version": "0.0.1-OLD"}
    fake = _ThreadedFakeDaemon(short_socket, requests_seen, reply=stale_reply)
    fake.start()
    try:
        exit_code = cli_mod.main(["daemon", "status"])
        assert exit_code == 0
    finally:
        fake.stop()

    warning = capsys.readouterr().err
    lowered = warning.lower()
    assert "version" in lowered
    assert "0.0.1-OLD" in warning
    assert "restart" in lowered
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 9: configure subcommands persist to state file
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_configure_set_budget_persists(
    fake_state_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """``configure set-budget 0.02`` stores the value in the state file."""
    from iai_mcp import daemon_state

    # daemon_state keeps its own STATE_PATH constant; align it with the
    # patched cli constant so the state lands under the fake home.
    state_file = cli_mod.STATE_PATH
    monkeypatch.setattr(daemon_state, "STATE_PATH", state_file)
    state_file.parent.mkdir(parents=True, exist_ok=True)

    assert cli_mod.main(["daemon", "configure", "set-budget", "0.02"]) == 0
    persisted = json.loads(cli_mod.STATE_PATH.read_text())
    assert persisted["daily_quota_pct_override"] == pytest.approx(0.02)
|
||||
|
||||
|
||||
def test_configure_set_cycle_count_persists(
    fake_state_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """``configure set-cycle-count 5`` stores the integer in the state file."""
    from iai_mcp import daemon_state

    state_file = cli_mod.STATE_PATH
    monkeypatch.setattr(daemon_state, "STATE_PATH", state_file)
    state_file.parent.mkdir(parents=True, exist_ok=True)

    assert cli_mod.main(["daemon", "configure", "set-cycle-count", "5"]) == 0
    persisted = json.loads(cli_mod.STATE_PATH.read_text())
    assert persisted["cycle_count_override"] == 5
|
||||
|
||||
|
||||
def test_configure_disable_host_persists(
    fake_state_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """``configure disable-claude`` stores ``claude_enabled: false`` in the state file."""
    from iai_mcp import daemon_state

    state_file = cli_mod.STATE_PATH
    monkeypatch.setattr(daemon_state, "STATE_PATH", state_file)
    state_file.parent.mkdir(parents=True, exist_ok=True)

    assert cli_mod.main(["daemon", "configure", "disable-claude"]) == 0
    persisted = json.loads(cli_mod.STATE_PATH.read_text())
    assert persisted["claude_enabled"] is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 10: force-rem socket message
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_force_rem_sends_correct_message(
    short_socket: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """``daemon force-rem`` sends a single ``force_rem`` request over the socket."""
    monkeypatch.setattr(cli_mod, "SOCKET_PATH", short_socket)
    requests_seen: list[dict] = []
    canned_reply = {"ok": True, "cycles_completed": 1}
    fake = _ThreadedFakeDaemon(short_socket, requests_seen, reply=canned_reply)
    fake.start()
    try:
        exit_code = cli_mod.main(["daemon", "force-rem"])
        assert exit_code == 0
    finally:
        fake.stop()
    assert requests_seen == [{"type": "force_rem"}]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 11: pause N
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_pause_sends_seconds_arg(
    short_socket: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """``daemon pause 300`` sends a ``pause`` request with integer ``seconds``."""
    monkeypatch.setattr(cli_mod, "SOCKET_PATH", short_socket)
    requests_seen: list[dict] = []
    fake = _ThreadedFakeDaemon(short_socket, requests_seen, reply={"ok": True})
    fake.start()
    try:
        exit_code = cli_mod.main(["daemon", "pause", "300"])
        assert exit_code == 0
    finally:
        fake.stop()
    assert requests_seen == [{"type": "pause", "seconds": 300}]
|
||||
|
||||
|
||||
def test_resume_sends_resume_message(
    short_socket: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """``daemon resume`` sends a single ``resume`` request over the socket."""
    monkeypatch.setattr(cli_mod, "SOCKET_PATH", short_socket)
    requests_seen: list[dict] = []
    fake = _ThreadedFakeDaemon(short_socket, requests_seen, reply={"ok": True})
    fake.start()
    try:
        exit_code = cli_mod.main(["daemon", "resume"])
        assert exit_code == 0
    finally:
        fake.stop()
    assert requests_seen == [{"type": "resume"}]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 12: start / stop dispatch correct argv on each platform
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_start_macos_uses_launchctl_kickstart(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """On Darwin, ``daemon start`` issues a ``launchctl kickstart`` command."""
    monkeypatch.setattr(platform, "system", lambda: "Darwin")
    invocations: list[list[str]] = []

    def _recording_run(argv, **kwargs):
        # Capture the command line; report success without running anything.
        invocations.append(list(argv))
        return type("R", (), {"returncode": 0})()

    monkeypatch.setattr(cli_mod.subprocess, "run", _recording_run)

    assert cli_mod.main(["daemon", "start"]) == 0
    command_lines = [" ".join(cmd) for cmd in invocations]
    assert any("launchctl kickstart" in line for line in command_lines), command_lines
|
||||
|
||||
|
||||
def test_stop_macos_uses_launchctl_kill_sigterm(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """On Darwin, ``daemon stop`` issues ``launchctl kill SIGTERM``."""
    monkeypatch.setattr(platform, "system", lambda: "Darwin")
    invocations: list[list[str]] = []

    def _recording_run(argv, **kwargs):
        invocations.append(list(argv))
        return type("R", (), {"returncode": 0})()

    monkeypatch.setattr(cli_mod.subprocess, "run", _recording_run)

    assert cli_mod.main(["daemon", "stop"]) == 0
    command_lines = [" ".join(cmd) for cmd in invocations]
    assert any("launchctl kill SIGTERM" in line for line in command_lines), command_lines
|
||||
|
||||
|
||||
def test_start_linux_uses_systemctl_start(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """On Linux, ``daemon start`` runs ``systemctl --user start`` on the unit."""
    monkeypatch.setattr(platform, "system", lambda: "Linux")
    invocations: list[list[str]] = []

    def _recording_run(argv, **kwargs):
        invocations.append(list(argv))
        return type("R", (), {"returncode": 0})()

    monkeypatch.setattr(cli_mod.subprocess, "run", _recording_run)

    assert cli_mod.main(["daemon", "start"]) == 0
    expected_prefix = ["systemctl", "--user", "start", "iai-mcp-daemon.service"]
    assert any(cmd[:4] == expected_prefix for cmd in invocations), invocations
|
||||
|
||||
|
||||
def test_stop_linux_uses_systemctl_stop(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """On Linux, ``daemon stop`` runs ``systemctl --user stop`` on the unit."""
    monkeypatch.setattr(platform, "system", lambda: "Linux")
    invocations: list[list[str]] = []

    def _recording_run(argv, **kwargs):
        invocations.append(list(argv))
        return type("R", (), {"returncode": 0})()

    monkeypatch.setattr(cli_mod.subprocess, "run", _recording_run)

    assert cli_mod.main(["daemon", "stop"]) == 0
    expected_prefix = ["systemctl", "--user", "stop", "iai-mcp-daemon.service"]
    assert any(cmd[:4] == expected_prefix for cmd in invocations), invocations
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 13: logs dispatches tail (macOS) or journalctl (Linux)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_logs_macos_invokes_tail(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """On Darwin, ``daemon logs -n 50`` shells out to ``tail``."""
    monkeypatch.setattr(platform, "system", lambda: "Darwin")
    invocations: list[list[str]] = []

    def _recording_run(argv, **kwargs):
        invocations.append(list(argv))
        return type("R", (), {"returncode": 0})()

    monkeypatch.setattr(cli_mod.subprocess, "run", _recording_run)

    assert cli_mod.main(["daemon", "logs", "-n", "50"]) == 0
    assert any(cmd and cmd[0] == "tail" for cmd in invocations), invocations
|
||||
|
||||
|
||||
def test_logs_linux_invokes_journalctl(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """On Linux, ``daemon logs -n 100`` shells out to ``journalctl --user -u``."""
    monkeypatch.setattr(platform, "system", lambda: "Linux")
    invocations: list[list[str]] = []

    def _recording_run(argv, **kwargs):
        invocations.append(list(argv))
        return type("R", (), {"returncode": 0})()

    monkeypatch.setattr(cli_mod.subprocess, "run", _recording_run)

    assert cli_mod.main(["daemon", "logs", "-n", "100"]) == 0
    expected_prefix = ["journalctl", "--user", "-u", "iai-mcp-daemon.service", "-n"]
    assert any(cmd[:5] == expected_prefix for cmd in invocations), invocations
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Idempotency: install + install does not error
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_install_twice_is_idempotent(
    fake_state_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Running the macOS install twice in a row succeeds both times."""

    def _noop_run(*args, **kwargs):
        return type("R", (), {"returncode": 0, "stdout": "", "stderr": ""})()

    monkeypatch.setattr(platform, "system", lambda: "Darwin")
    monkeypatch.setattr(cli_mod.subprocess, "run", _noop_run)

    install_argv = ["daemon", "install", "--yes"]
    first_rc = cli_mod.main(install_argv)
    assert first_rc == 0
    second_rc = cli_mod.main(install_argv)
    assert second_rc == 0
    assert cli_mod.LAUNCHD_TARGET.exists()
|
||||
|
||||
|
||||
def test_uninstall_twice_is_idempotent(
    fake_state_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Running the macOS uninstall twice with nothing installed succeeds both times."""

    def _noop_run(*args, **kwargs):
        return type("R", (), {"returncode": 0, "stdout": "", "stderr": ""})()

    monkeypatch.setattr(platform, "system", lambda: "Darwin")
    monkeypatch.setattr(cli_mod.subprocess, "run", _noop_run)

    uninstall_argv = ["daemon", "uninstall", "--yes"]
    first_rc = cli_mod.main(uninstall_argv)
    assert first_rc == 0
    second_rc = cli_mod.main(uninstall_argv)
    assert second_rc == 0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Help output sanity
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_daemon_help_lists_all_subcommands(
    capsys: pytest.CaptureFixture,
) -> None:
    """``daemon --help`` exits 0 and names every daemon subcommand."""
    with pytest.raises(SystemExit) as exc_info:
        cli_mod.main(["daemon", "--help"])
    assert exc_info.value.code == 0

    help_text = capsys.readouterr().out
    expected_subcommands = (
        "install",
        "uninstall",
        "start",
        "stop",
        "status",
        "logs",
        "force-rem",
        "pause",
        "resume",
        "configure",
    )
    for sub in expected_subcommands:
        assert sub in help_text, f"missing {sub} in daemon --help output"
|
||||
214
tests/test_cli_daemon_install_python_path.py
Normal file
214
tests/test_cli_daemon_install_python_path.py
Normal file
|
|
@ -0,0 +1,214 @@
|
|||
"""Plan 07.14-02 tests: regression-lock for `iai-mcp daemon install`
|
||||
sys.executable substitution into launchd plist + systemd user unit.
|
||||
|
||||
Locks the contract that `_render_launchd_plist` and `_render_systemd_unit`
|
||||
substitute `sys.executable` in place of the template `/usr/local/bin/python3`
|
||||
and `/usr/bin/python3` placeholders. Without this contract, the daemon
|
||||
runs under whatever `python3` happens to be first on PATH at launchd /
|
||||
systemd invocation, which on macOS is typically the SIP-protected
|
||||
`/usr/local/bin/python3` -- different from the venv Python where iai-mcp
|
||||
and its dependencies live.
|
||||
|
||||
VERIFY result (planner + executor 2026-05-01): production code already
|
||||
does the substitution. `src/iai_mcp/cli.py::_render_launchd_plist`
|
||||
calls `text.replace("/usr/local/bin/python3", sys.executable)`, and
|
||||
`_render_systemd_unit` calls
|
||||
`text.replace("/usr/bin/python3", sys.executable)`. The plist template
|
||||
at `deploy/launchd/com.iai-mcp.daemon.plist` carries
|
||||
`<string>/usr/local/bin/python3</string>` inside `ProgramArguments`, and
|
||||
`deploy/systemd/iai-mcp-daemon.service` carries
|
||||
`ExecStart=/usr/bin/python3 -m iai_mcp.daemon`. Production-code change
|
||||
for this plan is ZERO LINES; this file is a regression lock so a future
|
||||
refactor that hardcodes the path will fail these tests.
|
||||
|
||||
Test 3 (`test_install_warns_when_sys_executable_lacks_psutil`) verified
|
||||
during Sub-step 1: `cmd_daemon_install` (cli.py 268-362) does NOT carry a
|
||||
`subprocess.run([sys.executable, "-c", "import psutil"])` probe today.
|
||||
Per 07.14-CONTEXT.md ("only if gap-driven patch is needed: ... defer
|
||||
adding such a row to a future phase. Do NOT add it speculatively in
|
||||
07.14"), the WARN-on-missing-psutil contract is xfail-marked: the
|
||||
contract is documented for a future plan to enforce, but adding the
|
||||
probe speculatively is out of scope.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
def _make_install_args(**kwargs) -> argparse.Namespace:
    """Return the Namespace handed to `cmd_daemon_install` in these tests.

    `dry_run` and `yes` both default to True; any keyword argument
    overrides a default or adds an extra attribute.
    """
    return argparse.Namespace(**{"dry_run": True, "yes": True, **kwargs})
|
||||
|
||||
|
||||
def test_install_uses_sys_executable_macos(monkeypatch):
    """The launchd plist must carry `sys.executable`, not the template path.

    `_render_launchd_plist` is expected to replace the
    `/usr/local/bin/python3` placeholder with the invoking interpreter.

    NOTE(review): the patch target is the dotted path
    `iai_mcp.cli.sys.executable`. If `iai_mcp.cli` binds `sys` with a plain
    `import sys`, that path resolves to the shared `sys` module, so the
    override is process-wide until monkeypatch undoes it at teardown --
    confirm before relying on it being cli-local.
    """
    venv_python = "/path/to/venv/bin/python3"
    monkeypatch.setattr("iai_mcp.cli.sys.executable", venv_python)
    from iai_mcp.cli import _render_launchd_plist

    plist_text = _render_launchd_plist()
    substituted_entry = f"<string>{venv_python}</string>"
    placeholder_entry = "<string>/usr/local/bin/python3</string>"
    assert substituted_entry in plist_text, (
        f"plist did not substitute sys.executable; rendered text:\n{plist_text[:500]}"
    )
    assert placeholder_entry not in plist_text, (
        "plist still contains the unsubstituted /usr/local/bin/python3 placeholder"
    )
|
||||
|
||||
|
||||
def test_install_uses_sys_executable_linux(monkeypatch):
    """The systemd unit must carry `sys.executable`, not `/usr/bin/python3`.

    Two things are checked: the substituted interpreter path shows up next
    to `iai_mcp.daemon`, and the template's original
    `/usr/bin/python3 -m iai_mcp.daemon` text is no longer present (i.e. it
    was replaced rather than left alongside a new line).
    """
    venv_python = "/path/to/venv/bin/python3"
    monkeypatch.setattr("iai_mcp.cli.sys.executable", venv_python)
    from iai_mcp.cli import _render_systemd_unit

    unit_text = _render_systemd_unit()
    # Preferred shape: the full command line with the substituted path.
    exact_command = f"{venv_python} -m iai_mcp.daemon" in unit_text
    # Accepted fallback: both pieces present somewhere in the unit.
    both_pieces = venv_python in unit_text and "iai_mcp.daemon" in unit_text
    assert exact_command or both_pieces, (
        f"systemd unit did not substitute sys.executable; rendered:\n{unit_text[:500]}"
    )
    assert "/usr/bin/python3 -m iai_mcp.daemon" not in unit_text, (
        "systemd unit still contains the unsubstituted /usr/bin/python3 placeholder"
    )
|
||||
|
||||
|
||||
# ============================================================================
# Test 3 (`test_install_warns_when_sys_executable_lacks_psutil`, defined at
# the end of this module) -- xfail per 07.14-CONTEXT.md deferral
# ============================================================================
# Sub-step 1 verification (executor 2026-05-01): cmd_daemon_install
# (src/iai_mcp/cli.py lines 268-362) does NOT contain a
# `subprocess.run([sys.executable, "-c", "import psutil"])` probe today.
#
# Per 07.14-CONTEXT.md "only if gap-driven patch is needed: ...
# defer adding such a row to a future phase. Do NOT add it speculatively
# in 07.14".
#
# This xfail documents the contract for a future plan that adds the
# probe. If/when the probe lands, the xfail will flip to xpass and the
# developer should then remove the xfail marker. `strict=False` so an
# xpass does not fail the suite during the transition.
# ============================================================================
|
||||
|
||||
|
||||
# Plan 10.6-01 Task 1.7: plist invariants -----------------------------
|
||||
|
||||
|
||||
def test_plist_keepalive_is_crashed_only(monkeypatch):
    """The rendered plist has a `Crashed` key and no `SuccessfulExit` key.

    Rationale recorded with the test: a graceful exit 0 on HIBERNATION must
    not trigger a respawn (the daemon stays down until the wrapper
    kickstarts it), while a non-zero exit (the LifecycleLockConflict path)
    should. `SuccessfulExit=false` would respawn on every exit 0, which is
    now the steady state, producing a respawn loop.
    """
    monkeypatch.setattr("iai_mcp.cli.sys.executable", "/path/to/venv/bin/python3")
    from iai_mcp.cli import _render_launchd_plist

    plist_text = _render_launchd_plist()

    # The crash-only KeepAlive key has to be there ...
    assert "<key>Crashed</key>" in plist_text
    # ... and the legacy SuccessfulExit key has to be absent.
    assert "<key>SuccessfulExit</key>" not in plist_text, (
        "Phase 10.6 removed SuccessfulExit=false from the plist. Its presence "
        "would create a respawn loop because exit 0 is now the steady state."
    )
|
||||
|
||||
|
||||
def test_plist_lifecycle_env_vars_present(monkeypatch):
    """The plist declares the LIFECYCLE_* and sleep-quarantine env keys.

    These keys make the cadence knobs tunable in production through the
    plist's EnvironmentVariables block; only their presence is checked
    here, not their values.
    """
    monkeypatch.setattr("iai_mcp.cli.sys.executable", "/path/to/venv/bin/python3")
    from iai_mcp.cli import _render_launchd_plist

    plist_text = _render_launchd_plist()
    required_keys = (
        "LIFECYCLE_DROWSY_AFTER_SEC",
        "LIFECYCLE_SLEEP_HEARTBEAT_IDLE_SEC",
        "LIFECYCLE_HIBERNATE_AFTER_SEC",
        "IAI_MCP_SLEEP_QUARANTINE_TTL_HOURS",
    )
    for env_key in required_keys:
        assert f"<key>{env_key}</key>" in plist_text
|
||||
|
||||
|
||||
def test_plist_legacy_env_vars_removed(monkeypatch):
    """None of the RSS-watchdog / idle_watcher era env keys remain in the plist."""
    monkeypatch.setattr("iai_mcp.cli.sys.executable", "/path/to/venv/bin/python3")
    from iai_mcp.cli import _render_launchd_plist

    plist_text = _render_launchd_plist()

    # Only the RSS threshold check carries an explanatory failure message.
    assert "<key>IAI_MCP_RSS_RESTART_THRESHOLD_MB</key>" not in plist_text, (
        "RSS-watchdog removed in Task 1.4; env var must be gone "
        "from the plist."
    )
    for retired_key in (
        "IAI_DAEMON_IDLE_SHUTDOWN_SECS",
        "IAI_MCP_SKIP_STARTUP_OPTIMIZE",
    ):
        assert f"<key>{retired_key}</key>" not in plist_text
|
||||
|
||||
|
||||
@pytest.mark.xfail(
    reason=(
        "psutil-availability probe NOT in cmd_daemon_install today. "
        "Adding speculatively is deferred per 07.14-CONTEXT.md "
        '("only if gap-driven patch is needed: ... defer adding such a '
        'row to a future phase"). This xfail documents the contract for '
        "a future plan."
    ),
    strict=False,
)
def test_install_warns_when_sys_executable_lacks_psutil(
    monkeypatch, capsys, tmp_path,
):
    """Missing `psutil` under the target Python must WARN, not fail.

    Expected contract (not implemented yet, hence the xfail): a dry-run
    install returns 0 and writes a hint to stderr that mentions psutil,
    `iai-mcp daemon install`, and re-running.
    """
    # Keep anything the install path touches inside the sandbox.
    monkeypatch.setenv("HOME", str(tmp_path))
    monkeypatch.setenv("HF_HOME", str(tmp_path / "hf"))

    original_run = subprocess.run

    def _is_psutil_probe(cmd) -> bool:
        # Shape being matched: [<python>, "-c", "import psutil", ...]
        return (
            isinstance(cmd, list)
            and len(cmd) >= 3
            and cmd[1] == "-c"
            and cmd[2] == "import psutil"
        )

    def _fake_run(cmd, **kwargs):
        # The probe fails by raising, regardless of any `check=` kwarg;
        # every other command is delegated to the real subprocess.run.
        if _is_psutil_probe(cmd):
            raise subprocess.CalledProcessError(returncode=1, cmd=cmd)
        return original_run(cmd, **kwargs)

    monkeypatch.setattr("subprocess.run", _fake_run)

    from iai_mcp.cli import cmd_daemon_install

    rc = cmd_daemon_install(_make_install_args(dry_run=True, yes=True))
    stderr_text = capsys.readouterr().err
    # WARN != FAIL: the install still succeeds, the hint goes to stderr.
    assert rc == 0, f"install must NOT fail on missing psutil; got rc={rc}"
    lowered = stderr_text.lower()
    for needle in ("psutil", "iai-mcp daemon install", "re-run"):
        assert needle in lowered
|
||||
111
tests/test_cli_health.py
Normal file
111
tests/test_cli_health.py
Normal file
|
|
@ -0,0 +1,111 @@
|
|||
"""Tests for the iai-mcp CLI -- health + migrate commands."""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from uuid import uuid4
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# ----------------------------------------------------------- iai-mcp health
|
||||
|
||||
|
||||
def test_cli_health_no_events(tmp_path, monkeypatch, capsys):
    """With an empty store, `cmd_health` exits 0 and reports "no events"."""
    import argparse

    # Point the store at the sandbox before the cli module is imported.
    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    from iai_mcp.cli import cmd_health

    status = cmd_health(argparse.Namespace())
    stdout_text = capsys.readouterr().out
    assert status == 0
    assert "no events" in stdout_text.lower()
|
||||
|
||||
|
||||
def test_cli_health_reports_last_event(tmp_path, monkeypatch, capsys):
    """After one `llm_health` event is written, the output names it.

    The assertions cover the event kind and its severity appearing in
    stdout; the timestamp rendering is not asserted here.
    """
    import argparse

    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    from iai_mcp.cli import cmd_health
    from iai_mcp.events import write_event
    from iai_mcp.store import MemoryStore

    # Seed a single informational health event.
    write_event(
        MemoryStore(),
        kind="llm_health",
        data={"status": "ok"},
        severity="info",
    )

    status = cmd_health(argparse.Namespace())
    stdout_text = capsys.readouterr().out
    assert status == 0
    assert "llm_health" in stdout_text
    # The severity label is echoed as well.
    assert "info" in stdout_text
|
||||
|
||||
|
||||
# ---------------------------------------------------------- iai-mcp migrate
|
||||
|
||||
|
||||
def test_cli_migrate_dry_run(tmp_path, monkeypatch, capsys):
    """A dry-run migrate reports "would migrate" and leaves records at v1.

    Three legacy records are inserted with their `language` cleared to
    mimic un-tagged v1 data. After `cmd_migrate(..., dry_run=True)` the
    command must exit 0, print a "would migrate" line, and at least those
    three records must still have `schema_version == 1`.
    """
    import argparse

    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    from iai_mcp.cli import cmd_migrate
    from iai_mcp.store import MemoryStore
    from iai_mcp.types import MemoryRecord, SCHEMA_VERSION_LEGACY, EMBED_DIM

    store = MemoryStore()
    for i in range(3):
        r = MemoryRecord(
            id=uuid4(),
            tier="episodic",
            literal_surface=f"Legacy v1 record number {i} with words to detect.",
            aaak_index="",
            embedding=[0.1] * EMBED_DIM,
            community_id=None,
            centrality=0.0,
            detail_level=2,
            pinned=False,
            stability=0.0,
            difficulty=0.0,
            last_reviewed=None,
            never_decay=False,
            never_merge=False,
            provenance=[],
            created_at=datetime.now(timezone.utc),
            updated_at=datetime.now(timezone.utc),
            tags=[],
            language="en",
            schema_version=SCHEMA_VERSION_LEGACY,
        )
        # simulate un-tagged legacy by clearing language after construction
        r.language = ""
        store.insert(r)

    args = argparse.Namespace(from_=1, to=2, dry_run=True, verbose=False)
    exit_code = cmd_migrate(args)
    out = capsys.readouterr().out
    assert exit_code == 0
    assert "would migrate" in out.lower()

    # Dry run must not mutate the store. The count is a lower bound
    # because the store may hold records beyond the three inserted here.
    v1_count = sum(1 for r in store.all_records() if r.schema_version == 1)
    assert v1_count >= 3
|
||||
|
||||
|
||||
def test_cli_entrypoint_exists():
    """`iai_mcp.cli.main` is importable and callable.

    This is the function the `iai-mcp` console script is expected to
    point at; the pyproject wiring itself is not inspected here.
    """
    from iai_mcp import cli

    assert callable(cli.main)
|
||||
422
tests/test_cli_lifecycle_status.py
Normal file
422
tests/test_cli_lifecycle_status.py
Normal file
|
|
@ -0,0 +1,422 @@
|
|||
"""Phase 10.1 Plan 10.1-01 Task 1.5 -- `iai-mcp lifecycle status` CLI tests.
|
||||
|
||||
Covers status output for each of the 4 states, default WAKE when the
|
||||
file is absent, and the formatted lines for sleep_cycle_progress and
|
||||
quarantine.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
from datetime import datetime, timezone
|
||||
|
||||
import pytest
|
||||
|
||||
from iai_mcp.lifecycle_state import (
|
||||
LifecycleState,
|
||||
LifecycleStateRecord,
|
||||
save_state,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helper -- patch LIFECYCLE_STATE_PATH to a tmp file for each test
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _run_status(tmp_path, monkeypatch, capsys, record: LifecycleStateRecord | None):
    """Run `cmd_lifecycle_status` against a sandboxed state file.

    `iai_mcp.lifecycle_state.LIFECYCLE_STATE_PATH` is redirected to
    `tmp_path / "lifecycle_state.json"`. When `record` is not None it is
    persisted there first; when it is None the file is left absent.

    Returns a `(return_code, captured_stdout)` tuple.
    """
    state_file = tmp_path / "lifecycle_state.json"
    monkeypatch.setattr("iai_mcp.lifecycle_state.LIFECYCLE_STATE_PATH", state_file)
    if record is not None:
        save_state(record, state_file)

    # Imported only now so the path override above is already in place
    # when the command reads LIFECYCLE_STATE_PATH.
    from iai_mcp.cli import cmd_lifecycle_status

    return_code = cmd_lifecycle_status(argparse.Namespace())
    return return_code, capsys.readouterr().out
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Status output for each of the 4 states
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@pytest.mark.parametrize("state", list(LifecycleState))
def test_status_prints_state_label(tmp_path, monkeypatch, capsys, state):
    """For every `LifecycleState` member the output has a `state: <value>` line."""
    seeded: LifecycleStateRecord = {
        "current_state": state.value,
        "since_ts": "2026-05-02T15:00:00+00:00",
        "last_activity_ts": "2026-05-02T15:11:30+00:00",
        "wrapper_event_seq": 42,
        "sleep_cycle_progress": None,
        "quarantine": None,
        "shadow_run": True,
    }
    status, stdout_text = _run_status(tmp_path, monkeypatch, capsys, seeded)
    assert status == 0
    expected_line = f"state: {state.value}"
    assert expected_line in stdout_text
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Absent file -> default WAKE
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_status_returns_default_wake_when_file_absent(tmp_path, monkeypatch, capsys):
    """With no state file on disk the command exits 0 and reports WAKE."""
    status, stdout_text = _run_status(tmp_path, monkeypatch, capsys, record=None)
    assert status == 0
    assert "state: WAKE" in stdout_text
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Wrapper-event seq + last_activity rendered
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_status_renders_seq_and_last_activity(tmp_path, monkeypatch, capsys):
    """The wrapper event sequence and last-activity timestamp are echoed verbatim."""
    seeded: LifecycleStateRecord = {
        "current_state": "WAKE",
        "since_ts": "2026-05-02T15:00:00+00:00",
        "last_activity_ts": "2026-05-02T15:11:30+00:00",
        "wrapper_event_seq": 137,
        "sleep_cycle_progress": None,
        "quarantine": None,
        "shadow_run": True,
    }
    status, stdout_text = _run_status(tmp_path, monkeypatch, capsys, seeded)
    assert status == 0
    assert "wrapper_event_seq: 137" in stdout_text
    assert "last_activity: 2026-05-02T15:11:30+00:00" in stdout_text
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# sleep_cycle_progress rendering
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_status_progress_none_says_none(tmp_path, monkeypatch, capsys):
    """A null `sleep_cycle_progress` is rendered as the literal word `none`."""
    idle_ts = "2026-05-02T15:00:00+00:00"
    seeded: LifecycleStateRecord = {
        "current_state": "WAKE",
        "since_ts": idle_ts,
        "last_activity_ts": idle_ts,
        "wrapper_event_seq": 0,
        "sleep_cycle_progress": None,
        "quarantine": None,
        "shadow_run": True,
    }
    status, stdout_text = _run_status(tmp_path, monkeypatch, capsys, seeded)
    assert status == 0
    assert "sleep_cycle_progress: none" in stdout_text
|
||||
|
||||
|
||||
def test_status_progress_active_renders_step_attempt(tmp_path, monkeypatch, capsys):
    """An in-flight sleep cycle shows its step, attempt and (absent) last error."""
    cycle_start = "2026-05-02T03:00:00+00:00"
    progress = {
        "last_completed_step": 3,
        "attempt": 1,
        "last_error": None,
        "started_at": cycle_start,
    }
    seeded: LifecycleStateRecord = {
        "current_state": "SLEEP",
        "since_ts": cycle_start,
        "last_activity_ts": cycle_start,
        "wrapper_event_seq": 7,
        "sleep_cycle_progress": progress,
        "quarantine": None,
        "shadow_run": True,
    }
    status, stdout_text = _run_status(tmp_path, monkeypatch, capsys, seeded)
    assert status == 0
    for fragment in ("step=3", "attempt=1", "last_error=none"):
        assert fragment in stdout_text
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Quarantine rendering
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_status_quarantine_none_says_none(tmp_path, monkeypatch, capsys):
    """A null `quarantine` is rendered as the literal word `none`."""
    idle_ts = "2026-05-02T15:00:00+00:00"
    seeded: LifecycleStateRecord = {
        "current_state": "WAKE",
        "since_ts": idle_ts,
        "last_activity_ts": idle_ts,
        "wrapper_event_seq": 0,
        "sleep_cycle_progress": None,
        "quarantine": None,
        "shadow_run": True,
    }
    status, stdout_text = _run_status(tmp_path, monkeypatch, capsys, seeded)
    assert status == 0
    assert "quarantine: none" in stdout_text
|
||||
|
||||
|
||||
def test_status_quarantine_active_renders_until_and_reason(tmp_path, monkeypatch, capsys):
    """An active quarantine shows its `until`, `reason` and `since` fields."""
    entered_at = "2026-05-02T03:00:00+00:00"
    quarantine = {
        "until_ts": "2026-05-03T03:00:00+00:00",
        "reason": "sleep step 4 failed 3x",
        "since_ts": entered_at,
    }
    seeded: LifecycleStateRecord = {
        "current_state": "SLEEP",
        "since_ts": entered_at,
        "last_activity_ts": entered_at,
        "wrapper_event_seq": 1,
        "sleep_cycle_progress": None,
        "quarantine": quarantine,
        "shadow_run": True,
    }
    status, stdout_text = _run_status(tmp_path, monkeypatch, capsys, seeded)
    assert status == 0
    assert "until=2026-05-03T03:00:00+00:00" in stdout_text
    assert "reason=sleep step 4 failed 3x" in stdout_text
    assert "since=2026-05-02T03:00:00+00:00" in stdout_text
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# shadow_run flag rendering
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_status_shadow_run_true_mentions_legacy_watchdog(tmp_path, monkeypatch, capsys):
    """`shadow_run=True` prints `shadow_run: true` plus a "Phase 10.6" mention."""
    idle_ts = "2026-05-02T15:00:00+00:00"
    seeded: LifecycleStateRecord = {
        "current_state": "WAKE",
        "since_ts": idle_ts,
        "last_activity_ts": idle_ts,
        "wrapper_event_seq": 0,
        "sleep_cycle_progress": None,
        "quarantine": None,
        "shadow_run": True,
    }
    status, stdout_text = _run_status(tmp_path, monkeypatch, capsys, seeded)
    assert status == 0
    assert "shadow_run: true" in stdout_text
    # The status line is expected to name the phase that flips the flag.
    assert "Phase 10.6" in stdout_text
|
||||
|
||||
|
||||
def test_status_shadow_run_false(tmp_path, monkeypatch, capsys):
    """`shadow_run=False` is rendered as `shadow_run: false`."""
    idle_ts = "2026-05-02T15:00:00+00:00"
    seeded: LifecycleStateRecord = {
        "current_state": "WAKE",
        "since_ts": idle_ts,
        "last_activity_ts": idle_ts,
        "wrapper_event_seq": 0,
        "sleep_cycle_progress": None,
        "quarantine": None,
        "shadow_run": False,
    }
    status, stdout_text = _run_status(tmp_path, monkeypatch, capsys, seeded)
    assert status == 0
    assert "shadow_run: false" in stdout_text
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helper formatter sanity
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_format_relative_minutes():
    """A twelve-minute gap is rendered as `12 minutes`.

    `_format_relative` is called with an explicit `now`, so no fixtures
    are needed (the previously requested `tmp_path` / `monkeypatch` were
    never used).
    """
    from iai_mcp.cli import _format_relative

    now = datetime(2026, 5, 2, 15, 12, 0, tzinfo=timezone.utc)
    out = _format_relative("2026-05-02T15:00:00+00:00", now=now)
    assert out == "12 minutes"
|
||||
|
||||
|
||||
def test_format_relative_hours():
    """A two-hour gap is rendered as `2 hours`."""
    from iai_mcp.cli import _format_relative

    reference = datetime(2026, 5, 2, 15, 12, 0, tzinfo=timezone.utc)
    assert _format_relative("2026-05-02T13:12:00+00:00", now=reference) == "2 hours"
|
||||
|
||||
|
||||
def test_format_relative_days():
    """A three-day gap is rendered as `3 days`."""
    from iai_mcp.cli import _format_relative

    reference = datetime(2026, 5, 5, 0, 0, 0, tzinfo=timezone.utc)
    assert _format_relative("2026-05-02T00:00:00+00:00", now=reference) == "3 days"
|
||||
|
||||
|
||||
def test_format_relative_singular_minute():
    """Exactly one minute uses the singular form `1 minute`."""
    from iai_mcp.cli import _format_relative

    reference = datetime(2026, 5, 2, 15, 1, 0, tzinfo=timezone.utc)
    assert _format_relative("2026-05-02T15:00:00+00:00", now=reference) == "1 minute"
|
||||
|
||||
|
||||
def test_format_relative_handles_garbage():
    """An unparseable timestamp yields `unknown` instead of raising."""
    from iai_mcp.cli import _format_relative

    rendered = _format_relative("not-a-timestamp")
    assert rendered == "unknown"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# End-to-end: invoke via main([...])
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_cli_main_lifecycle_status_via_main(tmp_path, monkeypatch, capsys):
    """`main(["lifecycle", "status"])` reaches the status command end-to-end."""
    state_file = tmp_path / "lifecycle_state.json"
    monkeypatch.setattr("iai_mcp.lifecycle_state.LIFECYCLE_STATE_PATH", state_file)

    seeded: LifecycleStateRecord = {
        "current_state": "DROWSY",
        "since_ts": "2026-05-02T15:00:00+00:00",
        "last_activity_ts": "2026-05-02T15:11:30+00:00",
        "wrapper_event_seq": 42,
        "sleep_cycle_progress": None,
        "quarantine": None,
        "shadow_run": True,
    }
    save_state(seeded, state_file)

    from iai_mcp.cli import main

    status = main(["lifecycle", "status"])
    stdout_text = capsys.readouterr().out
    assert status == 0
    assert "state: DROWSY" in stdout_text
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Plan 10.6-01 Task 1.2 -- lifecycle force-unlock subcommand
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_force_unlock_with_yes_flag(tmp_path, capsys):
    """``--yes`` skips the prompt and clears a present lockfile.

    The lockfile's pid and hostname must be echoed to stdout, followed by
    the "Lockfile removed." confirmation, and the file must be gone.
    No `monkeypatch` is requested: nothing is patched on this path.
    """
    import json as _json

    from iai_mcp.cli import cmd_lifecycle_force_unlock

    lock_path = tmp_path / ".locked"
    lock_path.write_text(
        _json.dumps(
            {
                "pid": 4242,
                "hostname": "stale-host.local",
                "started_at": "2026-04-29T08:00:00+00:00",
                "schema_version": 1,
            }
        )
    )

    args = argparse.Namespace(yes=True, lock_path=lock_path)
    rc = cmd_lifecycle_force_unlock(args)
    out = capsys.readouterr().out
    assert rc == 0
    assert "pid=4242" in out
    assert "stale-host.local" in out
    assert "Lockfile removed." in out
    assert not lock_path.exists()
|
||||
|
||||
|
||||
def test_force_unlock_without_yes_prompts_no_aborts(
    tmp_path, monkeypatch, capsys,
):
    """Without ``--yes`` an "n" answer aborts: rc=1, "cancelled" on stderr, file kept."""
    import json as _json

    from iai_mcp.cli import cmd_lifecycle_force_unlock

    holder_info = {
        "pid": 4242,
        "hostname": "stale-host.local",
        "started_at": "2026-04-29T08:00:00+00:00",
        "schema_version": 1,
    }
    lock_path = tmp_path / ".locked"
    lock_path.write_text(_json.dumps(holder_info))

    # Answer the confirmation prompt with "n".
    monkeypatch.setattr("builtins.input", lambda _prompt="": "n")

    status = cmd_lifecycle_force_unlock(
        argparse.Namespace(yes=False, lock_path=lock_path)
    )
    streams = capsys.readouterr()
    assert status == 1
    assert "cancelled" in streams.err.lower()
    assert lock_path.exists()
|
||||
|
||||
|
||||
def test_force_unlock_without_yes_prompts_y_succeeds(
    tmp_path, monkeypatch, capsys,
):
    """Without ``--yes`` a "y" answer proceeds: rc=0 and the lockfile is removed."""
    import json as _json

    from iai_mcp.cli import cmd_lifecycle_force_unlock

    holder_info = {
        "pid": 4242,
        "hostname": "stale-host.local",
        "started_at": "2026-04-29T08:00:00+00:00",
        "schema_version": 1,
    }
    lock_path = tmp_path / ".locked"
    lock_path.write_text(_json.dumps(holder_info))

    # Answer the confirmation prompt with "y".
    monkeypatch.setattr("builtins.input", lambda _prompt="": "y")

    status = cmd_lifecycle_force_unlock(
        argparse.Namespace(yes=False, lock_path=lock_path)
    )
    stdout_text = capsys.readouterr().out
    assert status == 0
    assert "Lockfile removed." in stdout_text
    assert not lock_path.exists()
|
||||
|
||||
|
||||
def test_force_unlock_when_no_lockfile(tmp_path, capsys):
    """With no lockfile present the command exits 0 and says "nothing to unlock"."""
    from iai_mcp.cli import cmd_lifecycle_force_unlock

    # The path is deliberately never created.
    missing_lock = tmp_path / ".locked"
    status = cmd_lifecycle_force_unlock(
        argparse.Namespace(yes=True, lock_path=missing_lock)
    )
    stdout_text = capsys.readouterr().out
    assert status == 0
    assert "nothing to unlock" in stdout_text.lower()
|
||||
|
||||
|
||||
def test_cli_main_lifecycle_force_unlock_via_main(
    tmp_path, monkeypatch, capsys,
):
    """`main(["lifecycle", "force-unlock", "--yes"])` removes the lockfile.

    `iai_mcp.lifecycle_lock.DEFAULT_LOCK_PATH` is redirected to a file
    under `tmp_path` so the real ``~/.iai-mcp/.locked`` is never touched.
    """
    import json as _json

    holder_info = {
        "pid": 9999,
        "hostname": "foreign-host.local",
        "started_at": "2026-04-30T10:00:00+00:00",
        "schema_version": 1,
    }
    lock_path = tmp_path / ".locked"
    lock_path.write_text(_json.dumps(holder_info))

    monkeypatch.setattr("iai_mcp.lifecycle_lock.DEFAULT_LOCK_PATH", lock_path)

    from iai_mcp.cli import main

    status = main(["lifecycle", "force-unlock", "--yes"])
    stdout_text = capsys.readouterr().out
    assert status == 0
    assert "Lockfile removed." in stdout_text
    assert not lock_path.exists()
|
||||
345
tests/test_cli_maintenance_compact_records.py
Normal file
345
tests/test_cli_maintenance_compact_records.py
Normal file
|
|
@ -0,0 +1,345 @@
|
|||
"""Plan 07.14-01 tests: `iai-mcp maintenance compact-records`.
|
||||
|
||||
Eight cases:
|
||||
1. test_dry_run_prints_metrics_no_optimize_call
|
||||
2. test_apply_with_yes_runs_optimize
|
||||
3. test_preflight_refuses_when_daemon_alive
|
||||
4. test_preflight_skips_when_daemon_state_missing
|
||||
5. test_record_id_set_invariant_aborts_on_divergence
|
||||
6. test_audit_file_written_on_apply
|
||||
7. test_dry_run_no_audit_file
|
||||
8. test_yes_required_with_apply_in_non_tty
|
||||
|
||||
All tests use mocked `MemoryStore` + mocked `optimize_lance_storage` +
|
||||
mocked `psutil` — zero real LanceDB I/O, zero real embedder load,
|
||||
combined wall-clock target < 5s.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from datetime import timedelta
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _make_args(**kwargs) -> argparse.Namespace:
    """Return a Namespace for `cmd_maintenance_compact_records`.

    Every flag defaults to "off" (`dry_run`, `apply`, `yes` are False and
    `store_path` is None); keyword arguments override or extend them.
    """
    flag_values = {
        "dry_run": False,
        "apply": False,
        "yes": False,
        "store_path": None,
        **kwargs,
    }
    return argparse.Namespace(**flag_values)
|
||||
|
||||
|
||||
def _patch_psutil_alive(
    monkeypatch: pytest.MonkeyPatch, *, pid: int, cmdline: list[str],
) -> None:
    """Install a fake `psutil` whose `Process(...)` looks like a live process.

    The fake module is placed in `sys.modules["psutil"]` through
    `monkeypatch.setitem`, so a function-scope `import psutil` in the code
    under test resolves to it and the entry is restored at teardown.

    Args:
        monkeypatch: pytest's MonkeyPatch used to install the fake module.
        pid: pid reported by the fake process's `.pid` attribute. The fake
            `Process` constructor does not filter on it: any pid passed to
            `psutil.Process(...)` yields the same fake process.
        cmdline: list returned by the fake process's `cmdline()`.
    """
    fake_proc = MagicMock()
    # Previously `pid` was accepted but ignored; expose it on the fake so
    # the mocked process is self-consistent with the pid the caller seeded.
    fake_proc.pid = pid
    fake_proc.cmdline.return_value = cmdline
    fake_psutil = MagicMock()
    fake_psutil.Process.return_value = fake_proc
    monkeypatch.setitem(sys.modules, "psutil", fake_psutil)
|
||||
|
||||
|
||||
def _make_optimize_report(
    *, versions_before: int = 3, versions_after: int = 1,
    rows_before: int = 0, rows_after: int = 0,
) -> dict:
    """Build a three-table report shaped like `optimize_lance_storage` output.

    The `records` entry carries the requested version counts; the `edges`
    and `events` entries always report zero versions. All three share the
    requested row counts, with sizes and elapsed time fixed at zero.
    """

    def _table_entry(v_before: int, v_after: int) -> dict:
        # A fresh dict per table so callers can mutate one independently.
        return {
            "rows_before": rows_before,
            "rows_after": rows_after,
            "versions_before": v_before,
            "versions_after": v_after,
            "size_bytes_before": 0,
            "size_bytes_after": 0,
            "elapsed_sec": 0.0,
        }

    return {
        "records": _table_entry(versions_before, versions_after),
        "edges": _table_entry(0, 0),
        "events": _table_entry(0, 0),
    }
|
||||
|
||||
|
||||
def _make_fake_store(record_ids: list[str]) -> MagicMock:
    """Return a mock store whose table reports the given record ids.

    `store.db.open_table(...)` yields a table mock where `count_rows()`
    is `len(record_ids)` and `to_pandas(...)[<column>].tolist()` is a copy
    of `record_ids`.
    """
    # Frame stand-in: indexing it by any column gives an object whose
    # tolist() returns the ids.
    frame = MagicMock()
    frame.__getitem__.return_value.tolist.return_value = list(record_ids)

    table = MagicMock()
    table.to_pandas.return_value = frame
    table.count_rows.return_value = len(record_ids)

    store = MagicMock()
    store.db.open_table.return_value = table
    return store
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fixture: HOME-isolated IAI root with records.lance skeleton
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture
def iai_root(tmp_path, monkeypatch):
    """Sandbox HOME under `tmp_path` and yield the `.iai-mcp` directory.

    Setup:
      * points HOME / HF_HOME at the sandbox, forces the failing keyring
        backend and sets a test crypto passphrase;
      * creates `~/.iai-mcp/lancedb/records.lance/_versions/` holding three
        100-byte fake manifests so the size/version walk has data;
      * reloads `iai_mcp.cli` so its module-scope `Path.home()` captures
        see the sandbox.

    Teardown reloads `iai_mcp.cli` once more AFTER the environment has
    been restored. The env patches therefore live in a nested
    `monkeypatch.context()`: pytest finalizes fixtures in reverse setup
    order, so patches made on the function-scoped `monkeypatch` itself
    would still be active when this fixture's teardown runs, and the
    final reload would re-capture the sandbox HOME.
    """
    import importlib

    with monkeypatch.context() as env_patch:
        env_patch.setenv("HOME", str(tmp_path))
        env_patch.setenv("HF_HOME", str(tmp_path / "hf"))
        env_patch.setenv(
            "PYTHON_KEYRING_BACKEND", "keyring.backends.fail.Keyring"
        )
        env_patch.setenv("IAI_MCP_CRYPTO_PASSPHRASE", "test-passphrase")
        try:
            import keyring.core

            # Drop any cached backend so the env override above is honored.
            keyring.core._keyring_backend = None
        except ImportError:
            # keyring is optional; nothing to reset when it is absent.
            pass

        iai_dir = tmp_path / ".iai-mcp"
        iai_dir.mkdir()
        records_lance = iai_dir / "lancedb" / "records.lance"
        records_lance.mkdir(parents=True)
        versions_dir = records_lance / "_versions"
        versions_dir.mkdir()
        for i in range(3):
            (versions_dir / f"{i:020d}.manifest").write_bytes(b"x" * 100)

        # Reload cli to pick up new HOME — STATE_PATH/LOCK_PATH/SOCKET_PATH
        # are module-scope Path.home() captures.
        from iai_mcp import cli as _cli

        importlib.reload(_cli)
        yield iai_dir

    # Environment is restored here; re-capture the original paths.
    importlib.reload(_cli)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_dry_run_prints_metrics_no_optimize_call(iai_root, capsys):
    """Dry-run prints pre-metrics as JSON and never calls the mocked optimize."""
    from iai_mcp.cli import cmd_maintenance_compact_records

    optimize_patch = patch("iai_mcp.maintenance.optimize_lance_storage")
    with optimize_patch as optimize_mock:
        exit_code = cmd_maintenance_compact_records(_make_args(dry_run=True))

    assert exit_code == 0
    report = json.loads(capsys.readouterr().out)
    assert report["mode"] == "dry-run"
    pre_metrics = report["metrics"]["pre"]
    for key in ("versions_count", "size_mb", "records_count"):
        assert key in pre_metrics
    # Nothing ran, so there are no post-optimize metrics to report.
    assert report["metrics"]["post"] is None
    optimize_mock.assert_not_called()
|
||||
|
||||
|
||||
def test_apply_with_yes_runs_optimize(iai_root, monkeypatch, capsys):
    """`--apply --yes` calls the mocked optimize once with retention=0d."""
    from iai_mcp import cli as _cli

    store_stub = _make_fake_store(["id1", "id2", "id3", "id4", "id5"])
    monkeypatch.setattr(
        "iai_mcp.store.MemoryStore", lambda path=None, **kw: store_stub,
    )
    report = _make_optimize_report(
        versions_before=3, versions_after=1,
        rows_before=5, rows_after=5,
    )
    optimize_mock = MagicMock(return_value=report)
    monkeypatch.setattr(
        "iai_mcp.maintenance.optimize_lance_storage", optimize_mock,
    )

    exit_code = _cli.cmd_maintenance_compact_records(
        _make_args(apply=True, yes=True),
    )

    assert exit_code == 0
    assert optimize_mock.call_count == 1
    passed_kwargs = optimize_mock.call_args[1]
    assert passed_kwargs["retention"] == timedelta(days=0)
|
||||
|
||||
|
||||
def test_preflight_refuses_when_daemon_alive(iai_root, monkeypatch, capsys):
    """A live `iai_mcp.daemon` pid in daemon-state.json blocks --apply --yes.

    Expects rc=1, 'daemon running' on stderr, and no optimize call.
    """
    # Use this test process's own pid: os.kill(pid, 0) succeeds for it, so
    # the process demonstrably exists.
    own_pid = os.getpid()
    daemon_state = iai_root / ".daemon-state.json"
    daemon_state.write_text(json.dumps({"daemon_pid": own_pid}))
    _patch_psutil_alive(
        monkeypatch,
        pid=own_pid,
        cmdline=["python", "-m", "iai_mcp.daemon"],
    )

    from iai_mcp.cli import cmd_maintenance_compact_records

    optimize_patch = patch("iai_mcp.maintenance.optimize_lance_storage")
    with optimize_patch as optimize_mock:
        exit_code = cmd_maintenance_compact_records(
            _make_args(apply=True, yes=True),
        )

    assert exit_code == 1
    stderr_text = capsys.readouterr().err
    assert "daemon running" in stderr_text
    optimize_mock.assert_not_called()
|
||||
|
||||
|
||||
def test_preflight_skips_when_daemon_state_missing(
    iai_root, monkeypatch, capsys,
):
    """Without .daemon-state.json the preflight passes and optimize runs."""
    daemon_state = iai_root / ".daemon-state.json"
    assert not daemon_state.exists()

    store_stub = _make_fake_store([])
    monkeypatch.setattr(
        "iai_mcp.store.MemoryStore", lambda path=None, **kw: store_stub,
    )
    report = _make_optimize_report(versions_before=3, versions_after=1)
    optimize_mock = MagicMock(return_value=report)
    monkeypatch.setattr(
        "iai_mcp.maintenance.optimize_lance_storage", optimize_mock,
    )

    from iai_mcp.cli import cmd_maintenance_compact_records

    exit_code = cmd_maintenance_compact_records(
        _make_args(apply=True, yes=True),
    )

    assert exit_code == 0
    assert optimize_mock.call_count == 1
|
||||
|
||||
|
||||
def test_record_id_set_invariant_aborts_on_divergence(
    iai_root, monkeypatch, capsys,
):
    """Pre id-set has 3 ids, post id-set has 2: abort and write a FAILED audit."""
    store_stub = _make_fake_store(["id1", "id2", "id3"])
    monkeypatch.setattr(
        "iai_mcp.store.MemoryStore", lambda path=None, **kw: store_stub,
    )
    shrinking_report = _make_optimize_report(
        versions_before=3, versions_after=1,
        rows_before=3, rows_after=2,
    )
    monkeypatch.setattr(
        "iai_mcp.maintenance.optimize_lance_storage",
        MagicMock(return_value=shrinking_report),
    )

    # Stub _maintenance_compact_metrics so its first invocation (pre) and
    # every later invocation (post) report different id-sets.
    metrics_before = {
        "versions_count": 3, "size_mb": 0.0,
        "records_count": 3, "record_id_set": {"id1", "id2", "id3"},
    }
    metrics_after = {
        "versions_count": 1, "size_mb": 0.0,
        "records_count": 2, "record_id_set": {"id1", "id2"},
    }
    unserved = [metrics_before]

    def _stub_metrics(*args, **kwargs):
        # First call drains the pre metrics; all others get the post metrics.
        return unserved.pop() if unserved else metrics_after

    monkeypatch.setattr(
        "iai_mcp.cli._maintenance_compact_metrics", _stub_metrics,
    )

    from iai_mcp.cli import cmd_maintenance_compact_records

    exit_code = cmd_maintenance_compact_records(
        _make_args(apply=True, yes=True),
    )

    assert exit_code == 1
    stderr_text = capsys.readouterr().err
    assert "ABORT" in stderr_text
    assert "divergence" in stderr_text

    # Exactly one FAILED audit file must have been written.
    failed_audits = list(iai_root.glob(".maintenance-compact-FAILED-*.json"))
    assert len(failed_audits) == 1
    audit = json.loads(failed_audits[0].read_text())
    assert audit["status"] == "aborted"
    assert audit["reason"] == "record_id_set divergence post-optimize"
    assert audit["missing_ids_count"] == 1
|
||||
|
||||
|
||||
def test_audit_file_written_on_apply(iai_root, monkeypatch, capsys):
    """Happy-path --apply --yes writes one audit JSON with status=ok."""
    store_stub = _make_fake_store(["id1", "id2"])
    monkeypatch.setattr(
        "iai_mcp.store.MemoryStore", lambda path=None, **kw: store_stub,
    )
    report = _make_optimize_report(
        versions_before=3, versions_after=1,
        rows_before=2, rows_after=2,
    )
    monkeypatch.setattr(
        "iai_mcp.maintenance.optimize_lance_storage",
        MagicMock(return_value=report),
    )

    from iai_mcp.cli import cmd_maintenance_compact_records

    exit_code = cmd_maintenance_compact_records(
        _make_args(apply=True, yes=True),
    )
    assert exit_code == 0

    # The glob also matches FAILED audits, so filter those out by name.
    audits = [
        candidate
        for candidate in iai_root.glob(".maintenance-compact-*.json")
        if "FAILED" not in candidate.name
    ]
    assert len(audits) == 1, (
        f"expected exactly 1 audit file, got {audits}"
    )
    payload = json.loads(audits[0].read_text())
    assert payload["status"] == "ok"
    for field in ("metrics_pre", "metrics_post", "elapsed_sec"):
        assert field in payload
|
||||
|
||||
|
||||
def test_dry_run_no_audit_file(iai_root, capsys):
    """A dry run leaves no `.maintenance-compact-*.json` file behind."""
    from iai_mcp.cli import cmd_maintenance_compact_records

    exit_code = cmd_maintenance_compact_records(_make_args(dry_run=True))

    assert exit_code == 0
    leftover_audits = list(iai_root.glob(".maintenance-compact-*.json"))
    assert leftover_audits == []
|
||||
|
||||
|
||||
def test_yes_required_with_apply_in_non_tty(iai_root, monkeypatch, capsys):
    """--apply without --yes on a non-tty stdin exits 2 with a hint."""
    # Make stdin report itself as non-interactive.
    monkeypatch.setattr("sys.stdin.isatty", lambda: False)

    from iai_mcp.cli import cmd_maintenance_compact_records

    exit_code = cmd_maintenance_compact_records(
        _make_args(apply=True, yes=False),
    )

    assert exit_code == 2
    stderr_text = capsys.readouterr().err
    assert "requires --yes" in stderr_text
|
||||
344
tests/test_cli_maintenance_sleep_cycle.py
Normal file
344
tests/test_cli_maintenance_sleep_cycle.py
Normal file
|
|
@ -0,0 +1,344 @@
|
|||
"""Phase 10.3 Plan 10.3-01 Task 1.5 -- CLI maintenance sleep-cycle tests.
|
||||
|
||||
Ten cases:
  1. test_happy_path_runs_pipeline_and_prints_progress
  2. test_quarantined_without_force_returns_nonzero_with_message
  3. test_force_runs_pipeline_when_quarantined
  4. test_reset_quarantine_clears_then_runs
  5. test_reset_quarantine_when_not_quarantined_no_op
  6. test_failure_returns_nonzero_with_error_in_stderr
  7. test_failure_after_3rd_strike_prints_quarantine_hint
  8. test_subparser_exposes_sleep_cycle_with_flags
  9. test_subparser_defaults_force_false_reset_false
  10. test_store_open_failure_returns_2
|
||||
|
||||
All tests use stub `MemoryStore` + monkeypatched SleepPipeline methods —
|
||||
no real LanceDB I/O.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from iai_mcp.lifecycle_state import (
|
||||
default_state,
|
||||
load_state,
|
||||
save_state,
|
||||
)
|
||||
from iai_mcp.sleep_pipeline import SleepStep
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _make_args(**kwargs) -> argparse.Namespace:
    """Return an argparse.Namespace of sleep-cycle flags.

    Defaults are ``force=False``, ``reset_quarantine=False`` and
    ``store_path=None``; any keyword argument overrides or extends them.
    """
    options = {
        "force": False,
        "reset_quarantine": False,
        "store_path": None,
        **kwargs,
    }
    return argparse.Namespace(**options)
|
||||
|
||||
|
||||
@pytest.fixture
def iai_root(tmp_path, monkeypatch):
    """Sandbox HOME so LIFECYCLE_STATE_PATH points inside ``tmp_path``.

    Creates ``~/.iai-mcp`` under the sandbox and reloads
    ``iai_mcp.lifecycle_state`` and ``iai_mcp.cli`` so their module-scope
    path captures (LIFECYCLE_STATE_PATH, STATE_PATH) pick up the new HOME.

    Yields:
        The sandboxed ``~/.iai-mcp`` directory.
    """
    import importlib

    # The environment is patched through a nested MonkeyPatch context rather
    # than the injected fixture directly. pytest undoes the ``monkeypatch``
    # fixture only *after* this fixture's teardown, so the trailing reloads
    # would otherwise re-capture the sandbox HOME instead of restoring the
    # real paths.
    with monkeypatch.context() as env:
        env.setenv("HOME", str(tmp_path))
        env.setenv("HF_HOME", str(tmp_path / "hf"))
        env.setenv(
            "PYTHON_KEYRING_BACKEND", "keyring.backends.fail.Keyring"
        )
        env.setenv("IAI_MCP_CRYPTO_PASSPHRASE", "test-passphrase")

        iai_dir = tmp_path / ".iai-mcp"
        iai_dir.mkdir()

        from iai_mcp import lifecycle_state as _ls
        from iai_mcp import cli as _cli

        importlib.reload(_ls)
        importlib.reload(_cli)
        yield iai_dir

    # The env changes above are reverted here, so these reloads restore the
    # module-scope paths to their pre-test values.
    importlib.reload(_ls)
    importlib.reload(_cli)
|
||||
|
||||
|
||||
def _patch_store_open(monkeypatch: pytest.MonkeyPatch) -> MagicMock:
    """Swap ``iai_mcp.store.MemoryStore`` for a factory returning one MagicMock.

    This lets the CLI construct a 'store' without touching real LanceDB or
    the embedder. The shared mock is returned for inspection.
    """
    store_stub = MagicMock()

    def _open_store(path=None, **kw):
        return store_stub

    monkeypatch.setattr("iai_mcp.store.MemoryStore", _open_store)
    return store_stub
|
||||
|
||||
|
||||
def _patch_pipeline_steps_to_noop(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Stub out every ``_step_*`` method on SleepPipeline.

    Each patched step returns ``(True, {})``, so the real pipeline loop
    executes without doing real LanceDB work.
    """
    from iai_mcp.sleep_pipeline import SleepPipeline

    def _impl(self, _interrupt_check):
        return True, {}

    # The SleepStep member is listed beside each method name for reference;
    # only the method name is used for patching.
    step_methods = (
        (SleepStep.SCHEMA_MINE, "_step_schema_mine"),
        (SleepStep.KNOB_TUNE, "_step_knob_tune"),
        (SleepStep.DREAM_DECAY, "_step_dream_decay"),
        (SleepStep.OPTIMIZE_LANCE, "_step_optimize_lance"),
        (SleepStep.COMPACT_RECORDS, "_step_compact_records"),
    )
    for _step, method_name in step_methods:
        monkeypatch.setattr(SleepPipeline, method_name, _impl)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_happy_path_runs_pipeline_and_prints_progress(
    iai_root, monkeypatch, capsys,
):
    """No flags and no quarantine: exit 0 with all five step lines printed."""
    _patch_store_open(monkeypatch)
    _patch_pipeline_steps_to_noop(monkeypatch)

    from iai_mcp.cli import cmd_maintenance_sleep_cycle

    exit_code = cmd_maintenance_sleep_cycle(_make_args())

    assert exit_code == 0
    stdout_text = capsys.readouterr().out
    expected_fragments = (
        "Sleep cycle started.",
        "[1/5] schema_mine",
        "[2/5] knob_tune",
        "[3/5] dream_decay",
        "[4/5] optimize_lance",
        "[5/5] compact_records",
        "Sleep cycle complete",
    )
    for fragment in expected_fragments:
        assert fragment in stdout_text
|
||||
|
||||
|
||||
def test_quarantined_without_force_returns_nonzero_with_message(
    iai_root, monkeypatch, capsys,
):
    """An active quarantine without --force exits 1 and hints on stderr."""
    _patch_store_open(monkeypatch)

    # Import the path only now so it comes from the module reloaded by the
    # fixture, then seed an active quarantine there.
    from iai_mcp.lifecycle_state import LIFECYCLE_STATE_PATH

    started = datetime.now(timezone.utc)
    expires = started + timedelta(hours=12)
    state = default_state()
    state["quarantine"] = {
        "until_ts": expires.isoformat(),
        "reason": "test stuck",
        "since_ts": started.isoformat(),
    }
    save_state(state, LIFECYCLE_STATE_PATH)

    _patch_pipeline_steps_to_noop(monkeypatch)

    from iai_mcp.cli import cmd_maintenance_sleep_cycle

    exit_code = cmd_maintenance_sleep_cycle(_make_args())

    assert exit_code == 1
    stderr_text = capsys.readouterr().err
    assert "quarantined" in stderr_text.lower()
    for fragment in ("test stuck", "--force", "--reset-quarantine"):
        assert fragment in stderr_text
|
||||
|
||||
|
||||
def test_force_runs_pipeline_when_quarantined(
    iai_root, monkeypatch, capsys,
):
    """--force runs the pipeline despite an active quarantine."""
    _patch_store_open(monkeypatch)
    from iai_mcp.lifecycle_state import LIFECYCLE_STATE_PATH

    started = datetime.now(timezone.utc)
    expires = started + timedelta(hours=12)
    state = default_state()
    state["quarantine"] = {
        "until_ts": expires.isoformat(),
        "reason": "test stuck",
        "since_ts": started.isoformat(),
    }
    save_state(state, LIFECYCLE_STATE_PATH)

    _patch_pipeline_steps_to_noop(monkeypatch)

    from iai_mcp.cli import cmd_maintenance_sleep_cycle

    exit_code = cmd_maintenance_sleep_cycle(_make_args(force=True))

    assert exit_code == 0
    stdout_text = capsys.readouterr().out
    assert "[5/5] compact_records" in stdout_text
    assert "Sleep cycle complete" in stdout_text

    # A forced run must leave the stored quarantine record in place.
    state_after = load_state(LIFECYCLE_STATE_PATH)
    assert state_after["quarantine"] is not None
|
||||
|
||||
|
||||
def test_reset_quarantine_clears_then_runs(
    iai_root, monkeypatch, capsys,
):
    """--reset-quarantine clears the quarantine, then the cycle runs normally."""
    _patch_store_open(monkeypatch)
    from iai_mcp.lifecycle_state import LIFECYCLE_STATE_PATH

    started = datetime.now(timezone.utc)
    expires = started + timedelta(hours=12)
    state = default_state()
    state["quarantine"] = {
        "until_ts": expires.isoformat(),
        "reason": "stuck",
        "since_ts": started.isoformat(),
    }
    save_state(state, LIFECYCLE_STATE_PATH)

    _patch_pipeline_steps_to_noop(monkeypatch)

    from iai_mcp.cli import cmd_maintenance_sleep_cycle

    exit_code = cmd_maintenance_sleep_cycle(_make_args(reset_quarantine=True))

    assert exit_code == 0
    stdout_text = capsys.readouterr().out
    assert "Quarantine cleared." in stdout_text
    assert "Sleep cycle complete" in stdout_text

    # The persisted state must no longer carry a quarantine.
    state_after = load_state(LIFECYCLE_STATE_PATH)
    assert state_after["quarantine"] is None
|
||||
|
||||
|
||||
def test_reset_quarantine_when_not_quarantined_no_op(
    iai_root, monkeypatch, capsys,
):
    """--reset-quarantine with nothing to clear prints a no-op note and runs."""
    _patch_store_open(monkeypatch)
    _patch_pipeline_steps_to_noop(monkeypatch)

    from iai_mcp.cli import cmd_maintenance_sleep_cycle

    exit_code = cmd_maintenance_sleep_cycle(_make_args(reset_quarantine=True))

    assert exit_code == 0
    stdout_text = capsys.readouterr().out
    for fragment in ("Quarantine not active", "Sleep cycle complete"):
        assert fragment in stdout_text
|
||||
|
||||
|
||||
def test_failure_returns_nonzero_with_error_in_stderr(
    iai_root, monkeypatch, capsys,
):
    """A step that raises makes the CLI exit 1 with a FAILED line on stderr."""
    _patch_store_open(monkeypatch)
    _patch_pipeline_steps_to_noop(monkeypatch)

    from iai_mcp.sleep_pipeline import SleepPipeline

    # Override just the fourth step so it blows up.
    def _exploding_step(self, _interrupt_check):
        raise RuntimeError("synthetic optimize failure")

    monkeypatch.setattr(
        SleepPipeline, "_step_optimize_lance", _exploding_step,
    )

    from iai_mcp.cli import cmd_maintenance_sleep_cycle

    exit_code = cmd_maintenance_sleep_cycle(_make_args())

    assert exit_code == 1
    streams = capsys.readouterr()
    # The three completed steps go to stdout; the failure goes to stderr.
    for done_line in ("[1/5] schema_mine", "[2/5] knob_tune", "[3/5] dream_decay"):
        assert done_line in streams.out
    assert "[4/5] optimize_lance ... FAILED" in streams.err
    assert "synthetic optimize failure" in streams.err
|
||||
|
||||
|
||||
def test_failure_after_3rd_strike_prints_quarantine_hint(
    iai_root, monkeypatch, capsys,
):
    """The third consecutive failure of one step exits 1 with a quarantine hint."""
    _patch_store_open(monkeypatch)
    _patch_pipeline_steps_to_noop(monkeypatch)

    from iai_mcp.sleep_pipeline import SleepPipeline

    def _exploding_step(self, _interrupt_check):
        raise RuntimeError("boom")

    monkeypatch.setattr(SleepPipeline, "_step_dream_decay", _exploding_step)

    from iai_mcp.cli import cmd_maintenance_sleep_cycle

    # Attempts 1 and 2: only their side effect on the failure count matters.
    for _attempt in (1, 2):
        cmd_maintenance_sleep_cycle(_make_args())
    capsys.readouterr()  # discard accumulated output

    # Attempt 3 is the one expected to trigger the quarantine.
    exit_code = cmd_maintenance_sleep_cycle(_make_args())

    assert exit_code == 1
    stderr_text = capsys.readouterr().err
    for fragment in ("FAILED", "quarantined for 24h", "--reset-quarantine"):
        assert fragment in stderr_text
|
||||
|
||||
|
||||
def test_subparser_exposes_sleep_cycle_with_flags():
    """`iai-mcp maintenance sleep-cycle --force --reset-quarantine` parses."""
    from iai_mcp.cli import _build_parser

    argv = ["maintenance", "sleep-cycle", "--force", "--reset-quarantine"]
    parsed = _build_parser().parse_args(argv)

    assert parsed.force is True
    assert parsed.reset_quarantine is True
    # --store-path was not given, so it keeps its default.
    assert parsed.store_path is None
    assert parsed.maintenance_cmd == "sleep-cycle"
|
||||
|
||||
|
||||
def test_subparser_defaults_force_false_reset_false():
    """With no flags given, --force and --reset-quarantine default to False."""
    from iai_mcp.cli import _build_parser

    parsed = _build_parser().parse_args(["maintenance", "sleep-cycle"])

    assert parsed.force is False
    assert parsed.reset_quarantine is False
|
||||
|
||||
|
||||
def test_store_open_failure_returns_2(
    iai_root, monkeypatch, capsys,
):
    """When MemoryStore() raises, the CLI exits 2 and reports it on stderr."""

    def _failing_store_factory(path=None, **kw):
        raise RuntimeError("disk full")

    monkeypatch.setattr("iai_mcp.store.MemoryStore", _failing_store_factory)

    from iai_mcp.cli import cmd_maintenance_sleep_cycle

    exit_code = cmd_maintenance_sleep_cycle(_make_args())

    assert exit_code == 2
    stderr_text = capsys.readouterr().err
    for fragment in ("could not open MemoryStore", "disk full"):
        assert fragment in stderr_text
|
||||
63
tests/test_cli_topology.py
Normal file
63
tests/test_cli_topology.py
Normal file
|
|
@ -0,0 +1,63 @@
|
|||
"""Plan 03-02 CONN-07 RED: iai-mcp topology CLI.
|
||||
|
||||
The `topology` subcommand prints one key:value line per metric:
|
||||
C: <float>
|
||||
L: <float>
|
||||
sigma: <float | "insufficient_data">
|
||||
communities: <int>
|
||||
rich_club_ratio: <float>
|
||||
N: <int>
|
||||
regime: <str>
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
|
||||
import pytest
|
||||
|
||||
from iai_mcp.cli import main as cli_main
|
||||
|
||||
|
||||
def test_topology_subcommand_registered():
    """`iai-mcp topology --help` exits cleanly, so the subparser is registered."""
    with pytest.raises(SystemExit) as exit_info:
        cli_main(["topology", "--help"])

    # argparse's --help handler calls sys.exit(0) on success.
    assert exit_info.value.code == 0
|
||||
|
||||
|
||||
def test_topology_prints_required_keys(tmp_path, capsys, monkeypatch):
    """Each of the seven metric keys starts a `key: value` line in the output."""
    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))

    exit_code = cli_main(["topology"])

    assert exit_code == 0
    out = capsys.readouterr().out

    required_keys = (
        "C",
        "L",
        "sigma",
        "communities",
        "rich_club_ratio",
        "N",
        "regime",
    )
    for key in required_keys:
        # Anchor at line start so e.g. "N:" cannot match inside another key.
        assert re.search(rf"^{key}:\s", out, re.MULTILINE), (
            f"missing '{key}: ' line in {out!r}"
        )
|
||||
|
||||
|
||||
def test_topology_empty_store_prints_insufficient_data(tmp_path, capsys, monkeypatch):
    """A fresh store surfaces the 'insufficient_data' marker somewhere in the output."""
    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))

    exit_code = cli_main(["topology"])

    assert exit_code == 0
    out = capsys.readouterr().out
    # Either sigma or regime may carry the marker on an empty store; the
    # test only requires that it appears somewhere.
    marker_present = "insufficient_data" in out
    assert marker_present, (
        f"empty store must surface insufficient_data; got {out!r}"
    )
|
||||
77
tests/test_cli_trajectory.py
Normal file
77
tests/test_cli_trajectory.py
Normal file
|
|
@ -0,0 +1,77 @@
|
|||
"""Tests for iai-mcp trajectory CLI.
|
||||
|
||||
The `trajectory` subcommand aggregates M1..M6 events via
|
||||
trajectory.aggregate_trajectory and prints one summary line per metric.
|
||||
Supports --since WEEKS to scope history.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from datetime import datetime, timedelta, timezone
|
||||
|
||||
import pytest
|
||||
|
||||
from iai_mcp.cli import main as cli_main
|
||||
from iai_mcp.events import write_event
|
||||
from iai_mcp.store import MemoryStore
|
||||
|
||||
|
||||
def test_trajectory_empty_output(tmp_path, capsys, monkeypatch):
    """With no trajectory events recorded, the CLI exits 0 and says so."""
    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))

    # Nothing has been written to the store at this point.
    exit_code = cli_main(["trajectory"])

    assert exit_code == 0
    lowered = capsys.readouterr().out.lower()
    assert "no trajectory data" in lowered or "no data" in lowered
|
||||
|
||||
|
||||
def test_trajectory_renders_m1_to_m6(tmp_path, capsys, monkeypatch):
    """After seeding one event per metric, the output names M1 through M6."""
    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    store = MemoryStore(path=tmp_path)

    # One event per metric, with values 1.0 .. 6.0.
    metric_names = ["m1", "m2", "m3", "m4", "m5", "m6"]
    for ordinal, metric in enumerate(metric_names, start=1):
        write_event(
            store,
            kind="trajectory_metric",
            data={"metric": metric, "value": float(ordinal)},
            severity="info",
            session_id="s1",
        )

    exit_code = cli_main(["trajectory"])

    assert exit_code == 0
    out = capsys.readouterr().out
    # Metrics are expected in uppercase in the rendered output.
    for label in ("M1", "M2", "M3", "M4", "M5", "M6"):
        assert label in out
|
||||
|
||||
|
||||
def test_trajectory_since_weeks_flag(tmp_path, capsys, monkeypatch):
    """`--since=N` is recognised and the command exits 0.

    Filtering behaviour itself is covered at the
    trajectory.aggregate_trajectory level; this test only checks the CLI
    contract.
    """
    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    store = MemoryStore(path=tmp_path)
    write_event(
        store,
        kind="trajectory_metric",
        data={"metric": "m1", "value": 1.0},
        severity="info",
        session_id="s1",
    )

    exit_code = cli_main(["trajectory", "--since=2"])

    assert exit_code == 0
|
||||
|
||||
|
||||
def test_trajectory_prints_aggregate_stats(tmp_path, capsys, monkeypatch):
    """With three M1 samples, the output shows some aggregate indicator."""
    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    store = MemoryStore(path=tmp_path)
    for sample in (1.0, 2.0, 3.0):
        write_event(
            store,
            kind="trajectory_metric",
            data={"metric": "m1", "value": sample},
            severity="info",
            session_id="s1",
        )

    exit_code = cli_main(["trajectory"])

    assert exit_code == 0
    out = capsys.readouterr().out
    lowered = out.lower()
    # Accept any of: the word "mean", the word "avg", or a literal "=".
    assert "mean" in lowered or "avg" in lowered or "=" in out
|
||||
155
tests/test_community.py
Normal file
155
tests/test_community.py
Normal file
|
|
@ -0,0 +1,155 @@
|
|||
"""Tests for iai_mcp.community (D-05 bootstrap, stable UUIDs, CONN-01/04)."""
|
||||
from __future__ import annotations
|
||||
|
||||
import random
|
||||
from uuid import uuid4
|
||||
|
||||
from iai_mcp.community import (
|
||||
CommunityAssignment,
|
||||
MAX_TOP_COMMUNITIES,
|
||||
MID_N_LEIDEN,
|
||||
MODULARITY_FLOOR,
|
||||
REFRESH_DELTA,
|
||||
SMALL_N_FLAT,
|
||||
UUID_ROTATE_COSINE,
|
||||
detect_communities,
|
||||
needs_refresh,
|
||||
)
|
||||
from iai_mcp.graph import MemoryGraph
|
||||
|
||||
|
||||
def _random_emb(seed: int) -> list[float]:
    """Return a reproducible 384-float vector drawn from ``random.Random(seed)``."""
    draw = random.Random(seed).random
    return [draw() for _ in range(384)]
|
||||
|
||||
|
||||
def test_small_n_flat_single_community() -> None:
    """50 edgeless nodes (N < SMALL_N_FLAT) yield the flat backend and one community."""
    graph = MemoryGraph()
    for seed in range(50):
        graph.add_node(uuid4(), community_id=None, embedding=_random_emb(seed))

    assignment = detect_communities(graph, prior=None)

    assert assignment.backend == "flat"
    distinct_communities = set(assignment.node_to_community.values())
    assert len(distinct_communities) == 1
    assert assignment.modularity == 0.0
|
||||
|
||||
|
||||
def test_two_cliques_produce_multiple_communities() -> None:
    """Two disjoint 150-node cliques (N=300) are split by Leiden with Q >= floor."""
    clique_size = 150
    graph = MemoryGraph()
    left = [uuid4() for _ in range(clique_size)]
    right = [uuid4() for _ in range(clique_size)]

    for seed, node in enumerate(left):
        graph.add_node(node, community_id=None, embedding=_random_emb(seed))
    # The second clique uses a disjoint seed range (10_000 + index).
    for seed, node in enumerate(right, start=10_000):
        graph.add_node(node, community_id=None, embedding=_random_emb(seed))

    # Fully connect each clique; there are no edges between the two.
    for i in range(clique_size):
        for j in range(i + 1, clique_size):
            graph.add_edge(left[i], left[j])
            graph.add_edge(right[i], right[j])

    assignment = detect_communities(graph, prior=None)

    assert assignment.backend.startswith("leiden")
    assert assignment.modularity >= MODULARITY_FLOOR
    assert len(set(assignment.node_to_community.values())) >= 2
|
||||
|
||||
|
||||
def test_stable_uuids_on_identical_rerun() -> None:
    """Re-running detection on the same graph with a prior keeps every community id."""
    clique_size = 150
    graph = MemoryGraph()
    left = [uuid4() for _ in range(clique_size)]
    right = [uuid4() for _ in range(clique_size)]

    for seed, node in enumerate(left):
        graph.add_node(node, community_id=None, embedding=_random_emb(seed))
    for seed, node in enumerate(right, start=10_000):
        graph.add_node(node, community_id=None, embedding=_random_emb(seed))

    for i in range(clique_size):
        for j in range(i + 1, clique_size):
            graph.add_edge(left[i], left[j])
            graph.add_edge(right[i], right[j])

    baseline = detect_communities(graph, prior=None)
    rerun = detect_communities(graph, prior=baseline)

    # Every node must map to the same community id as in the first run.
    for node, community in baseline.node_to_community.items():
        assert rerun.node_to_community[node] == community
|
||||
|
||||
|
||||
def test_top_communities_capped_at_seven() -> None:
    """CONN-01: level-1 output never exceeds MAX_TOP_COMMUNITIES entries."""
    from uuid import UUID as _UUID

    graph = MemoryGraph()
    for seed in range(SMALL_N_FLAT + 10):
        graph.add_node(uuid4(), community_id=None, embedding=_random_emb(seed))

    # Link each node to its successor in node order, forming one long chain.
    # Node keys from the underlying graph are converted back via UUID(...).
    node_keys = list(graph._nx.nodes())
    for current_key, next_key in zip(node_keys, node_keys[1:]):
        graph.add_edge(_UUID(current_key), _UUID(next_key))

    assignment = detect_communities(graph, prior=None)

    assert len(assignment.top_communities) <= MAX_TOP_COMMUNITIES
|
||||
|
||||
|
||||
def test_mid_regions_exposes_community_members() -> None:
    """CONN-01 level 2: mid_regions member lists together account for all 50 nodes."""
    graph = MemoryGraph()
    for seed, node in enumerate(uuid4() for _ in range(50)):
        graph.add_node(node, community_id=None, embedding=_random_emb(seed))

    assignment = detect_communities(graph, prior=None)

    member_counts = [len(members) for members in assignment.mid_regions.values()]
    assert sum(member_counts) == 50
|
||||
|
||||
|
||||
def test_needs_refresh_threshold() -> None:
    """CONN-04: |Δ Q| > 0.05 -> refresh, else stable."""
    prior = CommunityAssignment(modularity=0.30)

    # (new modularity, expected needs_refresh result)
    cases = (
        (0.36, True),   # Δ = 0.06 > 0.05
        (0.31, False),  # Δ = 0.01 < 0.05
        (0.24, True),   # Δ = 0.06 > 0.05 (negative side)
        (0.35, False),  # boundary: Δ == 0.05 is NOT > 0.05 (strict inequality)
    )
    for new_modularity, expected in cases:
        assert needs_refresh(prior, new_modularity) is expected
|
||||
|
||||
|
||||
def test_empty_graph_returns_empty_assignment() -> None:
    """An empty graph yields the flat backend with no nodes and no centroids."""
    assignment = detect_communities(MemoryGraph(), prior=None)

    assert assignment.backend == "flat"
    assert assignment.node_to_community == {}
    assert assignment.community_centroids == {}
|
||||
|
||||
|
||||
def test_constants_exposed() -> None:
    """Named constants are importable (verifies the grep acceptance criteria)."""
    # (imported constant, value it is pinned to)
    pinned = (
        (SMALL_N_FLAT, 200),
        (MID_N_LEIDEN, 500),
        (MODULARITY_FLOOR, 0.2),
        (REFRESH_DELTA, 0.05),
        (UUID_ROTATE_COSINE, 0.7),
        (MAX_TOP_COMMUNITIES, 7),
    )
    for actual, expected in pinned:
        assert actual == expected
|
||||
|
||||
|
||||
def test_mid_n_non_modular_falls_back_to_flat() -> None:
    """SMALL_N_FLAT <= N < MID_N_LEIDEN with Q < 0.2 -> flat fallback."""
    node_count = 250
    graph = MemoryGraph()
    members = [uuid4() for _ in range(node_count)]
    for seed, node in enumerate(members):
        graph.add_node(node, community_id=None, embedding=_random_emb(seed))

    # Connect every pair: one 250-node clique, for which Leiden is expected
    # to report Q ~ 0.0.
    for i in range(node_count):
        for j in range(i + 1, node_count):
            graph.add_edge(members[i], members[j])

    assignment = detect_communities(graph, prior=None)

    # A fully connected graph has no community structure to find.
    assert assignment.backend == "flat"
|
||||
|
||||
|
||||
def test_mid_regions_count_matches_community_count() -> None:
    """mid_regions holds exactly one entry per distinct community id."""
    graph = MemoryGraph()
    first_clique = [uuid4() for _ in range(150)]
    second_clique = [uuid4() for _ in range(150)]
    for seed, node_id in enumerate(first_clique + second_clique):
        graph.add_node(node_id, community_id=None, embedding=_random_emb(seed))
    # Build two disjoint 150-node cliques in lockstep (same pair order in each).
    for position, (src_a, src_b) in enumerate(zip(first_clique, second_clique)):
        remaining = zip(first_clique[position + 1:], second_clique[position + 1:])
        for dst_a, dst_b in remaining:
            graph.add_edge(src_a, dst_a)
            graph.add_edge(src_b, dst_b)
    outcome = detect_communities(graph, prior=None)
    region_count = len(outcome.mid_regions)
    distinct_communities = set(outcome.node_to_community.values())
    assert region_count == len(distinct_communities)
|
||||
163
tests/test_compress_llmlingua.py
Normal file
163
tests/test_compress_llmlingua.py
Normal file
|
|
@ -0,0 +1,163 @@
|
|||
"""Tests for TOK-04 LLMLingua-2 compression (Plan 02-04 Task 2, D-25).
|
||||
|
||||
Scope (constitutional):
|
||||
- ALLOWED: L2 community descriptors, session summaries, cls_summary records.
|
||||
- FORBIDDEN: literal_surface of normal records, pinned, invariant_anchor,
|
||||
user-tagged 'raw' records.
|
||||
- Passthrough when llmlingua package not installed (local-only stays green).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from uuid import uuid4
|
||||
|
||||
import pytest
|
||||
|
||||
from iai_mcp.events import query_events
|
||||
from iai_mcp.store import MemoryStore
|
||||
from iai_mcp.types import EMBED_DIM, MemoryRecord
|
||||
|
||||
|
||||
def _rec(
    *,
    text: str = "lorem ipsum dolor sit amet consectetur adipiscing elit",
    tags: list[str] | None = None,
    pinned: bool = False,
    detail_level: int = 2,
    s5_trust_score: float = 0.5,
    language: str = "en",
) -> MemoryRecord:
    """Build an episodic MemoryRecord for tests; only the keyword knobs vary.

    ``text`` becomes ``literal_surface``; ``tags`` is copied (``None`` means no
    tags). Every other field is filled with a fixed neutral value.
    """
    stamp = datetime.now(timezone.utc)
    # Unit vector along the first axis, EMBED_DIM entries long.
    unit_embedding = [1.0] + [0.0] * (EMBED_DIM - 1)
    tag_copy = list(tags) if tags else []
    return MemoryRecord(
        # identity / content
        id=uuid4(),
        tier="episodic",
        literal_surface=text,
        aaak_index="",
        embedding=unit_embedding,
        language=language,
        tags=tag_copy,
        provenance=[],
        # graph placement
        community_id=None,
        centrality=0.0,
        # retention knobs
        detail_level=detail_level,
        pinned=pinned,
        stability=0.0,
        difficulty=0.0,
        last_reviewed=None,
        never_decay=False,
        never_merge=False,
        s5_trust_score=s5_trust_score,
        # timestamps share one instant
        created_at=stamp,
        updated_at=stamp,
    )
|
||||
|
||||
|
||||
# --------------------------------------------------------------- is_compressible
|
||||
|
||||
|
||||
def test_is_compressible_rejects_pinned():
    """A pinned record is refused and the reason mentions "pinned"."""
    from iai_mcp.compress import is_compressible

    allowed, why = is_compressible(_rec(pinned=True))
    assert allowed is False
    assert "pinned" in why.lower()
|
||||
|
||||
|
||||
def test_is_compressible_rejects_raw_tagged():
    """A record carrying a ``raw:*`` tag is refused; the reason mentions "raw"."""
    from iai_mcp.compress import is_compressible

    record = _rec(tags=["raw:ru", "project:iai-mcp"])
    allowed, why = is_compressible(record)
    assert allowed is False
    assert "raw" in why.lower()
|
||||
|
||||
|
||||
def test_is_compressible_rejects_invariant_anchor():
    """A record with s5_trust_score=0.95 is refused as an invariant anchor."""
    from iai_mcp.compress import is_compressible

    allowed, why = is_compressible(_rec(s5_trust_score=0.95))
    assert allowed is False
    lowered = why.lower()
    # Either wording is accepted for the refusal reason.
    assert "invariant" in lowered or "trust" in lowered
|
||||
|
||||
|
||||
def test_is_compressible_allows_cls_summary():
    """Records tagged ``cls_summary`` are eligible for compression."""
    from iai_mcp.compress import is_compressible

    summary_record = _rec(tags=["semantic", "cls_summary"])
    allowed, _why = is_compressible(summary_record)
    assert allowed is True
|
||||
|
||||
|
||||
def test_is_compressible_allows_schema():
    """Records tagged ``schema`` are eligible for compression."""
    from iai_mcp.compress import is_compressible

    schema_record = _rec(tags=["schema", "auto"])
    allowed, _why = is_compressible(schema_record)
    assert allowed is True
|
||||
|
||||
|
||||
def test_is_compressible_rejects_normal_record_by_default():
    """D-25: an ordinary record's literal_surface is refused unless explicitly allowed."""
    from iai_mcp.compress import is_compressible

    allowed, why = is_compressible(_rec(tags=["project:iai-mcp"]))
    assert allowed is False
    lowered = why.lower()
    assert any(word in lowered for word in ("literal_surface", "constitutional"))
|
||||
|
||||
|
||||
# --------------------------------------------------------------- compress_llmlingua2
|
||||
|
||||
|
||||
def test_compress_llmlingua2_passes_through_when_pkg_absent(tmp_path, monkeypatch):
    """With no LLMLingua backend available, the input text comes back unchanged."""
    from iai_mcp import compress as compress_mod

    def _no_backend():
        # Stand-in loader: reports that the backend could not be loaded.
        return None

    monkeypatch.setattr(compress_mod, "_load_llmlingua2", _no_backend)

    original = "this is a long text that would normally be compressed"
    result = compress_mod.compress_llmlingua2(
        original,
        target_ratio=0.5,
        store=MemoryStore(path=tmp_path),
    )
    assert result == original  # passthrough
|
||||
|
||||
|
||||
def test_compress_llmlingua2_logs_fallback_event(tmp_path, monkeypatch):
    """The passthrough path records an ``llm_health`` event for this component."""
    from iai_mcp import compress as compress_mod

    monkeypatch.setattr(compress_mod, "_load_llmlingua2", lambda: None)

    event_store = MemoryStore(path=tmp_path)
    compress_mod.compress_llmlingua2("text", target_ratio=0.5, store=event_store)

    # Count health events attributed to the compressor.
    hits = sum(
        1
        for event in query_events(event_store, kind="llm_health")
        if event["data"].get("component") == "compress_llmlingua2"
    )
    assert hits >= 1
|
||||
|
||||
|
||||
# --------------------------------------------------------------- wrappers
|
||||
|
||||
|
||||
def test_compress_l2_descriptor_uses_l2_target_ratio():
    """compress_l2_descriptor returns a str and the L2 target ratio is 0.5."""
    from iai_mcp.compress import COMPRESSION_TARGET_L2, compress_l2_descriptor

    # Only callability and the return type are checked here; the output text
    # itself is not compared (the original note expects a passthrough when the
    # package is absent).
    compressed = compress_l2_descriptor("community summary line")
    assert isinstance(compressed, str)
    assert COMPRESSION_TARGET_L2 == 0.5
|
||||
|
||||
|
||||
def test_compress_summary_uses_summary_target_ratio():
    """compress_summary returns a str and the summary target ratio is 0.3."""
    from iai_mcp.compress import COMPRESSION_TARGET_SUMMARY, compress_summary

    compressed = compress_summary("cluster summary line")
    assert isinstance(compressed, str)
    assert COMPRESSION_TARGET_SUMMARY == 0.3
|
||||
|
||||
|
||||
def test_compress_module_constants():
    """Both compression target ratios keep their expected values."""
    from iai_mcp.compress import COMPRESSION_TARGET_L2, COMPRESSION_TARGET_SUMMARY

    ratios = (COMPRESSION_TARGET_L2, COMPRESSION_TARGET_SUMMARY)
    assert ratios[0] == 0.5
    assert ratios[1] == 0.3
|
||||
543
tests/test_concurrency.py
Normal file
543
tests/test_concurrency.py
Normal file
|
|
@ -0,0 +1,543 @@
|
|||
"""Tests for iai_mcp.concurrency -- Task 1.
|
||||
|
||||
Covers 10 behaviours from the plan:
|
||||
1. acquire_shared + try_acquire_exclusive blocking semantics.
|
||||
2. Exclusive-then-exclusive: second blocks.
|
||||
3. flock fd-close safety (Pitfall 2): closing /etc/passwd doesn't release lock.
|
||||
4. Multi-MCP: 2 and 3 shared holders keep daemon blocked.
|
||||
5. SIGKILL releases lock automatically (kernel).
|
||||
6. Unix socket NDJSON status round-trip.
|
||||
7. Unix socket dispatcher receives exact dict for pause/force_rem/tail_logs.
|
||||
8. Stale socket cleanup (Pitfall 10) lets server bind without EADDRINUSE.
|
||||
9. Lock file + socket file mode 0o600.
|
||||
10. holds_exclusive_nb -- cooperative-yield probe; returns False when
|
||||
contended and never propagates BlockingIOError / EWOULDBLOCK.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import fcntl
|
||||
import json
|
||||
import multiprocessing
|
||||
import os
|
||||
import signal
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# Use spawn so fork+LanceDB+multithread hazards (Pitfall 6) never apply.
|
||||
_SPAWN = multiprocessing.get_context("spawn")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# helpers that run inside spawn children
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _child_hold_shared(lock_path_str: str, acquired_flag: str, release_flag: str) -> int:
    """Hold LOCK_SH on the lock file until told to let go, then return 0.

    Opens (creating if needed, mode 0o600) ``lock_path_str``, takes a shared
    flock, writes "ok" to ``acquired_flag`` and then polls for ``release_flag``
    to appear. The lock is dropped and the descriptor closed on the way out.
    """
    handle = os.open(lock_path_str, os.O_RDWR | os.O_CREAT, 0o600)
    try:
        fcntl.flock(handle, fcntl.LOCK_SH)
        Path(acquired_flag).write_text("ok")
        go_signal = Path(release_flag)
        polls_left = 300  # 300 polls x 0.1 s = up to 30 s
        while polls_left > 0 and not go_signal.exists():
            time.sleep(0.1)
            polls_left -= 1
    finally:
        try:
            fcntl.flock(handle, fcntl.LOCK_UN)
        except OSError:
            # Best-effort unlock; closing the descriptor follows regardless.
            pass
        os.close(handle)
    return 0
|
||||
|
||||
|
||||
def _child_hold_shared_sigkillable(lock_path_str: str, acquired_flag: str) -> int:
    """Take LOCK_SH, write "ok" to ``acquired_flag``, then sleep indefinitely.

    The function never returns on its own; the parent test ends it with
    SIGKILL. The descriptor is deliberately left open.
    """
    handle = os.open(lock_path_str, os.O_RDWR | os.O_CREAT, 0o600)
    fcntl.flock(handle, fcntl.LOCK_SH)
    marker = Path(acquired_flag)
    marker.write_text("ok")
    while True:
        time.sleep(1)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# fixture: isolate LOCK_PATH / SOCKET_PATH into tmp_path
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@pytest.fixture
def lock_and_socket_paths(tmp_path, monkeypatch):
    """Point concurrency.LOCK_PATH / SOCKET_PATH at per-test locations.

    The lock file lives in ``tmp_path``. The socket goes under a short
    ``/tmp/iai-<pid>-<n>/`` directory because AF_UNIX paths are capped at
    104 chars on macOS and pytest's ``tmp_path`` is often longer than that.

    Yields ``(lock_path, sock_path)``.
    """
    from iai_mcp import concurrency

    short_dir = Path(f"/tmp/iai-{os.getpid()}-{id(tmp_path)}")
    short_dir.mkdir(parents=True, exist_ok=True)
    lock_file = tmp_path / ".lock"
    socket_file = short_dir / "d.sock"
    for attribute, replacement in (("LOCK_PATH", lock_file), ("SOCKET_PATH", socket_file)):
        monkeypatch.setattr(concurrency, attribute, replacement)

    def _drop_socket() -> None:
        if socket_file.exists():
            socket_file.unlink()

    try:
        yield lock_file, socket_file
    finally:
        # Best-effort cleanup so /tmp doesn't accumulate; each step may fail
        # independently without affecting the other.
        for cleanup_step in (_drop_socket, short_dir.rmdir):
            try:
                cleanup_step()
            except OSError:
                pass
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 1: shared vs exclusive
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_shared_blocks_exclusive(tmp_path, lock_and_socket_paths):
    """While one ProcessLock holds shared, another cannot take exclusive."""
    from iai_mcp.concurrency import ProcessLock

    lock_file = lock_and_socket_paths[0]
    shared_holder = ProcessLock(lock_file)
    shared_holder.acquire_shared()
    try:
        contender = ProcessLock(lock_file)
        try:
            # Second ProcessLock on the same file: exclusive must be refused.
            granted = contender.try_acquire_exclusive()
            assert granted is False
        finally:
            contender.close()
    finally:
        shared_holder.release()
        shared_holder.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 2: exclusive-then-exclusive
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_exclusive_then_exclusive_nonblocking(tmp_path, lock_and_socket_paths):
    """The first exclusive attempt wins; a second one returns False at once."""
    from iai_mcp.concurrency import ProcessLock

    lock_file = lock_and_socket_paths[0]
    winner = ProcessLock(lock_file)
    try:
        won = winner.try_acquire_exclusive()
        assert won is True
        loser = ProcessLock(lock_file)
        try:
            lost = loser.try_acquire_exclusive()
            assert lost is False
        finally:
            loser.close()
    finally:
        winner.release()
        winner.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 3: flock fd-close safety (Pitfall 2 guard)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_flock_fd_close_safe(tmp_path, lock_and_socket_paths):
    """Opening and closing an unrelated descriptor must not drop our lock.

    Guards the rationale recorded as Pitfall 2 (flock chosen over lockf):
    closing /etc/passwd's descriptor must leave the exclusive lock in place.
    """
    from iai_mcp.concurrency import ProcessLock

    lock_file = lock_and_socket_paths[0]
    owner = ProcessLock(lock_file)
    try:
        assert owner.try_acquire_exclusive() is True

        # Provoke the close-fd trap with a file that has nothing to do with
        # the lock.
        stray_fd = os.open("/etc/passwd", os.O_RDONLY)
        os.close(stray_fd)

        # A second ProcessLock on the same path must still be refused, which
        # shows the original lock is still held.
        rival = ProcessLock(lock_file)
        try:
            rival_granted = rival.try_acquire_exclusive()
            assert rival_granted is False
        finally:
            rival.close()
    finally:
        owner.release()
        owner.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 4: multi-MCP shared holders
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@pytest.mark.parametrize("n_holders", [2, 3])
def test_multi_mcp(tmp_path, lock_and_socket_paths, n_holders):
    """N parallel shared holders block exclusive until ALL release.

    Spawns ``n_holders`` child processes that each take LOCK_SH, checks that
    an exclusive attempt fails while they hold it, releases them, and then
    checks that exclusive succeeds once every child has exited.
    """
    from iai_mcp.concurrency import ProcessLock

    lock_path, _ = lock_and_socket_paths
    lock_path_str = str(lock_path)

    acquired_flags = [tmp_path / f".acquired_{i}" for i in range(n_holders)]
    release_flag = tmp_path / ".release"

    procs = []
    try:
        # Spawn inside the try so children started before a failed start()
        # are still reaped by the finally clause.
        for flag in acquired_flags:
            p = _SPAWN.Process(
                target=_child_hold_shared,
                args=(lock_path_str, str(flag), str(release_flag)),
            )
            p.start()
            procs.append(p)

        # Wait for all children to acquire shared.
        deadline = time.time() + 15
        while time.time() < deadline:
            if all(f.exists() for f in acquired_flags):
                break
            time.sleep(0.05)
        assert all(f.exists() for f in acquired_flags), "children failed to take LOCK_SH"

        # Daemon cannot take exclusive while any child still holds shared.
        daemon = ProcessLock(lock_path)
        try:
            assert daemon.try_acquire_exclusive() is False
        finally:
            daemon.close()
    finally:
        # Always signal release, even when an assertion above failed;
        # otherwise each child keeps polling and teardown waits the full
        # join timeout per child before falling back to terminate().
        release_flag.write_text("go")
        for p in procs:
            p.join(timeout=10)
            if p.is_alive():
                p.terminate()
                p.join(timeout=2)

    # After all children exit, exclusive must succeed.
    daemon2 = ProcessLock(lock_path)
    try:
        assert daemon2.try_acquire_exclusive() is True
    finally:
        daemon2.release()
        daemon2.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 5: SIGKILL releases lock (kernel-enforced)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_sigkill_releases_lock(tmp_path, lock_and_socket_paths):
    """A shared holder killed with SIGKILL no longer blocks exclusive."""
    from iai_mcp.concurrency import ProcessLock

    lock_file = lock_and_socket_paths[0]
    ready_marker = tmp_path / ".acquired_sigkill"
    holder = _SPAWN.Process(
        target=_child_hold_shared_sigkillable,
        args=(str(lock_file), str(ready_marker)),
    )
    holder.start()
    try:
        # Give the child up to 15 s to report that it holds the shared lock.
        give_up_at = time.time() + 15
        while time.time() < give_up_at and not ready_marker.exists():
            time.sleep(0.05)
        assert ready_marker.exists(), "child didn't acquire shared"

        # With the child holding shared, exclusive must be refused.
        probe = ProcessLock(lock_file)
        try:
            probe_granted = probe.try_acquire_exclusive()
            assert probe_granted is False
        finally:
            probe.close()

        # kill -9 the holder and wait for it to be reaped.
        os.kill(holder.pid, signal.SIGKILL)
        holder.join(timeout=10)
        assert not holder.is_alive()
    finally:
        if holder.is_alive():
            holder.terminate()
            holder.join(timeout=2)

    # The dead child's lock should be gone; allow up to 3 s for that to show.
    daemon = ProcessLock(lock_file)
    try:
        give_up_at = time.time() + 3
        got_lock = False
        while not got_lock and time.time() < give_up_at:
            got_lock = bool(daemon.try_acquire_exclusive())
            if not got_lock:
                time.sleep(0.05)
        assert got_lock, "exclusive still blocked after SIGKILL"
    finally:
        daemon.release()
        daemon.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 6: socket NDJSON status round-trip
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_socket_status_round_trip(tmp_path, lock_and_socket_paths):
    """A ``status`` request gets back ok=True, the FSM state and an uptime_sec."""
    from iai_mcp.concurrency import ProcessLock, serve_control_socket

    lock_file, sock_path = lock_and_socket_paths
    lock = ProcessLock(lock_file)
    state = {"fsm_state": "WAKE", "daemon_started_at": "2026-04-18T00:00:00+00:00"}

    async def runner():
        shutdown = asyncio.Event()
        server_task = asyncio.create_task(
            serve_control_socket(
                store=None,
                lock=lock,
                state=state,
                shutdown=shutdown,
                socket_path=sock_path,
            )
        )
        # Poll (up to 100 x 20 ms) for the socket file to appear.
        polls = 0
        while polls < 100 and not sock_path.exists():
            await asyncio.sleep(0.02)
            polls += 1
        assert sock_path.exists(), "socket never bound"

        reader, writer = await asyncio.open_unix_connection(path=str(sock_path))
        writer.write(b'{"type":"status"}\n')
        await writer.drain()
        raw_reply = await reader.readline()
        writer.close()
        try:
            await writer.wait_closed()
        except Exception:
            # Best-effort close; the reply has already been read.
            pass

        shutdown.set()
        await asyncio.wait_for(server_task, timeout=5)
        return json.loads(raw_reply)

    try:
        reply = asyncio.run(runner())
    finally:
        lock.close()

    assert reply["ok"] is True
    assert reply["state"] == "WAKE"
    # uptime_sec must be numeric.
    assert isinstance(reply["uptime_sec"], (int, float))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 7: injected dispatcher receives request dicts unchanged
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_socket_injected_dispatcher(tmp_path, lock_and_socket_paths):
    """An injected dispatcher sees pause/force_rem/tail_logs dicts unchanged."""
    from iai_mcp.concurrency import ProcessLock, serve_control_socket

    lock_file, sock_path = lock_and_socket_paths
    lock = ProcessLock(lock_file)

    received: list[dict] = []

    async def custom_dispatcher(req: dict) -> dict:
        # Record what arrived and echo the request type back.
        received.append(req)
        return {"ok": True, "seen": req.get("type")}

    requests = [
        {"type": "pause", "seconds": 60},
        {"type": "force_rem"},
        {"type": "tail_logs", "n": 10},
    ]

    async def _exchange(request: dict) -> dict:
        """Send one NDJSON request on a fresh connection; return the parsed reply."""
        r, w = await asyncio.open_unix_connection(path=str(sock_path))
        w.write((json.dumps(request) + "\n").encode())
        await w.drain()
        reply = json.loads(await r.readline())
        w.close()
        try:
            await w.wait_closed()
        except Exception:
            pass
        return reply

    async def runner():
        shutdown = asyncio.Event()
        server_task = asyncio.create_task(
            serve_control_socket(
                store=None,
                lock=lock,
                state={},
                shutdown=shutdown,
                dispatcher=custom_dispatcher,
                socket_path=sock_path,
            )
        )
        polls = 0
        while polls < 100 and not sock_path.exists():
            await asyncio.sleep(0.02)
            polls += 1
        assert sock_path.exists()

        replies = []
        for request in requests:
            replies.append(await _exchange(request))

        shutdown.set()
        await asyncio.wait_for(server_task, timeout=5)
        return replies

    try:
        responses = asyncio.run(runner())
    finally:
        lock.close()

    assert received == requests, f"dispatcher saw {received!r}"
    for resp, req in zip(responses, requests):
        assert resp == {"ok": True, "seen": req["type"]}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 8: stale socket cleanup (Pitfall 10)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_stale_socket_cleanup(tmp_path, lock_and_socket_paths):
    """A leftover file at the socket path does not stop the server binding."""
    from iai_mcp.concurrency import ProcessLock, serve_control_socket

    lock_file, sock_path = lock_and_socket_paths
    # Plant a regular file where the socket should go, as an orphan would leave.
    sock_path.parent.mkdir(parents=True, exist_ok=True)
    sock_path.write_text("stale")
    assert sock_path.exists()

    lock = ProcessLock(lock_file)

    def _socket_replaced_stale_file() -> bool:
        # The planted file is 5 bytes long; the wait ends once the path
        # reports a size of zero.
        return sock_path.exists() and sock_path.stat().st_size == 0

    async def runner():
        shutdown = asyncio.Event()
        server_task = asyncio.create_task(
            serve_control_socket(
                store=None,
                lock=lock,
                state={},
                shutdown=shutdown,
                socket_path=sock_path,
            )
        )
        polls = 0
        while polls < 100 and not _socket_replaced_stale_file():
            await asyncio.sleep(0.02)
            polls += 1

        # One status round-trip proves the server is actually listening.
        r, w = await asyncio.open_unix_connection(path=str(sock_path))
        w.write(b'{"type":"status"}\n')
        await w.drain()
        raw_reply = await r.readline()
        w.close()
        try:
            await w.wait_closed()
        except Exception:
            pass
        shutdown.set()
        await asyncio.wait_for(server_task, timeout=5)
        return json.loads(raw_reply)

    try:
        reply = asyncio.run(runner())
    finally:
        lock.close()

    assert reply.get("ok") is True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 9: 0o600 permissions on lock file + socket
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_file_permissions_user_only(tmp_path, lock_and_socket_paths):
    """Both the lock file and the control socket carry mode 0o600."""
    from iai_mcp.concurrency import ProcessLock, serve_control_socket

    lock_path, sock_path = lock_and_socket_paths

    lock = ProcessLock(lock_path)
    # Constructing the ProcessLock is expected to have created the file.
    assert lock_path.exists()
    mode = lock_path.stat().st_mode & 0o777
    assert mode == 0o600, f"lock mode is {oct(mode)}, expected 0o600"

    async def runner():
        shutdown = asyncio.Event()
        server_task = asyncio.create_task(
            serve_control_socket(
                store=None,
                lock=lock,
                state={},
                shutdown=shutdown,
                socket_path=sock_path,
            )
        )
        polls = 0
        while polls < 100 and not sock_path.exists():
            await asyncio.sleep(0.02)
            polls += 1
        # Read the permission bits while the server is still up.
        observed = sock_path.stat().st_mode & 0o777
        shutdown.set()
        await asyncio.wait_for(server_task, timeout=5)
        return observed

    try:
        sock_mode = asyncio.run(runner())
    finally:
        lock.close()
    assert sock_mode == 0o600, f"socket mode is {oct(sock_mode)}, expected 0o600"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 10: holds_exclusive_nb cooperative-yield probe
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_holds_exclusive_nb(tmp_path, lock_and_socket_paths):
    """holds_exclusive_nb is True while we hold EX and False when contended.

    In the contended case the probe has to return False rather than let
    BlockingIOError / EWOULDBLOCK escape.
    """
    from iai_mcp.concurrency import ProcessLock

    lock_file = lock_and_socket_paths[0]
    daemon = ProcessLock(lock_file)
    try:
        # Phase 1: we hold exclusive, so the probe reports True.
        assert daemon.try_acquire_exclusive() is True
        assert daemon.holds_exclusive_nb() is True

        # Phase 2: give the lock up and let a child process take shared.
        daemon.release()

        ready_marker = tmp_path / ".shared_holder_acquired"
        go_marker = tmp_path / ".shared_holder_release"
        holder = _SPAWN.Process(
            target=_child_hold_shared,
            args=(str(lock_file), str(ready_marker), str(go_marker)),
        )
        holder.start()
        try:
            give_up_at = time.time() + 15
            while time.time() < give_up_at and not ready_marker.exists():
                time.sleep(0.05)
            assert ready_marker.exists()

            # We no longer hold EX and the child holds SH: the probe must
            # answer False without raising.
            assert daemon.holds_exclusive_nb() is False
        finally:
            # Always let the child go, then reap it.
            go_marker.write_text("go")
            holder.join(timeout=10)
            if holder.is_alive():
                holder.terminate()
                holder.join(timeout=2)
    finally:
        daemon.close()
|
||||
403
tests/test_concurrency_session_open.py
Normal file
403
tests/test_concurrency_session_open.py
Normal file
|
|
@ -0,0 +1,403 @@
|
|||
"""Tests for the 7th unix-socket message type `session_open`.
|
||||
|
||||
Covers:
|
||||
- Valid session_open message is accepted; reply = {"ok": True, "reason": "session_open_queued"}.
|
||||
- Missing session_id is tolerated (optional field per spec).
|
||||
- Wrong-typed session_id is rejected at validation.
|
||||
- After a valid session_open, state contains:
|
||||
* first_turn_pending[session_id] = non-empty timestamp string (ISO-8601)
|
||||
* hippea_cascade_request with pending=True
|
||||
- The 6 prior message types still work (no regression).
|
||||
|
||||
Uses a real `serve_control_socket(store, lock, state, shutdown)` behind a
|
||||
threaded background event-loop so asyncio.run() calls in the test body don't
|
||||
tear the server down between requests.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import tempfile
|
||||
import threading
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from iai_mcp import concurrency, daemon_state
|
||||
from iai_mcp.concurrency import (
|
||||
ProcessLock,
|
||||
_dispatch_socket_request,
|
||||
_validate_socket_message,
|
||||
serve_control_socket,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- fixtures
|
||||
|
||||
|
||||
@pytest.fixture
def tmp_socket(tmp_path: Path):
    """Yield a short, unique unix-socket path (macOS ~104-byte limit).

    ``tmp_path / "d.sock"`` is used when it is at most 100 characters long.
    Otherwise a directory from ``tempfile.mkdtemp`` is used instead; that
    directory is not managed by pytest, so it is removed again on teardown.

    Yields:
        Path of the socket file to bind (the file itself is not created).
    """
    import shutil

    candidate = tmp_path / "d.sock"
    fallback_dir = None
    if len(str(candidate)) > 100:
        fallback_dir = tempfile.mkdtemp(prefix="iai-sock-")
        candidate = Path(fallback_dir) / "d.sock"
    try:
        yield candidate
    finally:
        # mkdtemp leaves deletion to the caller; without this the fallback
        # directory (and any socket left in it) leaks on every run.
        if fallback_dir is not None:
            shutil.rmtree(fallback_dir, ignore_errors=True)
|
||||
|
||||
|
||||
@pytest.fixture
def tmp_state(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path:
    """Point daemon_state.STATE_PATH at a per-test file and return that path."""
    state_file = tmp_path / ".daemon-state.json"
    monkeypatch.setattr(daemon_state, "STATE_PATH", state_file)
    return state_file
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- unit tests
|
||||
|
||||
|
||||
def test_validate_session_open_accepts_valid_message() -> None:
    """A session_open with string session_id and ts validates without error."""
    message = {
        "type": "session_open",
        "session_id": "s1",
        "ts": "2026-04-19T00:00:00Z",
    }
    accepted, problem = _validate_socket_message(message)
    assert accepted is True
    assert problem is None
|
||||
|
||||
|
||||
def test_validate_session_open_accepts_missing_session_id() -> None:
    """session_id is optional, so a bare session_open still validates."""
    bare_message = {"type": "session_open"}
    accepted, problem = _validate_socket_message(bare_message)
    assert accepted is True
    assert problem is None
|
||||
|
||||
|
||||
def test_validate_session_open_rejects_non_string_session_id() -> None:
    """An integer session_id is rejected and the error names the field."""
    message = {"type": "session_open", "session_id": 123, "ts": "x"}
    accepted, problem = _validate_socket_message(message)
    assert accepted is False
    assert problem is not None
    assert "session_id" in problem
|
||||
|
||||
|
||||
def test_validate_session_open_rejects_non_string_ts() -> None:
    """An integer ts is rejected and the error names the field."""
    message = {"type": "session_open", "session_id": "s1", "ts": 42}
    accepted, problem = _validate_socket_message(message)
    assert accepted is False
    assert problem is not None
    assert "ts" in problem
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- dispatcher unit
|
||||
|
||||
|
||||
def _make_fake_store() -> Any:
    """Return a fresh, unconstrained MagicMock to stand in for the store."""
    fake_store = MagicMock()
    return fake_store
|
||||
|
||||
|
||||
def _make_fake_lock() -> Any:
    """Return a MagicMock restricted to ProcessLock's attributes (``spec=``)."""
    fake_lock = MagicMock(spec=ProcessLock)
    return fake_lock
|
||||
|
||||
|
||||
# We call asyncio.run() directly in tests below; no asyncio marker needed.
|
||||
|
||||
|
||||
def test_dispatch_session_open_queues_first_turn_and_cascade(
    tmp_state: Path,
) -> None:
    """session_open records a first-turn stamp and a pending cascade request.

    After dispatch the state must hold a non-empty string under
    first_turn_pending[session_id], a hippea_cascade_request with pending=True
    for that session, a last_session_open echo, and the state file must exist
    on disk with the cascade request still pending.
    """
    session = "sess-abc"
    state: dict = {"fsm_state": "WAKE"}
    request = {
        "type": "session_open",
        "session_id": session,
        "ts": "2026-04-19T12:00:00Z",
    }
    reply = asyncio.run(
        _dispatch_socket_request(request, _make_fake_store(), _make_fake_lock(), state)
    )
    assert reply == {"ok": True, "reason": "session_open_queued"}

    # First-turn hook marker: a non-empty string (ISO-8601 timestamp).
    first_turn = state.get("first_turn_pending")
    assert isinstance(first_turn, dict)
    marker = first_turn.get(session)
    assert isinstance(marker, str) and marker

    # Cascade task request.
    cascade_request = state.get("hippea_cascade_request")
    assert isinstance(cascade_request, dict)
    assert cascade_request.get("pending") is True
    assert cascade_request.get("session_id") == session

    # Echo kept for introspection.
    echo = state.get("last_session_open")
    assert isinstance(echo, dict)
    assert echo.get("session_id") == session

    # The state was written to the redirected STATE_PATH.
    assert tmp_state.exists()
    persisted = json.loads(tmp_state.read_text())
    assert persisted.get("hippea_cascade_request", {}).get("pending") is True
|
||||
|
||||
|
||||
def test_dispatch_session_open_missing_session_id_ok(tmp_state: Path) -> None:
    """A session_open without session_id is still acknowledged as queued."""
    state: dict = {"fsm_state": "WAKE"}
    request = {"type": "session_open", "ts": "2026-04-19T12:00:00Z"}
    pending_call = _dispatch_socket_request(
        request, _make_fake_store(), _make_fake_lock(), state
    )
    reply = asyncio.run(pending_call)
    assert reply.get("ok") is True
    assert reply.get("reason") == "session_open_queued"
|
||||
|
||||
|
||||
def test_dispatch_session_open_clips_long_session_id(tmp_state: Path) -> None:
    """session_id is clipped to 128 chars (ASVS V5 output hardening).

    The echo in ``state["last_session_open"]`` is required to exist and to
    carry a string ``session_id``. The previous form of this test fell back
    to ``{}`` / ``""`` when either was missing, so it passed vacuously even
    if the handler recorded nothing.
    """
    state: dict = {"fsm_state": "WAKE"}
    long_id = "a" * 1000
    req = {"type": "session_open", "session_id": long_id, "ts": "x"}

    resp = asyncio.run(
        _dispatch_socket_request(req, _make_fake_store(), _make_fake_lock(), state)
    )
    assert resp["ok"] is True

    last = state.get("last_session_open")
    assert isinstance(last, dict), "session_open did not record last_session_open"
    assert "session_id" in last, "last_session_open carries no session_id"
    recorded = last["session_id"]
    assert isinstance(recorded, str)
    assert len(recorded) <= 128
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- no-regression
|
||||
|
||||
|
||||
def test_dispatch_force_wake_still_works(tmp_state: Path) -> None:
    """No-regression: ``force_wake`` is acknowledged with ``wake_queued``."""
    state: dict = {"fsm_state": "WAKE"}
    request = {"type": "force_wake", "ts": "x"}

    reply = asyncio.run(
        _dispatch_socket_request(
            request, _make_fake_store(), _make_fake_lock(), state,
        )
    )

    assert reply == {"ok": True, "reason": "wake_queued"}
|
||||
|
||||
|
||||
def test_dispatch_force_rem_still_works(tmp_state: Path) -> None:
    """No-regression: ``force_rem`` is acknowledged with ``rem_queued``."""
    state: dict = {"fsm_state": "WAKE"}
    request = {"type": "force_rem", "ts": "x"}

    reply = asyncio.run(
        _dispatch_socket_request(
            request, _make_fake_store(), _make_fake_lock(), state,
        )
    )

    assert reply == {"ok": True, "reason": "rem_queued"}
|
||||
|
||||
|
||||
def test_dispatch_pause_still_works(tmp_state: Path) -> None:
    """No-regression: ``pause`` replies paused and sets ``scheduler_paused``."""
    state: dict = {"fsm_state": "WAKE"}
    request = {"type": "pause"}

    reply = asyncio.run(
        _dispatch_socket_request(
            request, _make_fake_store(), _make_fake_lock(), state,
        )
    )

    assert reply == {"ok": True, "paused": True}
    assert state["scheduler_paused"] is True
|
||||
|
||||
|
||||
def test_dispatch_resume_still_works(tmp_state: Path) -> None:
    """No-regression: ``resume`` clears a previously set ``scheduler_paused``."""
    state: dict = {"fsm_state": "WAKE", "scheduler_paused": True}
    request = {"type": "resume"}

    reply = asyncio.run(
        _dispatch_socket_request(
            request, _make_fake_store(), _make_fake_lock(), state,
        )
    )

    assert reply == {"ok": True, "paused": False}
    assert state["scheduler_paused"] is False
|
||||
|
||||
|
||||
def test_dispatch_user_initiated_sleep_still_works(tmp_state: Path) -> None:
    """No-regression: ``user_initiated_sleep`` from WAKE reports TRANSITIONING."""
    state: dict = {"fsm_state": "WAKE"}
    request = {"type": "user_initiated_sleep", "reason": "night", "ts": "x"}

    reply = asyncio.run(
        _dispatch_socket_request(
            request, _make_fake_store(), _make_fake_lock(), state,
        )
    )

    assert reply.get("ok") is True
    assert reply.get("state") == "TRANSITIONING"
|
||||
|
||||
|
||||
def test_dispatch_status_still_works(tmp_state: Path) -> None:
    """No-regression: ``status`` echoes the FSM state and a ``version`` key."""
    state: dict = {"fsm_state": "WAKE"}
    request = {"type": "status"}

    reply = asyncio.run(
        _dispatch_socket_request(
            request, _make_fake_store(), _make_fake_lock(), state,
        )
    )

    assert reply.get("ok") is True
    assert reply.get("state") == "WAKE"
    # Version echoed per Plan 04-gap-1.
    assert "version" in reply
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- round-trip
|
||||
|
||||
|
||||
class _ThreadedDaemon:
    """Real serve_control_socket on a background thread with its own event loop.

    Reuses the pattern from tests/test_core_bedtime_inject.py but drives the
    production _dispatch_socket_request so we exercise the real 7th-message
    path end-to-end.

    Usage: construct, call ``start()``, talk to ``path`` over the socket, and
    call ``stop()`` in a ``finally`` block. ``stop()`` is safe to call even
    when the server task has already died.
    """

    def __init__(self, path: Path, state: dict) -> None:
        """Store the socket path and the state dict shared with the dispatcher.

        Args:
            path: Filesystem path passed to ``serve_control_socket`` as
                ``socket_path``.
            state: Mutable daemon state; the test inspects it after replies.
        """
        self.path = path
        self.state = state
        self.lock = MagicMock(spec=ProcessLock)
        self.store = MagicMock()
        self.shutdown = None  # populated on the loop thread
        self._loop: asyncio.AbstractEventLoop | None = None
        self._thread: threading.Thread | None = None
        self._ready = threading.Event()
        # First exception raised by the server coroutine, if any. Recorded so
        # start() can report it instead of a bare timeout.
        self._error: BaseException | None = None

    def start(self) -> None:
        """Start the server thread and block until it signals readiness.

        Raises:
            AssertionError: if the server does not become ready within 5
                seconds, or if the server coroutine raised; the message
                carries the recorded exception.
        """
        def _run() -> None:
            loop = asyncio.new_event_loop()
            self._loop = loop
            asyncio.set_event_loop(loop)
            self.shutdown = asyncio.Event()

            async def _serve() -> None:
                # Hand the real dispatcher the state we own.
                async def _dispatcher(req: dict) -> dict:
                    return await _dispatch_socket_request(
                        req, self.store, self.lock, self.state
                    )

                task = asyncio.create_task(
                    serve_control_socket(
                        self.store,
                        self.lock,
                        self.state,
                        self.shutdown,  # type: ignore[arg-type]
                        dispatcher=_dispatcher,
                        socket_path=self.path,
                    )
                )
                # Give the server a moment to bind before signalling ready.
                await asyncio.sleep(0.1)
                self._ready.set()
                await task

            try:
                loop.run_until_complete(_serve())
            except Exception as exc:
                # Keep the thread quiet but remember why the server died.
                self._error = exc
            finally:
                # Unblock start() right away if the server failed before it
                # ever became ready (a no-op on the normal path).
                self._ready.set()
                try:
                    loop.close()
                except Exception:
                    pass

        self._thread = threading.Thread(target=_run, daemon=True)
        self._thread.start()
        became_ready = self._ready.wait(timeout=5.0)
        assert became_ready and self._error is None, (
            f"threaded daemon failed to start: {self._error!r}"
        )

    def stop(self) -> None:
        """Request shutdown and join the server thread (best effort).

        Never raises because the loop is already closed: tests call this from
        ``finally`` and an exception here would mask the real failure.
        """
        loop = self._loop
        if loop is None:
            return
        if self.shutdown is not None:
            try:
                loop.call_soon_threadsafe(self.shutdown.set)
            except RuntimeError:
                # The loop thread already exited and closed its loop
                # (call_soon_threadsafe raises on a closed loop); there is
                # nothing left to signal.
                pass
        if self._thread is not None:
            self._thread.join(timeout=5.0)
|
||||
|
||||
|
||||
async def _send(path: Path, msg: dict, *, timeout: float = 5.0) -> dict:
    """Send one NDJSON message over a unix socket and return the decoded reply.

    Args:
        path: Filesystem path of the unix socket to connect to.
        msg: JSON-serialisable request; written as a single line.
        timeout: Seconds to wait for the reply line.

    Returns:
        The reply line parsed as JSON.
    """
    rx, tx = await asyncio.open_unix_connection(str(path))
    try:
        wire = json.dumps(msg) + "\n"
        tx.write(wire.encode("utf-8"))
        await tx.drain()
        reply_line = await asyncio.wait_for(rx.readline(), timeout=timeout)
        return json.loads(reply_line)
    finally:
        # Best-effort close; a failure here must not hide the real outcome.
        try:
            tx.close()
            await tx.wait_closed()
        except Exception:
            pass
|
||||
|
||||
|
||||
def test_session_open_end_to_end_round_trip(
    tmp_socket: Path, tmp_state: Path,
) -> None:
    """Real NDJSON round-trip over a unix socket — the 7th message type.

    Starts a threaded daemon on ``tmp_socket``, sends ``session_open`` through
    the socket, and checks both the reply and the state mutations it caused.
    """
    session_id = "e2e-sess-1"
    state: dict = {"fsm_state": "WAKE"}
    daemon = _ThreadedDaemon(tmp_socket, state)
    daemon.start()
    try:
        message = {
            "type": "session_open",
            "session_id": session_id,
            "ts": "2026-04-19T12:00:00Z",
        }
        reply = asyncio.run(_send(tmp_socket, message))
        assert reply == {"ok": True, "reason": "session_open_queued"}

        # State mutations are visible to the test once the reply arrived.
        first_turn = state.get("first_turn_pending")
        assert isinstance(first_turn, dict)
        stamp = first_turn.get(session_id)
        # Described upstream as an ISO-8601 timestamp; only "non-empty str"
        # is verified here.
        assert isinstance(stamp, str) and stamp

        cascade_request = state.get("hippea_cascade_request")
        assert isinstance(cascade_request, dict)
        assert cascade_request.get("pending") is True
    finally:
        daemon.stop()
|
||||
|
||||
|
||||
def test_session_open_does_not_regress_other_6_types(
    tmp_socket: Path, tmp_state: Path,
) -> None:
    """The six older message types still succeed end-to-end over the socket.

    Sent in order: force_wake, force_rem, pause, resume, status,
    user_initiated_sleep. The first two are checked for their exact reply;
    the rest only for ``ok``.
    """
    state: dict = {"fsm_state": "WAKE"}
    daemon = _ThreadedDaemon(tmp_socket, state)
    daemon.start()

    def _ask(message: dict) -> dict:
        # One fresh connection (and event loop) per request.
        return asyncio.run(_send(tmp_socket, message))

    try:
        assert _ask({"type": "force_wake", "ts": "x"}) == {
            "ok": True, "reason": "wake_queued",
        }
        assert _ask({"type": "force_rem", "ts": "x"}) == {
            "ok": True, "reason": "rem_queued",
        }

        ok_only_messages = (
            {"type": "pause"},
            {"type": "resume"},
            {"type": "status"},
            # State is WAKE, so this one transitions.
            {"type": "user_initiated_sleep", "reason": "night", "ts": "x"},
        )
        for message in ok_only_messages:
            assert _ask(message).get("ok") is True
    finally:
        daemon.stop()
|
||||
516
tests/test_concurrent_wrapper_spawn.py
Normal file
516
tests/test_concurrent_wrapper_spawn.py
Normal file
|
|
@ -0,0 +1,516 @@
|
|||
"""Phase 07.1 Plan 08 — R5 acceptance: concurrent wrapper cold-start regression trap.
|
||||
|
||||
THE regression-trap test that catches the precise scenario Phase 7's verifier
|
||||
missed: N parallel wrapper cold-starts when no daemon exists.
|
||||
|
||||
SPEC R5 / A2 contract:
|
||||
- PASSES on post-Phase-7.1 code (with launchd-managed listener):
|
||||
bridge.ts is a pure connector (Plan 07.1-04) -> all 5 wrappers connect
|
||||
to the SAME launchd-pre-bound socket -> launchd spawns the daemon
|
||||
ONCE in response to the first connection -> all 5 wrappers share it.
|
||||
- FAILS deterministically on pre-Phase-7.1 baseline:
|
||||
bridge.ts spawn-fallback wins the TOCTOU race for multiple wrappers,
|
||||
2-5 daemons end up bound, the singleton assertion fires.
|
||||
|
||||
Without this test, Phase 7.1 has the same verification gap Phase 7 had:
|
||||
architectural code coverage without runtime invariant coverage. This test IS
|
||||
the runtime invariant proof.
|
||||
|
||||
Test isolation: a per-test LaunchAgent with a unique Label
|
||||
``com.iai-mcp.daemon.test-<pid>-<tmp_id>`` is rendered into ``tmp_path/
|
||||
Library/LaunchAgents/`` (NOT the user's real ``~/Library/LaunchAgents/``,
|
||||
to avoid pollution if teardown is interrupted) and loaded via
|
||||
``launchctl load -w``. The test socket lives under
|
||||
``/tmp/iai-cspawn-<pid>-<tmp_id>/d.sock`` (within macOS's 104-byte
|
||||
AF_UNIX path cap). Teardown unloads the agent, removes the plist, kills
|
||||
any spawned test daemon (env-filtered to never touch the user's real
|
||||
production daemon), and removes the socket.
|
||||
|
||||
Total runtime: ~25-30s (5 staggered cold-starts + 15s settle + readline
|
||||
poll). Override with ``IAI_MCP_SKIP_LAUNCHCTL_TESTS=1`` to skip.
|
||||
|
||||
This module is macOS-only (LaunchAgent + launchctl). Skipped on Linux/Windows.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import platform
|
||||
import select
|
||||
import signal
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import psutil
|
||||
import pytest
|
||||
|
||||
REPO = Path(__file__).resolve().parent.parent
|
||||
WRAPPER = REPO / "mcp-wrapper"
|
||||
|
||||
pytestmark = pytest.mark.skipif(
|
||||
platform.system() != "Darwin",
|
||||
reason="LaunchAgent + launchctl is macOS-only",
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fixtures.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def built_wrapper() -> Path:
    """Build the TypeScript wrapper once per module and return its entry point.

    Runs ``npm install`` only when ``node_modules`` is absent, then always
    runs ``npm run build``. Same pattern as
    ``tests/test_socket_subagent_reuse.py:built_wrapper``.

    Returns:
        Path to ``mcp-wrapper/dist/index.js``.
    """
    npm_steps: list[list[str]] = []
    if not (WRAPPER / "node_modules").exists():
        npm_steps.append(["npm", "install"])
    npm_steps.append(["npm", "run", "build"])

    for step in npm_steps:
        subprocess.run(step, cwd=WRAPPER, check=True)

    entry_point = WRAPPER / "dist" / "index.js"
    assert entry_point.exists(), "npm run build should have produced dist/index.js"
    return entry_point
|
||||
|
||||
|
||||
def _signal_test_daemons(sock_path: Path, sig: int) -> None:
    """Send ``sig`` to every iai_mcp.daemon process tagged with ``sock_path``.

    A process matches only when its command line contains ``iai_mcp.daemon``
    AND its environment has ``IAI_DAEMON_SOCKET_PATH`` equal to ``sock_path``.
    The env filter is what keeps this from ever touching the user's real
    production daemon, which runs with the production socket path.
    """
    target = str(sock_path)
    for proc in psutil.process_iter(["cmdline", "environ"]):
        try:
            cmdline = " ".join(proc.info.get("cmdline") or [])
            if "iai_mcp.daemon" not in cmdline:
                continue
            proc_env = proc.info.get("environ") or {}
            if proc_env.get("IAI_DAEMON_SOCKET_PATH") == target:
                proc.send_signal(sig)
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            continue


@pytest.fixture
def test_launchagent(tmp_path):
    """Render + load a tmp LaunchAgent against an isolated test socket path.

    The plist is written into ``tmp_path/Library/LaunchAgents/`` (NOT the
    user's real ``~/Library/LaunchAgents/``) so any teardown failure leaves
    no pollution under the user's home directory. The loaded agent is
    identified by its ``Label`` value, which is unique per test
    (PID + ``tmp_path`` id).

    [Rule 3 deviation] ``IAI_DAEMON_SOCKET_PATH`` (and ``PYTHONPATH``) are
    injected into the rendered plist's EnvironmentVariables. Per the original
    notes the launchd-spawned daemon binds via fd 3 and ignores the env
    value, so the variable acts purely as a tag that lets the psutil environ
    filter recognise daemons belonging to this test.

    Everything after the socket directory is created runs under one
    ``try/finally``, so the ``/tmp`` directory and any loaded agent are also
    cleaned up when the fixture skips or fails before yielding.

    Yields: ``(sock_path, plist_path, label, env)`` -- env is suitable for
    spawning wrappers via subprocess.Popen.
    """
    if os.environ.get("IAI_MCP_SKIP_LAUNCHCTL_TESTS") == "1":
        pytest.skip("IAI_MCP_SKIP_LAUNCHCTL_TESTS=1")

    unique = f"{os.getpid()}-{id(tmp_path) & 0xFFFFFF:x}"

    # Use /tmp/ for the socket directory (macOS AF_UNIX 104-byte path cap;
    # tmp_path under /private/var/folders/... is too long for some labels).
    sock_dir = Path(f"/tmp/iai-cspawn-{unique}")
    sock_path = sock_dir / "d.sock"
    label = f"com.iai-mcp.daemon.test-{unique}"

    # Render plist under tmp_path/Library/LaunchAgents/ (NOT the user's
    # real ~/Library/LaunchAgents/ -- avoids pollution if teardown is
    # interrupted on a dev box where the production daemon is OFF).
    plist_dir = tmp_path / "Library" / "LaunchAgents"
    plist_path = plist_dir / f"{label}.plist"

    sock_dir.mkdir(parents=True, exist_ok=True)
    try:
        if sock_path.exists():
            sock_path.unlink()
        plist_dir.mkdir(parents=True, exist_ok=True)

        # Read template and substitute placeholders. Then:
        #   1. Replace the production label ONLY at the <key>Label</key>
        #      binding site (anchored on the surrounding <string>...</string>
        #      so the comment block at the top of the template, which
        #      mentions the production label by name, is left alone).
        #   2. Replace the production socket path with the test socket path.
        #   3. Inject IAI_DAEMON_SOCKET_PATH and PYTHONPATH into
        #      EnvironmentVariables (Rule 3 fix, see docstring).
        template = (
            REPO / "scripts" / "com.iai-mcp.daemon.plist.template"
        ).read_text()
        label_old_xml = "<string>com.iai-mcp.daemon</string>"
        label_new_xml = f"<string>{label}</string>"
        label_hits = template.count(label_old_xml)
        if label_hits != 1:
            pytest.fail(
                f"plist template invariant broken: expected exactly one "
                f"<string>com.iai-mcp.daemon</string> occurrence (the "
                f"<key>Label</key> binding); found "
                f"{label_hits}",
            )
        # NOTE(review): the whitespace after "\n" in the two literals below
        # must match the template's indentation exactly for the injection to
        # apply -- confirm against the template file.
        rendered = (
            template
            .replace("{PYTHON_PATH}", sys.executable)
            .replace("{HOME}", str(Path.home()))
            .replace(label_old_xml, label_new_xml)
            .replace(
                f"{Path.home()}/.iai-mcp/.daemon.sock",
                str(sock_path),
            )
            .replace(
                "<key>IAI_MCP_LAUNCHD_MANAGED</key>\n <string>1</string>",
                "<key>IAI_MCP_LAUNCHD_MANAGED</key>\n <string>1</string>\n"
                f" <key>IAI_DAEMON_SOCKET_PATH</key>\n <string>{sock_path}</string>\n"
                f" <key>PYTHONPATH</key>\n <string>{REPO / 'src'}</string>",
            )
        )
        plist_path.write_text(rendered)

        # Pre-clean (idempotent). Ignore any "not loaded" errors.
        subprocess.run(
            ["launchctl", "unload", "-w", str(plist_path)],
            capture_output=True, check=False,
        )

        # Load the test LaunchAgent.
        res = subprocess.run(
            ["launchctl", "load", "-w", str(plist_path)],
            capture_output=True, text=True, check=False,
        )
        if res.returncode != 0:
            # Common causes: TCC denial on macOS Sequoia/Sonoma, missing
            # /Library/LaunchAgents permission, plist syntax error.
            pytest.skip(
                f"launchctl load failed (rc={res.returncode}): {res.stderr.strip()}"
            )

        # Verify registration. If load returned 0 but the label is missing,
        # something is off -- fail rather than silently skip. The finally
        # block below takes care of unloading.
        list_res = subprocess.run(
            ["launchctl", "list"], capture_output=True, text=True, check=False,
        )
        if label not in list_res.stdout:
            pytest.fail(
                f"LaunchAgent {label!r} not present in `launchctl list` after load",
            )

        env = {
            **os.environ,
            "IAI_MCP_PYTHON": sys.executable,
            "PYTHONPATH": str(REPO / "src") + os.pathsep + os.environ.get("PYTHONPATH", ""),
            "IAI_DAEMON_SOCKET_PATH": str(sock_path),
        }

        yield sock_path, plist_path, label, env
    finally:
        # Teardown: unload, kill any spawned test daemon (env-filtered),
        # remove socket file and directory. The plist itself lives under
        # tmp_path, which pytest cleans up automatically. Unload is
        # harmless when the agent was never loaded.
        subprocess.run(
            ["launchctl", "unload", "-w", str(plist_path)],
            capture_output=True, check=False,
        )
        # Polite SIGTERM first, brief settle, then SIGKILL for stragglers.
        _signal_test_daemons(sock_path, signal.SIGTERM)
        time.sleep(0.5)
        _signal_test_daemons(sock_path, signal.SIGKILL)
        try:
            sock_path.unlink()
        except OSError:  # includes FileNotFoundError
            pass
        try:
            sock_dir.rmdir()
        except OSError:
            pass
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _spawn_wrapper_send_initialize(
    built_wrapper: Path, env: dict,
) -> subprocess.Popen:
    """Launch one wrapper under ``node`` and write an MCP ``initialize`` request.

    Args:
        built_wrapper: Path to the built wrapper script passed to ``node``.
        env: Environment for the child process.

    Returns:
        The Popen handle. The caller reads the initialize response from
        stdout later (with a select timeout), after the daemon settle window.
    """
    handshake = {
        "jsonrpc": "2.0",
        "id": 1,
        "method": "initialize",
        "params": {
            "protocolVersion": "2025-03-26",
            "capabilities": {},
            "clientInfo": {"name": "concurrent-spawn-test", "version": "0.0"},
        },
    }
    wire = (json.dumps(handshake) + "\n").encode("utf-8")

    child = subprocess.Popen(
        ["node", str(built_wrapper)],
        cwd=str(REPO),
        env=env,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
    )
    try:
        assert child.stdin is not None
        child.stdin.write(wire)
        child.stdin.flush()
    except BrokenPipeError:
        # The wrapper died before reading stdin. The later read sees EOF and
        # the test reports that wrapper as a failed handshake.
        pass
    return child
|
||||
|
||||
|
||||
def _read_initialize_response(
    proc: subprocess.Popen, timeout_sec: float = 2.0,
) -> dict | None:
    """Read one JSON line from the wrapper's stdout, waiting up to ``timeout_sec``.

    Returns:
        The decoded JSON value, or ``None`` when the process has no stdout
        pipe, nothing becomes readable in time, the stream is at EOF, the
        line is not valid JSON, or an ``OSError`` occurs while reading.
    """
    stream = proc.stdout
    if stream is None:
        return None
    try:
        readable, _, _ = select.select([stream], [], [], timeout_sec)
        if readable:
            raw = stream.readline()
            if raw:
                return json.loads(raw.decode("utf-8"))
        return None
    except (json.JSONDecodeError, OSError):
        return None
|
||||
|
||||
|
||||
def _count_daemons_for_socket(sock_path: Path) -> int:
    """Count iai_mcp.daemon processes whose environment names ``sock_path``.

    A process is counted when its command line contains ``iai_mcp.daemon``
    and its ``IAI_DAEMON_SOCKET_PATH`` env var equals ``sock_path``. Per the
    test plist notes, the launchd-spawned daemon binds via fd 3 and does not
    use that env value, so the variable serves only as a test-isolation tag.
    Processes that vanish or deny access during inspection are skipped.
    """
    wanted = str(sock_path)

    def _is_tagged(candidate) -> bool:
        try:
            joined = " ".join(candidate.info.get("cmdline") or [])
            if "iai_mcp.daemon" not in joined:
                return False
            environ = candidate.info.get("environ") or {}
            return environ.get("IAI_DAEMON_SOCKET_PATH") == wanted
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            return False

    matches = 0
    for candidate in psutil.process_iter(["cmdline", "environ"]):
        if _is_tagged(candidate):
            matches += 1
    return matches
|
||||
|
||||
|
||||
def _count_binders(sock_path: Path) -> int:
    """Count distinct PIDs that hold ``sock_path`` open, according to ``lsof -U``.

    Parses ``lsof -U -F pn`` field output: a ``p<pid>`` line opens a process
    record and each following ``n<name>`` line names one of its sockets.
    """
    listing = subprocess.run(
        ["lsof", "-U", "-F", "pn"],
        capture_output=True, text=True, check=False,
    )
    wanted = str(sock_path)
    holders: set[int] = set()
    pid: int | None = None
    for record in listing.stdout.splitlines():
        tag, value = record[:1], record[1:]
        if tag == "p":
            # Start of a new process record; an unparsable pid invalidates
            # the name lines that follow it.
            try:
                pid = int(value)
            except ValueError:
                pid = None
        elif tag == "n" and pid is not None and value == wanted:
            holders.add(pid)
    return len(holders)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tests.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_5_concurrent_wrapper_cold_starts_yield_singleton(
    built_wrapper, test_launchagent,
):
    """SPEC R5 / A2: 5 staggered cold-starts -> exactly 1 daemon after settle.

    Setup (via test_launchagent fixture):
      - Tmp LaunchAgent loaded against an isolated test socket path.
      - Plist has RunAtLoad=false. Empirically (macOS Sequoia 15.x),
        launchctl load -w for a Sockets-activated agent may spawn the
        daemon eagerly anyway -- the test tolerates this via the relaxed
        pre-condition (<= 1) and asserts the singleton invariant on the
        post-condition (== 1).

    Body:
      - Spawn 5 wrapper subprocesses ~50 ms apart; each sends MCP initialize.
      - Wait 15s for the daemon to settle (cold-start ~8s embedder load +
        LanceDB open + buffer).
      - Read each wrapper's initialize response (2s timeout per wrapper).
      - Snapshot the daemon and binder counts, then terminate the wrappers.
        Wrapper cleanup runs in a ``finally`` so no node process, zombie, or
        pipe is left behind when any step above raises.

    Assertions:
      (a) ``_count_daemons_for_socket(sock_path) == 1`` -- the singleton
          invariant.
      (b) ``_count_binders(sock_path) <= 1`` -- lsof reports at most one
          process holding the socket file; launchd's pre-bound listener may
          or may not show up depending on the lsof version.
      (c) all 5 wrappers received a JSON-RPC response carrying ``result``.

    On post-Phase-7.1 code bridge.ts is a pure connector (Plan 07.1-04
    deleted spawn-fallback), so all wrappers share the launchd-spawned daemon
    and all three assertions hold.

    Regression-trap caveat: "FAILS deterministically on pre-Phase-7.1
    baseline" turned out to be platform-conditional. On macOS Sequoia 15.x
    ``launchctl load -w`` eagerly spawns the daemon, so the pre-7.1
    spawn-fallback would never fire and the old code would pass too. This
    test proves the post-7.1 invariant but is NOT a deterministic regression
    trap on Sequoia; on older macOS versions where spawn is deferred until
    first connection it would be. See the SUMMARY's "Regression-trap caveat"
    section for the deferred true-TOCTOU test architecture.
    """
    sock_path, _plist_path, _label, env = test_launchagent

    # Pre-condition: at most 1 daemon bound to this socket. 0 or 1 are both
    # fine (eager vs lazy launchd spawn); the critical assertion is the
    # post-condition (`== 1` after 5 wrappers).
    initial_daemon_count = _count_daemons_for_socket(sock_path)
    assert initial_daemon_count <= 1, (
        f"expected <= 1 daemon before test, found {initial_daemon_count} "
        f"(stale daemons from earlier test? cleanup leak?)"
    )

    procs: list[subprocess.Popen] = []
    stagger_intervals = [0.0, 0.05, 0.05, 0.05, 0.05]
    try:
        # Spawn 5 wrappers staggered by ~50 ms each (total window ~200 ms).
        for delay in stagger_intervals:
            if delay > 0:
                time.sleep(delay)
            procs.append(_spawn_wrapper_send_initialize(built_wrapper, env))

        # Wait 15s for the daemon to settle. Per advisor: do NOT shorten
        # this -- the 8s embedder cold-start is the empirical reality.
        time.sleep(15)

        # Read each wrapper's initialize response.
        init_responses: list[dict | None] = [
            _read_initialize_response(p, timeout_sec=2.0) for p in procs
        ]

        # Snapshot the counts BEFORE terminating wrappers, so the assertions
        # run against the steady state that was just observed.
        daemon_count = _count_daemons_for_socket(sock_path)
        binder_count = _count_binders(sock_path)
    finally:
        # Cleanup wrappers (the daemon stays up for the fixture teardown).
        for proc in procs:
            try:
                proc.terminate()
                proc.wait(timeout=2)
            except subprocess.TimeoutExpired:
                proc.kill()
                proc.wait()  # reap the killed child so it is not a zombie
            finally:
                for pipe in (proc.stdin, proc.stdout):
                    if pipe is None:
                        continue
                    try:
                        pipe.close()
                    except OSError:
                        # e.g. BrokenPipeError flushing stdin of a dead child
                        pass

    # Assertion (a) -- THE singleton invariant.
    assert daemon_count == 1, (
        f"singleton invariant violated: {daemon_count} daemons bound to "
        f"{sock_path} after 5 concurrent wrapper cold-starts. "
        f"contract: launchd handles the spawn-once; all wrappers join "
        f"the same daemon. Pre-Phase-7.1 baseline reproduces 2-5 daemons "
        f"via TOCTOU race in bridge.ts spawn-fallback."
    )
    # Assertion (b) -- file-holder confirmation: 0 (socket owned by launchd's
    # pre-bind) or 1 (the daemon also shows in lsof); 2+ means dueling binders.
    assert binder_count <= 1, (
        f"lsof reports {binder_count} binders for {sock_path}; "
        f"expected <= 1 (singleton)"
    )
    # Assertion (c) -- all 5 wrappers handshook successfully. An initialize
    # result proves a real daemon answered, not just a launchd-side accept.
    success_count = sum(
        1 for r in init_responses if r is not None and "result" in r
    )
    assert success_count == 5, (
        f"only {success_count}/5 wrappers received successful initialize "
        f"response. Responses: {init_responses}"
    )
|
||||
|
||||
|
||||
@pytest.mark.skip(
    reason="manual baseline regression check; run only against pre-Phase-7.1 "
    "(git stash) to demonstrate the regression-trap behavior",
)
def test_pre_phase_7_1_baseline_fails():
    """Documentation marker: manual procedure for the pre-7.1 baseline check.

    This test is always skipped and has no body; it records how a maintainer
    can re-prove the regression-trap behavior by hand:

      1. ``git stash`` (or ``git checkout <pre-7.1-commit>``).
      2. ``cd mcp-wrapper && npm run build`` to rebuild bridge.ts with the
         spawn-fallback restored.
      3. Run ``pytest -v`` on
         ``tests/test_concurrent_wrapper_spawn.py::test_5_concurrent_wrapper_cold_starts_yield_singleton``.
      4. Expected: assertion (a) FAILS with daemon_count >= 2 (the TOCTOU
         race produces multiple daemons that all bind in parallel before
         any of them notice the others).
      5. ``git stash pop`` (or ``git checkout main``) to restore Phase 7.1.
      6. Rebuild + rerun: the assertion passes.

    The executor of Plan 07.1-08 cannot easily git-stash mid-execution,
    because stashing would remove this test file from the working tree;
    hence the manual procedure.
    """
|
||||
143
tests/test_consolidated_from_edges.py
Normal file
143
tests/test_consolidated_from_edges.py
Normal file
|
|
@ -0,0 +1,143 @@
|
|||
"""Tests for the consolidated_from edge type (MEM-07, D-16, D-29).
|
||||
|
||||
After run_heavy_consolidation:
|
||||
- `consolidated_from` edges link the semantic summary record to each source
|
||||
episodic record in its cluster.
|
||||
- src = summary record (tier=semantic); dst = source episode.
|
||||
- Source episodes keep their literal_surface verbatim (MEM-01 preservation).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from uuid import UUID, uuid4
|
||||
|
||||
import pytest
|
||||
|
||||
from iai_mcp.types import EMBED_DIM, MemoryRecord
|
||||
|
||||
|
||||
def _record(text: str, tier: str = "episodic") -> MemoryRecord:
    """Build a fresh ``MemoryRecord`` whose literal surface is *text*.

    Every record gets a new random id, the same unit embedding (1.0 in the
    first slot, zeros elsewhere) and identical ``created_at`` /
    ``updated_at`` timestamps taken from the current UTC time.
    """
    stamp = datetime.now(timezone.utc)
    fields = {
        "id": uuid4(),
        "tier": tier,
        "literal_surface": text,
        "aaak_index": "",
        "embedding": [1.0, *([0.0] * (EMBED_DIM - 1))],
        "community_id": None,
        "centrality": 0.0,
        "detail_level": 2,
        "pinned": False,
        "stability": 0.0,
        "difficulty": 0.0,
        "last_reviewed": None,
        "never_decay": False,
        "never_merge": False,
        "provenance": [],
        "created_at": stamp,
        "updated_at": stamp,
        "tags": [],
        "language": "en",
    }
    return MemoryRecord(**fields)
|
||||
|
||||
|
||||
def _run_heavy(store):
    """Run one heavy consolidation pass over *store* with the LLM disabled.

    Uses session id ``"s-cfr"``, fresh budget / rate-limit ledgers bound to
    the same store, and ``has_api_key=False``. Returns whatever
    ``run_heavy_consolidation`` returns.
    """
    from iai_mcp.guard import BudgetLedger, RateLimitLedger
    from iai_mcp.sleep import SleepConfig, run_heavy_consolidation

    offline_config = SleepConfig(llm_enabled=False)
    budget_ledger = BudgetLedger(store)
    rate_ledger = RateLimitLedger(store)
    return run_heavy_consolidation(
        store,
        session_id="s-cfr",
        config=offline_config,
        budget=budget_ledger,
        rate=rate_ledger,
        has_api_key=False,
    )
|
||||
|
||||
|
||||
def test_consolidated_from_edge_created_on_heavy_run(tmp_path):
    """A fully connected cluster of 3 yields >= 3 consolidated_from edges."""
    from iai_mcp.store import EDGES_TABLE, MemoryStore

    store = MemoryStore(path=tmp_path)
    first, second, third = (_record(f"rec {i}") for i in range(3))
    for episode in (first, second, third):
        store.insert(episode)

    # Wire the three episodes into a triangle so they form one cluster.
    triangle = [
        (first.id, second.id),
        (second.id, third.id),
        (first.id, third.id),
    ]
    store.boost_edges(triangle, edge_type="hebbian", delta=0.5)

    _run_heavy(store)

    edges = store.db.open_table(EDGES_TABLE).to_pandas()
    consolidated = edges[edges["edge_type"] == "consolidated_from"]
    assert len(consolidated) >= 3
|
||||
|
||||
|
||||
def test_consolidated_from_edge_points_semantic_to_episodes(tmp_path):
    """Each consolidated_from edge joins one summary to one source episode.

    The summary endpoint must be a stored record with tier ``semantic`` and
    the other endpoint must be one of the inserted episodes, still with tier
    ``episodic``. Either (src, dst) orientation is accepted because
    boost_edges canonicalises the pair as sorted.
    """
    from iai_mcp.store import EDGES_TABLE, MemoryStore

    store = MemoryStore(path=tmp_path)
    recs = [_record(f"rec {i}") for i in range(3)]
    for r in recs:
        store.insert(r)
    store.boost_edges(
        [(recs[0].id, recs[1].id), (recs[1].id, recs[2].id), (recs[0].id, recs[2].id)],
        edge_type="hebbian", delta=0.5,
    )

    _run_heavy(store)

    df = store.db.open_table(EDGES_TABLE).to_pandas()
    cf = df[df["edge_type"] == "consolidated_from"]
    assert not cf.empty

    source_ids = {str(r.id) for r in recs}
    for _, row in cf.iterrows():
        src, dst = row["src"], row["dst"]
        # Exactly one endpoint must be a source episode; the other one is
        # the summary (an id we did not insert ourselves).
        if src not in source_ids and dst in source_ids:
            summary_id, episode_id = src, dst
        elif dst not in source_ids and src in source_ids:
            summary_id, episode_id = dst, src
        else:
            # Edge between two source records -- that's wrong for consolidated_from.
            pytest.fail(
                f"consolidated_from edge without a summary endpoint: "
                f"{row['src']} -> {row['dst']}"
            )

        summary = store.get(UUID(summary_id))
        assert summary is not None
        assert summary.tier == "semantic"

        # Checked for both orientations, so a reversed edge cannot hide an
        # episode whose tier was changed by consolidation.
        episode = store.get(UUID(episode_id))
        assert episode is not None
        assert episode.tier == "episodic"
|
||||
|
||||
|
||||
def test_consolidated_from_edges_preserve_literal_in_episodes(tmp_path):
    """Heavy consolidation leaves each source episode's literal_surface intact."""
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    # Insertion-ordered mapping: expected literal -> the record carrying it.
    episodes = {text: _record(text) for text in ("alpha", "beta", "gamma")}
    for episode in episodes.values():
        store.insert(episode)

    alpha, beta, gamma = episodes.values()
    store.boost_edges(
        [(alpha.id, beta.id), (beta.id, gamma.id), (alpha.id, gamma.id)],
        edge_type="hebbian",
        delta=0.5,
    )

    _run_heavy(store)

    for expected, episode in episodes.items():
        reloaded = store.get(episode.id)
        assert reloaded is not None
        assert reloaded.literal_surface == expected
|
||||
313
tests/test_constitutional_guards.py
Normal file
313
tests/test_constitutional_guards.py
Normal file
|
|
@ -0,0 +1,313 @@
|
|||
"""Grep-based static guards for constitutional invariants.
|
||||
|
||||
Verifies C1..C6 hold across the daemon-side module set.
|
||||
|
||||
Catalog:
|
||||
- C3: no ANTHROPIC_API_KEY anywhere in daemon-side code.
|
||||
- Pitfall 2: no fcntl.lockf (close-fd trap) anywhere in src/iai_mcp/.
|
||||
- C5: no assignment to `.literal_surface` in daemon-side modules.
|
||||
- no hardcoded Western clock-time in quiet_window.py.
|
||||
- seal: PROFILE_KNOBS still has exactly 11 entries (daemon does NOT
|
||||
add knobs).
|
||||
- C6: identity_audit.py does NOT import ProcessLock / concurrency module.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
# Package source directory: <repo root>/src/iai_mcp, resolved from this file.
SRC = Path(__file__).resolve().parents[1].joinpath("src", "iai_mcp")

# File names of the daemon-side modules. Some of them (bedtime, host_cli)
# may not exist yet because they belong to future plans; the guards only
# scan the ones that are present today.
DAEMON_MODULES: tuple[str, ...] = (
    "daemon.py",
    "dream.py",
    "identity_audit.py",
    "bedtime.py",
    "host_cli.py",
    "insight.py",
    "quiet_window.py",
    "daemon_state.py",
    "concurrency.py",
    "hippea_cascade.py",  # TOK-14 / D5-05
)
|
||||
|
||||
|
||||
def _existing_daemon_files() -> list[Path]:
    """Return paths of the DAEMON_MODULES entries that exist under SRC.

    Order follows DAEMON_MODULES; names with no file on disk are dropped.
    """
    present: list[Path] = []
    for module_name in DAEMON_MODULES:
        candidate = SRC / module_name
        if candidate.exists():
            present.append(candidate)
    return present
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# C3: ANTHROPIC_API_KEY must never appear in daemon-side code
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_no_api_key_in_daemon():
    """C3 (DAEMON-07 / D-14): zero paid-API cost. ANTHROPIC_API_KEY must not
    appear in ANY daemon-side module. Insight module uses `claude -p`
    subprocess with the user's subscription instead.

    This is a plain substring scan over every existing daemon module, so a
    mention inside a comment or docstring counts as a violation too.
    """
    offenders: list[str] = [
        f.name
        for f in _existing_daemon_files()
        # Decode explicitly as UTF-8 so the scan does not depend on the
        # host's locale encoding.
        if "ANTHROPIC_API_KEY" in f.read_text(encoding="utf-8")
    ]
    assert not offenders, f"C3 violation: ANTHROPIC_API_KEY found in {offenders}"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Pitfall 2: fcntl.lockf must never be used (POSIX close-fd trap)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_no_lockf_anywhere():
    """Pitfall 2 (apenwarr 2010): POSIX fcntl.lockf is released when ANY fd
    referring to the same file is closed. We must use BSD fcntl.flock which
    is bound to the open file description. Scan ALL iai_mcp/*.py, not just
    daemon modules -- mixing the two is also a bug.

    The glob is non-recursive: only modules directly inside SRC are read.
    """
    offenders: list[str] = []
    # sorted() keeps the failure message stable across filesystems.
    for f in sorted(SRC.glob("*.py")):
        # Decode explicitly as UTF-8 rather than the locale default.
        text = f.read_text(encoding="utf-8")
        if "fcntl.lockf" in text:
            offenders.append(f.name)
    assert not offenders, f"Pitfall 2 violation: fcntl.lockf in {offenders}"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# C5: daemon must NEVER assign to record.literal_surface
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_no_literal_surface_mutation_in_daemon():
    """C5 literal preservation. Daemon-side modules must not contain
    `.literal_surface =` (or `.literal_surface +=`) assignment syntax.
    Reading `.literal_surface` -- including `==` comparisons -- is allowed;
    writing is forbidden.
    """
    # `(?!=)` stops the guard from flagging `.literal_surface == x`, which
    # is a read. `\+?` additionally catches in-place concatenation.
    pattern = re.compile(r"\.literal_surface\s*\+?=(?!=)")
    offenders: list[tuple[str, list[str]]] = []
    for f in _existing_daemon_files():
        # Decode explicitly as UTF-8 rather than the locale default.
        text = f.read_text(encoding="utf-8")
        matches = pattern.findall(text)
        if matches:
            offenders.append((f.name, matches))
    assert not offenders, f"C5 violation: {offenders}"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# no hardcoded Western 9-5 / clock-time in quiet_window.py
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_no_hardcoded_clock_time_in_quiet_window():
    """D-05 global-product mandate: quiet window must be LEARNED from event
    history, never hardcoded. Flag obvious clock-time literals.

    Passes vacuously when quiet_window.py does not exist.
    """
    f = SRC / "quiet_window.py"
    if not f.exists():
        return  # module not yet created
    # Decode explicitly as UTF-8 rather than the locale default.
    text = f.read_text(encoding="utf-8")
    # Look for common patterns that would indicate clock-based decisions.
    forbidden = [
        r"\b22:00\b",
        r"\b02:00\b",
        r"hour\s*==\s*22\b",
        r"hour\s*==\s*2\b",
    ]
    offenders: list[str] = [pat for pat in forbidden if re.search(pat, text)]
    assert not offenders, (
        f"D-05 violation: hardcoded clock-time patterns in quiet_window.py: {offenders}"
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Plan 07.12-02 seal: PROFILE_KNOBS has exactly 11 entries
|
||||
# (10 autistic-kernel + 1 operator wake_depth MCP-12; AUTIST-02/08/11/12 removed)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_profile_knobs_still_sealed():
    """The knob registry stays sealed at exactly 11 entries.

    Phase 07.12-02 fixed the count after AUTIST-02/08/11/12 were removed.
    The daemon must not register new knobs: transient state (hebbian-rate
    boost during developmental sigma, etc.) belongs in events or
    .daemon-state.json, never in PROFILE_KNOBS.
    """
    from iai_mcp import profile

    knob_count = len(profile.PROFILE_KNOBS)
    assert knob_count == 11, (
        f"PROFILE_KNOBS unseal: expected 11, got {knob_count}"
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# TOK-13 / D5-04: profile knob names must NEVER appear in the
|
||||
# session-start payload at any wake_depth. Knobs are applied server-side via
|
||||
# response_decorator.apply_profile; their names must not cross the MCP wire.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_no_profile_knob_in_session_start_payload(tmp_path):
    """TOK-13: knob names must not leak into the lazy pointer fields.

    The session-start payload is assembled once per wake_depth
    (minimal / standard / deep) and only the NEW lazy fields --
    identity_pointer, brain_handle, topic_cluster_hint -- are scanned.

    The legacy L0 identity kernel (`_seed_l0_identity`) historically recites
    a handful of autistic-kernel defaults inline in its literal_surface
    ('literal_preservation=strong, masking_off=true, ...'). That predates
    TOK-13 and lives inside the user's identity record itself, not in
    decorator output, so the l0 / l1 / l2 / rich_club segments are
    deliberately exempt from this guard.

    Knobs are applied server-side by response_decorator (Plan 05-03 D5-04);
    their names must never reach the MCP wire.
    """
    from iai_mcp import profile
    from iai_mcp.community import CommunityAssignment
    from iai_mcp.core import _seed_l0_identity
    from iai_mcp.session import assemble_session_start
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    _seed_l0_identity(store)
    assignment = CommunityAssignment()

    for mode in ("minimal", "standard", "deep"):
        state = profile.default_state()
        state["wake_depth"] = mode
        payload = assemble_session_start(
            store, assignment, [], profile_state=state,
        )
        # Concatenate only the lazy pointer fields; everything else in the
        # payload carries user-authored identity content.
        lazy_fields = (
            payload.identity_pointer,
            payload.brain_handle,
            payload.topic_cluster_hint,
        )
        lazy_text = " ".join(lazy_fields)

        # First knob name (if any) found verbatim in the lazy text. No knob
        # is exempted here, wake_depth included.
        leaked = next(
            (name for name in profile.PROFILE_KNOBS if name in lazy_text),
            None,
        )
        assert leaked is None, (
            f"TOK-13 violation: knob name '{leaked}' found in "
            f"lazy session-start payload at wake_depth={mode} "
            f"(identity_pointer/brain_handle/topic_cluster_hint)"
        )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Pitfall 1: wake_depth=minimal payload (<=30 raw tok) is below the
|
||||
# Anthropic Sonnet 4.6 cache minimum (2048 tok). Adding cache_control in
|
||||
# session.py would be silently ignored — wastes a breakpoint slot. Guard
|
||||
# against accidental regression.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_no_cache_control_in_session_assembler():
    """Pitfall 1: session.py must not set cache_control (minimal prefix
    cannot be cached on Sonnet 4.6 / Opus 4.7; standard+deep caching lives
    in the TS wrapper, not the Python assembler).

    Fails if session.py is missing.
    """
    f = SRC / "session.py"
    assert f.exists(), "session.py missing"
    # Decode explicitly as UTF-8 rather than the locale default.
    text = f.read_text(encoding="utf-8")
    # Comments that mention "cache_control" are fine (they document the
    # pitfall). We only guard against code-like references: the name
    # followed by `=` (kwarg / assignment) or `:` (dict-style key).
    pattern = re.compile(r"cache_control\s*[:=]")
    offenders = pattern.findall(text)
    assert not offenders, (
        f"Pitfall 1 violation: cache_control assignment/kwarg in session.py: "
        f"{offenders}"
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# C3 + TOK-13: response_decorator must be pure-local. No Anthropic
|
||||
# SDK import, no ANTHROPIC_API_KEY read, no paid-API coupling.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_no_api_key_in_response_decorator():
    """C3 + TOK-13: response_decorator.py stays local-only.

    Fails if the module is missing, or if its text mentions "anthropic" in
    any letter case.
    """
    f = SRC / "response_decorator.py"
    assert f.exists(), "response_decorator.py missing after Plan 05-03"
    # Decode explicitly as UTF-8 rather than the locale default.
    text = f.read_text(encoding="utf-8")
    lower = text.lower()
    assert "anthropic" not in lower, (
        "C3 violation: response_decorator references 'anthropic'"
    )
    # The two checks below are implied by the case-insensitive one above;
    # they are kept for their more specific failure messages.
    assert "ANTHROPIC_API_KEY" not in text, (
        "C3 violation: response_decorator references ANTHROPIC_API_KEY"
    )
    assert "import anthropic" not in lower, (
        "C3 violation: response_decorator imports anthropic SDK"
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# C6: identity_audit.py must not import ProcessLock
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_identity_audit_has_no_lock_import():
    """C6: continuous audit runs even when daemon is paused. To make that
    invariant mechanical, identity_audit.py must NOT import the concurrency
    module -- the only way to accidentally take a lock is to import it.

    Passes vacuously when identity_audit.py does not exist.
    """
    f = SRC / "identity_audit.py"
    if not f.exists():
        return
    # Decode explicitly as UTF-8 rather than the locale default.
    text = f.read_text(encoding="utf-8")
    # No import of iai_mcp.concurrency, no `ProcessLock` symbol reference.
    assert "iai_mcp.concurrency" not in text, (
        "C6 violation: identity_audit.py imports iai_mcp.concurrency"
    )
    assert "ProcessLock" not in text, (
        "C6 violation: identity_audit.py references ProcessLock"
    )
    # Also: no `fcntl.` calls (belt-and-braces).
    assert "fcntl." not in text, (
        "C6 violation: identity_audit.py uses fcntl directly"
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# TOK-14: HIPPEA cascade module guards
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_no_api_key_in_hippea_cascade():
    """C3 (D5-05): HIPPEA cascade is pure-local. ANTHROPIC_API_KEY and
    `anthropic` SDK imports are forbidden in hippea_cascade.py.

    Passes vacuously when hippea_cascade.py does not exist.
    """
    f = SRC / "hippea_cascade.py"
    if not f.exists():
        return  # module not yet created
    # Decode explicitly as UTF-8 rather than the locale default.
    text = f.read_text(encoding="utf-8")
    assert "ANTHROPIC_API_KEY" not in text, (
        "C3 violation: ANTHROPIC_API_KEY in hippea_cascade.py"
    )
    assert "import anthropic" not in text, (
        "C3 violation: `import anthropic` in hippea_cascade.py"
    )
    assert "from anthropic" not in text, (
        "C3 violation: `from anthropic` in hippea_cascade.py"
    )
|
||||
|
||||
|
||||
def test_hippea_cascade_is_read_only_against_store():
    """C6 (D5-05): cascade prefetch never mutates the store.

    Grep for store-mutating call patterns (with trailing open-paren so the
    module's own enumerated-forbidden list in the docstring does not trip
    this guard). Passes vacuously when hippea_cascade.py does not exist.
    """
    f = SRC / "hippea_cascade.py"
    if not f.exists():
        return
    # Decode explicitly as UTF-8 rather than the locale default.
    text = f.read_text(encoding="utf-8")
    forbidden_calls = [
        "store.insert(",
        "store.append_provenance(",
        "store.append_provenance_batch(",
        "store.update(",
        "store.boost_edges(",
        "store.add_contradicts_edge(",
    ]
    offenders = [p for p in forbidden_calls if p in text]
    assert not offenders, (
        f"C6 violation: hippea_cascade.py contains store mutators: {offenders}"
    )
|
||||
426
tests/test_core_bedtime_inject.py
Normal file
426
tests/test_core_bedtime_inject.py
Normal file
|
|
@ -0,0 +1,426 @@
|
|||
"""Tests for core.py additions -- DAEMON-06 / DAEMON-09.
|
||||
|
||||
Covers 8 behaviours:
|
||||
1. consent=False short-circuits: socket is NEVER opened (C2 guard)
|
||||
2. consent=True opens socket, sends NDJSON, returns daemon response
|
||||
3. Missing / wrong-typed consent raises ValueError (ASVS V5 schema)
|
||||
4. force_wake opens socket, sends NDJSON with 900s timeout
|
||||
5. force_wake handles daemon-unreachable gracefully
|
||||
6. memory_recall dispatch injects sleep_suggestion when dual-gate passes
|
||||
7. memory_recall dispatch does NOT include sleep_suggestion key when gate fails
|
||||
8. memory_recall does NOT break if detect_wind_down raises (silent fail)
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
import tempfile
|
||||
import threading
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from iai_mcp import core
|
||||
|
||||
|
||||
# ----------------------------------------------------------- threaded helper
|
||||
|
||||
|
||||
class _ThreadedFakeDaemon:
    """Fake daemon that survives across multiple asyncio.run() calls.

    `core.dispatch` uses its own asyncio.run per JSON-RPC method, which tears
    down the event loop each call. A server started via asyncio.run() inside
    the test body dies when that call returns, so the next asyncio.run can
    connect to the socket file but no task is accepting -> timeout. Running
    the server on a private background loop in a daemon thread keeps the
    accept loop alive for the full test lifetime.

    Each connection is served once: one NDJSON line is read and appended to
    `captured` (when non-empty), then `reply` is written back as one NDJSON
    line and the connection is closed.
    """

    def __init__(self, path: Path, captured: list, reply: dict) -> None:
        self.path = path          # unix-socket path to listen on
        self.captured = captured  # caller-owned list receiving decoded requests
        self.reply = reply        # JSON-serialisable reply sent to every client
        self._loop: asyncio.AbstractEventLoop | None = None
        self._server: asyncio.AbstractServer | None = None
        self._thread: threading.Thread | None = None
        self._ready = threading.Event()  # set once the server is listening

    def start(self) -> None:
        """Start the server thread and block until it is listening.

        Raises AssertionError if the server is not up within 5 seconds.
        """

        def _run() -> None:
            loop = asyncio.new_event_loop()
            self._loop = loop
            asyncio.set_event_loop(loop)

            async def _handle(reader: asyncio.StreamReader, writer: asyncio.StreamWriter) -> None:
                try:
                    line = await reader.readline()
                    if line:
                        self.captured.append(json.loads(line.decode("utf-8")))
                    writer.write((json.dumps(self.reply) + "\n").encode("utf-8"))
                    await writer.drain()
                finally:
                    # Best-effort close: the peer may already be gone.
                    try:
                        writer.close()
                        await writer.wait_closed()
                    except Exception:
                        pass

            async def _serve() -> None:
                self.path.parent.mkdir(parents=True, exist_ok=True)
                self._server = await asyncio.start_unix_server(_handle, path=str(self.path))
                self._ready.set()
                async with self._server:
                    await self._server.serve_forever()

            try:
                loop.run_until_complete(_serve())
            except asyncio.CancelledError:
                # Normal shutdown: closing the server cancels serve_forever().
                pass
            finally:
                loop.close()

        self._thread = threading.Thread(target=_run, daemon=True)
        self._thread.start()
        assert self._ready.wait(timeout=5.0), "fake daemon failed to start within 5s"

    def stop(self) -> None:
        """Shut the server down and join the thread; safe to call repeatedly.

        Closing the server makes the background thread finish and close its
        own loop, possibly before this method gets to talk to the loop again.
        Every cross-thread call is therefore guarded against an
        already-closed loop instead of letting RuntimeError escape.
        """
        loop = self._loop
        if loop is None:
            return

        async def _shutdown() -> None:
            if self._server is not None:
                self._server.close()
                await self._server.wait_closed()

        if not loop.is_closed():
            try:
                fut = asyncio.run_coroutine_threadsafe(_shutdown(), loop)
                fut.result(timeout=5.0)
            except Exception:
                # Best effort: a timeout or a loop that closed underneath us
                # must not fail the test's teardown.
                pass
            try:
                # Fallback in case the serve task did not end by itself.
                loop.call_soon_threadsafe(loop.stop)
            except RuntimeError:
                # The thread already closed the loop -- nothing left to stop.
                pass
        if self._thread is not None:
            self._thread.join(timeout=5.0)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- fixtures
|
||||
|
||||
|
||||
@pytest.fixture
def tmp_socket(tmp_path: Path, request: pytest.FixtureRequest) -> Path:
    """Provide a short unique unix-socket path.

    Unix domain sockets have a ~104-byte path limit on macOS; tmp_path can be
    too long when driven by `pytest-xdist` worker names. When
    ``tmp_path / "d.sock"`` is longer than 100 characters, fall back to a
    fresh directory from ``tempfile.mkdtemp`` (the system temp dir). That
    fallback directory is removed again when the test finishes.
    """
    candidate = tmp_path / "d.sock"
    if len(str(candidate)) <= 100:
        return candidate

    import shutil  # local import: only needed on the fallback path

    short_dir = Path(tempfile.mkdtemp(prefix="iai-sock-"))
    # pytest cleans tmp_path itself; the mkdtemp directory is ours to remove.
    request.addfinalizer(lambda: shutil.rmtree(short_dir, ignore_errors=True))
    return short_dir / "d.sock"
|
||||
|
||||
|
||||
async def _run_fake_server(
    sock: Path,
    captured: list,
    reply: dict,
    *,
    delay_before_reply: float = 0.0,
) -> asyncio.AbstractServer:
    """Start a fake daemon listening on the unix socket *sock*.

    For each connection the server reads one NDJSON line, appends the decoded
    object to `captured` (when the line is non-empty), optionally sleeps
    `delay_before_reply` seconds, sends `reply` back as one NDJSON line and
    closes the connection. The started server is returned so the caller can
    close it afterwards.
    """

    async def _serve_one(reader: asyncio.StreamReader, writer: asyncio.StreamWriter) -> None:
        try:
            request_line = await reader.readline()
            if request_line:
                captured.append(json.loads(request_line.decode("utf-8")))
            if delay_before_reply > 0:
                await asyncio.sleep(delay_before_reply)
            payload = json.dumps(reply) + "\n"
            writer.write(payload.encode("utf-8"))
            await writer.drain()
        finally:
            # Closing is best-effort; errors while closing are ignored.
            try:
                writer.close()
                await writer.wait_closed()
            except Exception:
                pass

    sock.parent.mkdir(parents=True, exist_ok=True)
    server = await asyncio.start_unix_server(_serve_one, path=str(sock))
    return server
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- consent gate
|
||||
|
||||
|
||||
def test_consent_false_short_circuits_no_socket_touch(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """C2 invariant: consent=False must NEVER open the daemon socket.

    asyncio.open_unix_connection is replaced with a stub that raises, so any
    attempt to connect fails the test.
    """

    async def _forbidden_connect(*args, **kwargs):
        raise AssertionError(
            "C2 violation: asyncio.open_unix_connection reached with consent=False"
        )

    monkeypatch.setattr(asyncio, "open_unix_connection", _forbidden_connect)

    declined = {"consent": False, "reason": "not ready"}
    outcome = asyncio.run(core.handle_initiate_sleep_mode(declined))
    assert outcome == {"ok": False, "reason": "consent_declined"}
|
||||
|
||||
|
||||
def test_consent_missing_raises_value_error() -> None:
    """A request without a `consent` key is rejected with ValueError."""
    request = {"reason": "missing"}
    with pytest.raises(ValueError, match="consent"):
        asyncio.run(core.handle_initiate_sleep_mode(request))
|
||||
|
||||
|
||||
def test_consent_wrong_type_raises_value_error() -> None:
    """Non-bool `consent` values are rejected with ValueError."""
    # Strings / ints / None must all be rejected; only literal bool passes.
    non_bool_values = ("true", 1, 0, None, [True])
    for bad in non_bool_values:
        request = {"consent": bad, "reason": "x"}
        with pytest.raises(ValueError):
            asyncio.run(core.handle_initiate_sleep_mode(request))
|
||||
|
||||
|
||||
def test_reason_missing_raises_value_error() -> None:
    """A consenting request without a `reason` key is rejected with ValueError."""
    request = {"consent": True}
    with pytest.raises(ValueError, match="reason"):
        asyncio.run(core.handle_initiate_sleep_mode(request))
|
||||
|
||||
|
||||
def test_reason_wrong_type_raises_value_error() -> None:
    """A non-string `reason` (here the int 42) is rejected with ValueError."""
    request = {"consent": True, "reason": 42}
    with pytest.raises(ValueError, match="reason"):
        asyncio.run(core.handle_initiate_sleep_mode(request))
|
||||
|
||||
|
||||
def test_consent_true_opens_socket_and_returns_reply(
    tmp_socket: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """consent=True path: real socket round-trip against a fake daemon.

    The handler's return value must equal the daemon's reply, and the daemon
    must have received exactly one `user_initiated_sleep` message carrying
    the reason and a `ts` field.
    """
    received: list[dict] = []

    async def _round_trip() -> dict:
        server = await _run_fake_server(
            tmp_socket, received, {"ok": True, "state": "TRANSITIONING"},
        )
        try:
            async with server:
                # Point core at the fake daemon's socket instead of the real one.
                monkeypatch.setattr(core, "SOCKET_PATH", tmp_socket)
                request = {"consent": True, "reason": "good night"}
                return await core.handle_initiate_sleep_mode(request)
        finally:
            server.close()
            await server.wait_closed()

    reply = asyncio.run(_round_trip())
    assert reply == {"ok": True, "state": "TRANSITIONING"}
    assert len(received) == 1
    message = received[0]
    assert message["type"] == "user_initiated_sleep"
    assert message["reason"] == "good night"
    assert "ts" in message  # ISO timestamp attached
|
||||
|
||||
|
||||
def test_consent_true_daemon_unreachable_returns_graceful_error(
    tmp_socket: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Daemon down (socket file absent) must return daemon_not_running."""
    # No server is started on purpose: the socket path must not exist.
    assert not tmp_socket.exists()
    monkeypatch.setattr(core, "SOCKET_PATH", tmp_socket)

    request = {"consent": True, "reason": "night"}
    outcome = asyncio.run(core.handle_initiate_sleep_mode(request))

    assert outcome["ok"] is False
    assert outcome["reason"] == "daemon_not_running"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- force_wake
|
||||
|
||||
|
||||
def test_force_wake_sends_correct_message(
    tmp_socket: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """force_wake sends one `force_wake` message and returns the daemon reply."""
    received: list[dict] = []

    async def _round_trip() -> dict:
        server = await _run_fake_server(
            tmp_socket, received, {"ok": True, "state": "WAKE"},
        )
        try:
            async with server:
                # Point core at the fake daemon's socket instead of the real one.
                monkeypatch.setattr(core, "SOCKET_PATH", tmp_socket)
                return await core.handle_force_wake({})
        finally:
            server.close()
            await server.wait_closed()

    reply = asyncio.run(_round_trip())
    assert reply == {"ok": True, "state": "WAKE"}
    assert len(received) == 1
    message = received[0]
    assert message["type"] == "force_wake"
    assert "ts" in message
|
||||
|
||||
|
||||
def test_force_wake_daemon_unreachable_graceful(
    tmp_socket: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """With no socket file present, force_wake reports daemon_not_running."""
    assert not tmp_socket.exists()
    monkeypatch.setattr(core, "SOCKET_PATH", tmp_socket)

    outcome = asyncio.run(core.handle_force_wake({}))

    assert outcome["ok"] is False
    assert outcome["reason"] == "daemon_not_running"
|
||||
|
||||
|
||||
def test_force_wake_timeout_is_fifteen_minutes() -> None:
    """The cooperative force-wake cap is 15 minutes, i.e. 900 seconds."""
    fifteen_minutes_sec = 15 * 60
    assert core.FORCE_WAKE_TIMEOUT_SEC == fifteen_minutes_sec
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- inject helper
|
||||
|
||||
|
||||
def _window_covering_now() -> tuple[int, int]:
    """Return a quiet_window (start_bucket, duration) that contains `now`.

    Buckets are 30-minute slots of the local day (48 per day), computed from
    the current time in the timezone returned by `load_user_tz`. The window
    starts 2 buckets (1h) before the current bucket and lasts 8 buckets
    (4h), so the result does not depend on the test-host clock.
    """
    from iai_mcp.tz import load_user_tz

    user_tz = load_user_tz()
    local_now = datetime.now(timezone.utc).astimezone(user_tz)
    minutes_into_day = local_now.hour * 60 + local_now.minute
    current_bucket = minutes_into_day // 30
    # Wrap around midnight so the start bucket stays within 0..47.
    start_bucket = (current_bucket - 2) % 48
    return (start_bucket, 8)
|
||||
|
||||
|
||||
def test_inject_sleep_suggestion_dual_gate_pass(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """When phrase + window both pass, response gains sleep_suggestion."""
    stored_state = {"quiet_window": _window_covering_now()}
    # Each load hands out a fresh copy of the fake daemon state.
    monkeypatch.setattr(
        "iai_mcp.daemon_state.load_state",
        lambda: dict(stored_state),
    )

    response: dict = {"hits": [], "anti_hits": []}
    core._inject_sleep_suggestion(response, cue="good night", language="en")

    assert "sleep_suggestion" in response, (
        f"expected injection on dual-gate pass, got {response!r}"
    )
    suggestion = response["sleep_suggestion"]
    assert suggestion["message_hint"] == "user_wind_down_detected"
|
||||
|
||||
|
||||
def test_inject_sleep_suggestion_no_phrase(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """No phrase match -> response has no sleep_suggestion key."""
    stored_state = {"quiet_window": _window_covering_now()}

    def _load_state() -> dict:
        return dict(stored_state)

    monkeypatch.setattr("iai_mcp.daemon_state.load_state", _load_state)

    response: dict = {"hits": [], "anti_hits": []}
    unrelated_cue = "how do I configure pytest"
    core._inject_sleep_suggestion(response, cue=unrelated_cue, language="en")

    assert "sleep_suggestion" not in response
|
||||
|
||||
|
||||
def test_inject_sleep_suggestion_no_window(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Phrase match but no quiet_window -> response has no sleep_suggestion."""

    def _empty_state() -> dict:
        # Daemon state without a `quiet_window` entry.
        return {}

    monkeypatch.setattr("iai_mcp.daemon_state.load_state", _empty_state)

    response: dict = {"hits": [], "anti_hits": []}
    core._inject_sleep_suggestion(response, cue="good night", language="en")

    assert "sleep_suggestion" not in response
|
||||
|
||||
|
||||
def test_inject_sleep_suggestion_detector_raises_is_silent(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """If detect_wind_down raises, response goes out untouched."""

    def _failing_detector(*args, **kwargs):
        raise RuntimeError("synthetic bedtime failure")

    monkeypatch.setattr("iai_mcp.bedtime.detect_wind_down", _failing_detector)

    response: dict = {"hits": [], "anti_hits": [], "budget_used": 0}
    snapshot = {"hits": [], "anti_hits": [], "budget_used": 0}

    # Must not propagate the RuntimeError.
    core._inject_sleep_suggestion(response, cue="good night", language="en")

    assert "sleep_suggestion" not in response
    # Pre-existing keys untouched.
    assert response == snapshot
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- dispatch wiring
|
||||
|
||||
|
||||
def test_dispatch_routes_initiate_sleep_mode(
    tmp_socket: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """``core.dispatch("initiate_sleep_mode", ...)`` performs a real socket
    round-trip against a fake daemon and returns the daemon's reply.

    The fake daemon is started before the call and stopped in ``finally``;
    per the original note it runs on its own background thread/loop so it
    outlives the ``asyncio.run`` that ``dispatch`` performs internally.
    """
    seen_messages: list[dict] = []
    canned_reply = {"ok": True}
    fake_daemon = _ThreadedFakeDaemon(tmp_socket, seen_messages, canned_reply)
    fake_daemon.start()
    try:
        monkeypatch.setattr(core, "SOCKET_PATH", tmp_socket)
        call_params = {"consent": True, "reason": "test"}
        # The store argument is not used by these handlers, so None is passed.
        reply = core.dispatch(None, "initiate_sleep_mode", call_params)
        assert reply == {"ok": True}
        first_message = seen_messages[0]
        assert first_message["type"] == "user_initiated_sleep"
    finally:
        fake_daemon.stop()
|
||||
|
||||
|
||||
def test_dispatch_routes_force_wake(
    tmp_socket: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """``core.dispatch("force_wake", {})`` returns the fake daemon's reply and
    the daemon receives a message whose ``type`` is ``force_wake``."""
    seen_messages: list[dict] = []
    canned_reply = {"ok": True, "state": "WAKE"}
    fake_daemon = _ThreadedFakeDaemon(tmp_socket, seen_messages, canned_reply)
    fake_daemon.start()
    try:
        monkeypatch.setattr(core, "SOCKET_PATH", tmp_socket)
        reply = core.dispatch(None, "force_wake", {})
        assert reply == {"ok": True, "state": "WAKE"}
        first_message = seen_messages[0]
        assert first_message["type"] == "force_wake"
    finally:
        fake_daemon.stop()
|
||||
168
tests/test_core_digest_inject.py
Normal file
168
tests/test_core_digest_inject.py
Normal file
|
|
@ -0,0 +1,168 @@
|
|||
"""Tests for core._inject_overnight_digest (DAEMON-11).
|
||||
|
||||
Covers 5 behaviours:
|
||||
1. First memory_recall of the day (>18h since last shown) gets overnight_digest.
|
||||
2. Second recall within <18h does NOT include overnight_digest.
|
||||
3. Empty state / no pending digest -> no overnight_digest key.
|
||||
4. Digest is cleared from state after one delivery (D-24 once-per-window).
|
||||
5. Exception in get_pending_digest does NOT break memory_recall (silent fail).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fixtures
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture
def isolated_state(tmp_path, monkeypatch):
    """Redirect ``daemon_state.STATE_PATH`` to a file under ``tmp_path`` and
    return that path."""
    from iai_mcp import daemon_state as state_module

    sandbox_file = tmp_path / ".daemon-state.json"
    monkeypatch.setattr(state_module, "STATE_PATH", sandbox_file)
    return sandbox_file
|
||||
|
||||
|
||||
# digest shape -- every required field populated.
|
||||
_FULL_DIGEST = {
|
||||
"rem_cycles_completed": 4,
|
||||
"episodes_processed": 10,
|
||||
"schemas_induced_tier0": 3,
|
||||
"claude_call_used": True,
|
||||
"quota_used_pct": 0.003,
|
||||
"main_insight_text": "today's unifying insight",
|
||||
"sigma_observed": 1.2,
|
||||
"s5_drift_alerts": [],
|
||||
"daemon_uptime_hours": 8,
|
||||
"timed_out_cycles": 0,
|
||||
}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 1: first recall of day gets digest
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_first_recall_gets_digest(isolated_state):
    """With a pending digest and a last-shown stamp 20 hours old, the injector
    adds ``overnight_digest`` carrying the seeded field values."""
    from iai_mcp.core import _inject_overnight_digest
    from iai_mcp.daemon_state import save_state

    # 20h is past the 18h threshold mentioned in the module docstring.
    twenty_hours_ago = datetime.now(timezone.utc) - timedelta(hours=20)
    seeded = {
        "pending_digest": dict(_FULL_DIGEST),
        "last_digest_shown_at": twenty_hours_ago.isoformat(),
    }
    save_state(seeded)

    payload: dict = {
        "hits": [],
        "anti_hits": [],
        "activation_trace": [],
        "budget_used": 0,
    }
    _inject_overnight_digest(payload)

    assert "overnight_digest" in payload
    surfaced = payload["overnight_digest"]

    # Field-by-field equality checks on the surfaced digest.
    expected_values = (
        ("rem_cycles_completed", 4),
        ("episodes_processed", 10),
        ("schemas_induced_tier0", 3),
        ("quota_used_pct", 0.003),
        ("main_insight_text", "today's unifying insight"),
        ("sigma_observed", 1.2),
        ("s5_drift_alerts", []),
        ("daemon_uptime_hours", 8),
    )
    for field_name, wanted in expected_values:
        assert surfaced[field_name] == wanted
    # Identity check: the flag must be the real boolean True.
    assert surfaced["claude_call_used"] is True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 2: second recall within 18h window does NOT include digest
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_not_twice(isolated_state):
    """A digest last shown 4 hours ago (inside the 18h window) is not
    injected again."""
    from iai_mcp.core import _inject_overnight_digest
    from iai_mcp.daemon_state import save_state

    four_hours_ago = datetime.now(timezone.utc) - timedelta(hours=4)
    seeded = {
        "pending_digest": dict(_FULL_DIGEST),
        "last_digest_shown_at": four_hours_ago.isoformat(),
    }
    save_state(seeded)

    payload: dict = {"hits": []}
    _inject_overnight_digest(payload)
    assert "overnight_digest" not in payload
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 3: no pending digest -> no key added
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_no_digest_when_none_pending(isolated_state):
    """An empty persisted state yields no ``overnight_digest`` key."""
    from iai_mcp.core import _inject_overnight_digest
    from iai_mcp.daemon_state import save_state

    # Persist an empty state: nothing pending, nothing shown.
    save_state({})

    payload: dict = {"hits": []}
    _inject_overnight_digest(payload)
    assert "overnight_digest" not in payload
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 4: digest cleared from state after one delivery
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_digest_cleared_after_delivery(isolated_state):
    """Once the digest has been surfaced, the persisted state no longer holds
    ``pending_digest`` and ``last_digest_shown_at`` has moved to about now."""
    from iai_mcp.core import _inject_overnight_digest
    from iai_mcp.daemon_state import load_state, save_state

    started = datetime.now(timezone.utc)
    seeded = {
        "pending_digest": dict(_FULL_DIGEST),
        "last_digest_shown_at": (started - timedelta(hours=20)).isoformat(),
    }
    save_state(seeded)

    payload: dict = {"hits": []}
    _inject_overnight_digest(payload)
    assert "overnight_digest" in payload

    # Re-read what was persisted: the pending digest must have been consumed.
    persisted = load_state()
    assert "pending_digest" not in persisted

    # Treat a naive timestamp as UTC before comparing against the aware one.
    stamp = datetime.fromisoformat(persisted["last_digest_shown_at"])
    if stamp.tzinfo is None:
        stamp = stamp.replace(tzinfo=timezone.utc)
    earliest_allowed = started - timedelta(seconds=5)
    assert stamp >= earliest_allowed
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 5: exception in get_pending_digest does NOT break memory_recall
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_exception_is_silent(isolated_state, monkeypatch):
    """When ``get_pending_digest`` raises, ``_inject_overnight_digest`` still
    returns normally: existing keys survive and no ``overnight_digest`` key
    is added."""
    from iai_mcp import core

    def _raise_corruption(*args, **kwargs):
        raise RuntimeError("simulated state corruption")

    monkeypatch.setattr("iai_mcp.core.get_pending_digest", _raise_corruption)

    payload: dict = {"hits": [], "existing": True}
    # The call itself must complete without raising.
    core._inject_overnight_digest(payload)

    assert payload.get("existing") is True
    assert "overnight_digest" not in payload
|
||||
203
tests/test_cpu_watchdog.py
Normal file
203
tests/test_cpu_watchdog.py
Normal file
|
|
@ -0,0 +1,203 @@
|
|||
"""Phase 07.2-05 R5 / A5 regression test — CPU watchdog emits one event under sustained overload.
|
||||
|
||||
Mock psutil.Process.cpu_percent with a scripted sequence so the test runs
|
||||
in seconds instead of 75s wall time. D7.2-23 explicitly allows mocks for
|
||||
heavy-dep tests. The synthetic-CPU-burner approach (real 80% CPU thread)
|
||||
is documented in SPEC A5 but is impractical for the unit suite; we test
|
||||
the SAME contract (sustained > threshold => one event) with deterministic
|
||||
sample injection.
|
||||
|
||||
Project async-test idiom (mandatory): sync `def test_X(...)` body wraps
|
||||
`asyncio.run(_async_body())`. The project does NOT depend on
|
||||
`pytest-asyncio`; `@pytest.mark.asyncio` markers silently pass without
|
||||
running. See tests/test_daemon_tick_flags.py:144 for the canonical pattern.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
|
||||
def test_sustained_overload_emits_exactly_one_daemon_cpu_overload_event(monkeypatch):
    """A5 acceptance: sustained samples above the threshold produce exactly
    one critical event. Sync entry point that runs the async body."""
    scenario = _sustained_overload_body(monkeypatch)
    asyncio.run(scenario)
|
||||
|
||||
|
||||
async def _sustained_overload_body(monkeypatch):
    """Run ``daemon._cpu_watchdog_loop`` against scripted CPU samples and
    check that exactly one ``daemon_cpu_overload`` event is captured.

    ``psutil.Process``, ``daemon.write_event`` and ``daemon.load_state`` are
    patched; the loop runs for about 1.5 s before shutdown is signalled.
    """
    import iai_mcp.daemon as daemon_mod

    recorded: list[tuple[str, dict, str]] = []

    def _record_event(store, kind, data, severity="info", **kwargs):
        # Stand-in for write_event: keep the kind, a copy of data, severity.
        recorded.append((kind, dict(data), severity))

    # Short poll interval so the scripted samples are consumed well inside
    # the run time; the remaining names are daemon module attributes that
    # this test pins to known values.
    pinned_attrs = (
        ("WATCHDOG_POLL_SEC", 0.05),
        ("WATCHDOG_THRESHOLD_PERCENT", 50.0),
        ("WATCHDOG_EVENT_COOLDOWN_SEC", 300.0),
        ("_last_overload_event_at", 0.0),
        ("_daemon_started_monotonic", 0.0),
    )
    for attr_name, attr_value in pinned_attrs:
        monkeypatch.setattr(daemon_mod, attr_name, attr_value)

    # Six scripted samples served after the priming call: 80, 80, 30, 80,
    # 80, 80. With a 300 s cooldown and a ~1.5 s run, only one event is
    # expected even though a second above-threshold burst follows the dip.
    scripted = iter([80.0, 80.0, 30.0, 80.0, 80.0, 80.0])

    class _ScriptedProc:
        _primed = False

        def cpu_percent(self, interval=None):
            # The first call yields 0.0 (mimicking psutil's priming call);
            # later calls drain the script, then fall back to 0.0.
            if self._primed:
                return next(scripted, 0.0)
            self._primed = True
            return 0.0

    def _dreaming_state():
        return {"fsm_state": "DREAMING"}

    # Per the original note the watchdog imports psutil locally, so the
    # class on the psutil module itself is patched.
    proc_patch = patch("psutil.Process", return_value=_ScriptedProc())
    event_patch = patch("iai_mcp.daemon.write_event", _record_event)
    state_patch = patch("iai_mcp.daemon.load_state", _dreaming_state)

    with proc_patch, event_patch, state_patch:
        stop_signal = asyncio.Event()
        fake_store = MagicMock()
        watchdog = asyncio.create_task(
            daemon_mod._cpu_watchdog_loop(fake_store, stop_signal)
        )

        # ~1.5 s at a 0.05 s poll is roughly 30 polls, far more than the
        # six scripted samples need.
        await asyncio.sleep(1.5)
        stop_signal.set()
        try:
            await asyncio.wait_for(watchdog, timeout=2.0)
        except asyncio.TimeoutError:
            watchdog.cancel()
            try:
                await watchdog
            except (asyncio.CancelledError, Exception):
                pass

    # Keep only the overload events.
    overloads = [entry for entry in recorded if entry[0] == "daemon_cpu_overload"]

    # A5: exactly one event.
    assert len(overloads) == 1, (
        f"Expected exactly 1 daemon_cpu_overload event; got "
        f"{len(overloads)}: {overloads}"
    )

    _kind, payload, level = overloads[0]
    assert level == "critical"
    assert payload["fsm_state"] == "DREAMING"
    assert payload["threshold_pct"] == 50.0
    assert payload["sustained_sec"] == int(0.05 * 2)
    assert "cpu_samples_pct" in payload
    assert all(s >= 0 for s in payload["cpu_samples_pct"])
    for required_key in ("active_tasks", "uptime_sec"):
        assert required_key in payload
|
||||
|
||||
|
||||
def test_below_threshold_emits_no_event(monkeypatch):
    """Control case: samples that stay under the threshold produce no event.
    Sync entry point that runs the async body."""
    scenario = _below_threshold_body(monkeypatch)
    asyncio.run(scenario)
|
||||
|
||||
|
||||
async def _below_threshold_body(monkeypatch):
    """Run ``daemon._cpu_watchdog_loop`` with every post-priming sample at
    30.0 (threshold 50.0) and check that no ``daemon_cpu_overload`` event is
    captured."""
    import iai_mcp.daemon as daemon_mod

    recorded: list[tuple[str, dict, str]] = []

    def _record_event(store, kind, data, severity="info", **kwargs):
        # Stand-in for write_event: keep the kind, a copy of data, severity.
        recorded.append((kind, dict(data), severity))

    pinned_attrs = (
        ("WATCHDOG_POLL_SEC", 0.05),
        ("WATCHDOG_THRESHOLD_PERCENT", 50.0),
        ("_last_overload_event_at", 0.0),
    )
    for attr_name, attr_value in pinned_attrs:
        monkeypatch.setattr(daemon_mod, attr_name, attr_value)

    class _QuietProc:
        _primed = False

        def cpu_percent(self, interval=None):
            # First call returns 0.0; every later call is below threshold.
            if self._primed:
                return 30.0
            self._primed = True
            return 0.0

    def _wake_state():
        return {"fsm_state": "WAKE"}

    proc_patch = patch("psutil.Process", return_value=_QuietProc())
    event_patch = patch("iai_mcp.daemon.write_event", _record_event)
    state_patch = patch("iai_mcp.daemon.load_state", _wake_state)

    with proc_patch, event_patch, state_patch:
        stop_signal = asyncio.Event()
        fake_store = MagicMock()
        watchdog = asyncio.create_task(
            daemon_mod._cpu_watchdog_loop(fake_store, stop_signal)
        )
        await asyncio.sleep(1.0)
        stop_signal.set()
        try:
            await asyncio.wait_for(watchdog, timeout=2.0)
        except asyncio.TimeoutError:
            watchdog.cancel()
            try:
                await watchdog
            except (asyncio.CancelledError, Exception):
                pass

    overloads = [entry for entry in recorded if entry[0] == "daemon_cpu_overload"]
    assert overloads == [], (
        f"Expected zero daemon_cpu_overload events under sub-threshold "
        f"samples; got {overloads}"
    )
|
||||
|
||||
|
||||
def test_event_cooldown_prevents_ledger_flood(monkeypatch):
    """D7.2-20: no more than one event per WATCHDOG_EVENT_COOLDOWN_SEC.
    Sync entry point that runs the async body."""
    scenario = _event_cooldown_body(monkeypatch)
    asyncio.run(scenario)
|
||||
|
||||
|
||||
async def _event_cooldown_body(monkeypatch):
    """Run ``daemon._cpu_watchdog_loop`` under constant overload (every
    post-priming sample is 90.0) with a 300 s cooldown and check that exactly
    one ``daemon_cpu_overload`` event is captured."""
    import iai_mcp.daemon as daemon_mod

    recorded: list[tuple[str, dict, str]] = []

    def _record_event(store, kind, data, severity="info", **kwargs):
        # Stand-in for write_event: keep the kind, a copy of data, severity.
        recorded.append((kind, dict(data), severity))

    # The cooldown is far longer than the run, so a second trigger inside
    # this test should be suppressed.
    pinned_attrs = (
        ("WATCHDOG_POLL_SEC", 0.05),
        ("WATCHDOG_THRESHOLD_PERCENT", 50.0),
        ("WATCHDOG_EVENT_COOLDOWN_SEC", 300.0),
        ("_last_overload_event_at", 0.0),
    )
    for attr_name, attr_value in pinned_attrs:
        monkeypatch.setattr(daemon_mod, attr_name, attr_value)

    class _BusyProc:
        _primed = False

        def cpu_percent(self, interval=None):
            # First call returns 0.0; every later call reports 90% CPU.
            if self._primed:
                return 90.0
            self._primed = True
            return 0.0

    def _dreaming_state():
        return {"fsm_state": "DREAMING"}

    proc_patch = patch("psutil.Process", return_value=_BusyProc())
    event_patch = patch("iai_mcp.daemon.write_event", _record_event)
    state_patch = patch("iai_mcp.daemon.load_state", _dreaming_state)

    with proc_patch, event_patch, state_patch:
        stop_signal = asyncio.Event()
        fake_store = MagicMock()
        watchdog = asyncio.create_task(
            daemon_mod._cpu_watchdog_loop(fake_store, stop_signal)
        )
        # Long enough for about 30 polls at the 0.05 s interval.
        await asyncio.sleep(1.5)
        stop_signal.set()
        try:
            await asyncio.wait_for(watchdog, timeout=2.0)
        except asyncio.TimeoutError:
            watchdog.cancel()
            try:
                await watchdog
            except (asyncio.CancelledError, Exception):
                pass

    overloads = [entry for entry in recorded if entry[0] == "daemon_cpu_overload"]
    # The cooldown should hold the count at exactly one.
    assert len(overloads) == 1, (
        f"D7.2-20 cooldown failed: expected 1 event under persistent "
        f"overload; got {len(overloads)}"
    )
|
||||
214
tests/test_crypto.py
Normal file
214
tests/test_crypto.py
Normal file
|
|
@ -0,0 +1,214 @@
|
|||
"""crypto.py AES-256-GCM primitives + file-backed key storage.
|
||||
|
||||
Originally Plan 02-08; updated in W1 to retire the keyring
|
||||
backend (which deadlocked the daemon under launchd via the macOS
|
||||
Keychain ACL prompt) in favor of a file-backed primary backend at
|
||||
`{IAI_MCP_STORE}/.crypto.key` (32 raw bytes, mode 0o600, uid-validated).
|
||||
|
||||
Covers:
|
||||
- encrypt_field / decrypt_field round-trip (byte-for-byte)
|
||||
- Cyrillic / CJK / Arabic round-trip (MEM-01 across languages)
|
||||
- Associated data binding (swapped AD -> InvalidTag)
|
||||
- Tamper detection (mutated ciphertext -> InvalidTag)
|
||||
- is_encrypted prefix check
|
||||
- Passphrase fallback when no `.crypto.key` file is present
|
||||
(via IAI_MCP_CRYPTO_PASSPHRASE), deterministic across instances
|
||||
|
||||
File-backend specific behavior (file priority, uid/mode validation,
|
||||
atomic write) is exercised in tests/test_crypto_file_backend.py.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import pytest
|
||||
|
||||
|
||||
def test_crypto_module_exports() -> None:
    """The crypto module exposes the five public names this suite relies on."""
    from iai_mcp import crypto

    required_names = (
        "encrypt_field",
        "decrypt_field",
        "is_encrypted",
        "CryptoKey",
        "derive_key_from_passphrase",
    )
    for public_name in required_names:
        assert hasattr(crypto, public_name)
|
||||
|
||||
|
||||
def test_crypto_roundtrip_basic() -> None:
    """Encrypting then decrypting ASCII text returns the original string; the
    encrypted form is a ``str``."""
    from iai_mcp.crypto import decrypt_field, encrypt_field

    all_zero_key = b"\x00" * 32
    message = "hello world"

    sealed = encrypt_field(message, all_zero_key)
    assert isinstance(sealed, str)

    opened = decrypt_field(sealed, all_zero_key)
    assert opened == message
|
||||
|
||||
|
||||
def test_crypto_roundtrip_cyrillic() -> None:
    """D-08a: Russian text survives the encrypt/decrypt cycle unchanged, both
    as a string and as UTF-8 bytes."""
    from iai_mcp.crypto import decrypt_field, encrypt_field

    test_key = b"\x01" * 32
    message = "Привет, мир! Это тест шифрования."

    opened = decrypt_field(encrypt_field(message, test_key), test_key)
    assert opened == message
    # Same comparison again at the UTF-8 byte level.
    assert opened.encode("utf-8") == message.encode("utf-8")
|
||||
|
||||
|
||||
def test_crypto_roundtrip_cjk() -> None:
    """D-08a: CJK text survives the encrypt/decrypt cycle unchanged."""
    from iai_mcp.crypto import decrypt_field, encrypt_field

    test_key = b"\x02" * 32
    message = "こんにちは世界。これは暗号化テストです。"

    sealed = encrypt_field(message, test_key)
    opened = decrypt_field(sealed, test_key)
    assert opened == message
|
||||
|
||||
|
||||
def test_crypto_roundtrip_arabic() -> None:
    """D-08a: Arabic text survives the encrypt/decrypt cycle unchanged."""
    from iai_mcp.crypto import decrypt_field, encrypt_field

    test_key = b"\x03" * 32
    message = "مرحبا بالعالم. هذا اختبار تشفير."

    sealed = encrypt_field(message, test_key)
    opened = decrypt_field(sealed, test_key)
    assert opened == message
|
||||
|
||||
|
||||
def test_crypto_empty_string_roundtrip() -> None:
    """The empty string encrypts and decrypts back to the empty string."""
    from iai_mcp.crypto import decrypt_field, encrypt_field

    test_key = b"\x04" * 32
    sealed = encrypt_field("", test_key)
    opened = decrypt_field(sealed, test_key)
    assert opened == ""
|
||||
|
||||
|
||||
def test_crypto_associated_data_binding() -> None:
    """Decrypting with associated data that differs from the data used at
    encryption time raises ``InvalidTag``."""
    from cryptography.exceptions import InvalidTag

    from iai_mcp.crypto import decrypt_field, encrypt_field

    test_key = b"\x05" * 32
    sealed = encrypt_field("secret", test_key, associated_data=b"record_id_A")

    with pytest.raises(InvalidTag):
        decrypt_field(sealed, test_key, associated_data=b"record_id_B")
|
||||
|
||||
|
||||
def test_crypto_associated_data_roundtrip_when_matching() -> None:
    """With identical associated data on both sides, the round-trip returns
    the original plaintext."""
    from iai_mcp.crypto import decrypt_field, encrypt_field

    test_key = b"\x06" * 32
    binding = b"record_id_matching"

    sealed = encrypt_field("secret", test_key, associated_data=binding)
    opened = decrypt_field(sealed, test_key, associated_data=binding)
    assert opened == "secret"
|
||||
|
||||
|
||||
def test_crypto_tamper_detection() -> None:
    """Flipping one bit in the encoded payload makes decryption raise
    ``InvalidTag``."""
    import base64

    from cryptography.exceptions import InvalidTag

    from iai_mcp.crypto import decrypt_field, encrypt_field

    test_key = b"\x07" * 32
    sealed = encrypt_field("secret", test_key)

    # Peel off the version prefix so the base64 payload can be edited.
    marker = "iai:enc:v1:"
    assert sealed.startswith(marker)
    blob = base64.b64decode(sealed[len(marker):])

    # Flip the low bit of byte 15. The original comment places the nonce in
    # the first 12 bytes, so this index is presumably in the ciphertext.
    flipped = blob[:15] + bytes([blob[15] ^ 0x01]) + blob[16:]
    forged = marker + base64.b64encode(flipped).decode("ascii")

    with pytest.raises(InvalidTag):
        decrypt_field(forged, test_key)
|
||||
|
||||
|
||||
def test_crypto_wrong_key_fails() -> None:
    """Decrypting with a key other than the one used to encrypt raises
    ``InvalidTag``."""
    from cryptography.exceptions import InvalidTag

    from iai_mcp.crypto import decrypt_field, encrypt_field

    sealing_key = b"\x08" * 32
    other_key = b"\x09" * 32

    sealed = encrypt_field("secret", sealing_key)
    with pytest.raises(InvalidTag):
        decrypt_field(sealed, other_key)
|
||||
|
||||
|
||||
def test_is_encrypted_prefix_true() -> None:
    """``is_encrypted`` returns True for the output of ``encrypt_field``."""
    from iai_mcp.crypto import encrypt_field, is_encrypted

    test_key = b"\x0a" * 32
    sealed = encrypt_field("hello", test_key)
    verdict = is_encrypted(sealed)
    assert verdict is True
|
||||
|
||||
|
||||
def test_is_encrypted_prefix_false() -> None:
    """``is_encrypted`` returns False for plain text, the empty string, a
    different version prefix, and an unrelated prefix."""
    from iai_mcp.crypto import is_encrypted

    not_encrypted = (
        "plaintext",
        "",
        "iai:enc:v0:abc",  # version differs from v1
        "foo:bar",
    )
    for candidate in not_encrypted:
        assert is_encrypted(candidate) is False
|
||||
|
||||
|
||||
def test_crypto_unique_nonce_per_encrypt() -> None:
    """Encrypting the same plaintext twice under one key gives two different
    ciphertext strings."""
    from iai_mcp.crypto import encrypt_field

    test_key = b"\x0b" * 32
    first, second = (encrypt_field("repeat", test_key) for _ in range(2))
    # Per the original note, a random nonce is what makes the outputs differ.
    assert first != second
|
||||
|
||||
|
||||
def test_derive_key_from_passphrase_deterministic() -> None:
    """Deriving twice from the same passphrase and salt yields the same
    32-byte key."""
    from iai_mcp.crypto import derive_key_from_passphrase

    fixed_salt = b"saltsaltsaltsalt"  # 16 bytes
    derived = [derive_key_from_passphrase("hunter2", fixed_salt) for _ in range(2)]

    assert derived[0] == derived[1]
    assert len(derived[0]) == 32  # 256 bits
|
||||
|
||||
|
||||
def test_derive_key_from_passphrase_different_salts() -> None:
    """One passphrase with two different salts yields two different keys."""
    from iai_mcp.crypto import derive_key_from_passphrase

    key_from_a = derive_key_from_passphrase("same", b"A" * 16)
    key_from_b = derive_key_from_passphrase("same", b"B" * 16)
    assert key_from_a != key_from_b
|
||||
|
||||
|
||||
def test_derive_key_uses_600k_iterations() -> None:
    """``crypto.PBKDF2_ITERATIONS`` is at least 600,000 (the OWASP 2023
    minimum for PBKDF2-HMAC-SHA256 cited by the original test)."""
    from iai_mcp import crypto

    minimum_rounds = 600_000
    assert crypto.PBKDF2_ITERATIONS >= minimum_rounds
|
||||
|
||||
|
||||
def test_crypto_key_passphrase_fallback_when_file_missing(
    tmp_path, monkeypatch
) -> None:
    """Phase 07.10 W1: with no `.crypto.key` in ``store_root`` and
    IAI_MCP_CRYPTO_PASSPHRASE set, ``get_or_create()`` returns a 32-byte key.

    Documented priority: key file -> passphrase env var -> CryptoKeyError.
    This test covers the second tier and also checks determinism: a second
    ``CryptoKey`` built with the same ``user_id`` and ``store_root`` returns
    the same bytes. Nothing here touches a keyring.

    Written as a RED test ahead of W2, which adds the ``store_root`` kwarg.
    """
    from iai_mcp import crypto

    # Precondition: the file backend has nothing to read.
    key_file = tmp_path / ".crypto.key"
    assert not key_file.exists()

    monkeypatch.setenv("IAI_MCP_CRYPTO_PASSPHRASE", "hunter2-fallback")

    first_key = crypto.CryptoKey(user_id="t", store_root=tmp_path).get_or_create()
    assert isinstance(first_key, bytes)
    assert len(first_key) == 32

    # A fresh instance with identical inputs must derive the same key.
    second_key = crypto.CryptoKey(user_id="t", store_root=tmp_path).get_or_create()
    assert first_key == second_key
|
||||
281
tests/test_crypto_file_backend.py
Normal file
281
tests/test_crypto_file_backend.py
Normal file
|
|
@ -0,0 +1,281 @@
|
|||
"""Phase 07.10 W1 RED: file-backed crypto key {`_try_file_get`, `_try_file_set`,
|
||||
get_or_create priority, migrate-to-file CLI}.
|
||||
|
||||
Locks the executable spec for the file-backed crypto key per CONTEXT.md
|
||||
D-05 / D-06 / D-07 / D-11. All 9 tests are RED until W2 (crypto.py file
|
||||
backend) and W3 (cmd_crypto_migrate_to_file) land.
|
||||
|
||||
Failure shapes that count as a correct RED signal in this plan:
|
||||
|
||||
- TypeError: CryptoKey() got an unexpected keyword argument 'store_root'
|
||||
(W2 adds the kwarg)
|
||||
- AttributeError: 'CryptoKey' object has no attribute '_try_file_get'
|
||||
/ '_try_file_set' / '_key_file_path'
|
||||
- ImportError: cannot import name 'cmd_crypto_migrate_to_file'
|
||||
(W3 lands the CLI command)
|
||||
|
||||
Imports of the new symbols stay INSIDE each test body so module-level
|
||||
collection succeeds: pytest must be able to ENUMERATE the 9 tests and
|
||||
then fail each one at assertion time, not crash at collection.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import secrets
|
||||
import stat
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- _try_file_get
|
||||
|
||||
def test_try_file_get_returns_bytes_on_valid_0o600_file(tmp_path: Path) -> None:
    """D-11 case 1: a 32-byte key file with mode 0o600 is read back verbatim
    by ``_try_file_get``."""
    from iai_mcp.crypto import CryptoKey

    expected_key = secrets.token_bytes(32)
    key_file = tmp_path / ".crypto.key"
    key_file.write_bytes(expected_key)
    os.chmod(key_file, 0o600)

    loaded = CryptoKey(user_id="t", store_root=tmp_path)._try_file_get()

    assert loaded == expected_key
    assert isinstance(loaded, bytes)
    assert len(loaded) == 32
|
||||
|
||||
|
||||
def test_try_file_get_rejects_world_or_group_bits(tmp_path: Path) -> None:
    """D-06 case 2: a key file with mode 0o644 makes ``_try_file_get`` raise
    ``CryptoKeyError`` whose message mentions 'insecure mode'."""
    from iai_mcp.crypto import CryptoKey, CryptoKeyError

    key_file = tmp_path / ".crypto.key"
    key_file.write_bytes(b"\x00" * 32)
    os.chmod(key_file, 0o644)

    key_source = CryptoKey(user_id="t", store_root=tmp_path)
    with pytest.raises(CryptoKeyError) as raised:
        key_source._try_file_get()

    message = str(raised.value).lower()
    assert "insecure mode" in message
|
||||
|
||||
|
||||
def test_try_file_get_rejects_wrong_length(tmp_path: Path) -> None:
    """D-05 case 3: a 31-byte key file makes ``_try_file_get`` raise
    ``CryptoKeyError`` whose message mentions 'wrong length'."""
    from iai_mcp.crypto import CryptoKey, CryptoKeyError

    key_file = tmp_path / ".crypto.key"
    key_file.write_bytes(b"\x01" * 31)  # one byte short of 32
    os.chmod(key_file, 0o600)

    key_source = CryptoKey(user_id="t", store_root=tmp_path)
    with pytest.raises(CryptoKeyError) as raised:
        key_source._try_file_get()

    message = str(raised.value).lower()
    assert "wrong length" in message
|
||||
|
||||
|
||||
def test_try_file_get_rejects_foreign_uid(tmp_path: Path, monkeypatch) -> None:
    """D-06 case 4: when the key file's owner uid differs from the effective
    uid, ``_try_file_get`` raises ``CryptoKeyError`` mentioning 'uid'.

    ``os.stat`` is replaced by a wrapper that returns a forged result only
    for the key file's path; every other path is passed to the real
    ``os.stat``. The forged value is a genuine ``os.stat_result``, so callers
    that index into it keep working.
    """
    from iai_mcp.crypto import CryptoKey, CryptoKeyError

    key_file = tmp_path / ".crypto.key"
    key_file.write_bytes(b"\x02" * 32)
    os.chmod(key_file, 0o600)

    original_stat = os.stat
    genuine = original_stat(key_file)
    # Offset and mask so the forged owner is never the current effective uid
    # in practice.
    foreign_uid = (os.geteuid() + 12345) & 0xFFFF

    # os.stat_result takes a 10-item sequence in this field order; only the
    # uid slot is swapped out.
    field_order = (
        "st_mode",
        "st_ino",
        "st_dev",
        "st_nlink",
        "st_uid",
        "st_gid",
        "st_size",
        "st_atime",
        "st_mtime",
        "st_ctime",
    )
    forged = os.stat_result(tuple(
        foreign_uid if field == "st_uid" else getattr(genuine, field)
        for field in field_order
    ))

    key_file_str = str(key_file)

    def _stat_wrapper(path, *args, **kwargs):
        # Only the key file gets the forged owner; anything that cannot be
        # stringified, or is another path, goes to the real os.stat.
        try:
            as_text = str(path)
        except Exception:
            return original_stat(path, *args, **kwargs)
        if as_text != key_file_str:
            return original_stat(path, *args, **kwargs)
        return forged

    monkeypatch.setattr(os, "stat", _stat_wrapper)

    key_source = CryptoKey(user_id="t", store_root=tmp_path)
    with pytest.raises(CryptoKeyError) as raised:
        key_source._try_file_get()

    message = str(raised.value).lower()
    assert "uid" in message
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- _try_file_set
|
||||
|
||||
def test_try_file_set_writes_atomic_with_0o600(tmp_path: Path) -> None:
    """D-07 case 5: ``_try_file_set`` leaves a `.crypto.key` file holding the
    exact payload with permission bits 0o600.

    It also checks that no `.crypto.key.tmp.*` file remains in the directory
    afterwards; a leftover would mean the temporary file was neither renamed
    nor cleaned up.
    """
    from iai_mcp.crypto import CryptoKey

    key_material = b"\x00" * 32
    CryptoKey(user_id="t", store_root=tmp_path)._try_file_set(key_material)

    key_file = tmp_path / ".crypto.key"
    assert key_file.exists()
    assert key_file.read_bytes() == payload_check(key_material) if False else key_file.read_bytes() == key_material
    permission_bits = stat.S_IMODE(os.stat(key_file).st_mode)
    assert permission_bits == 0o600

    # No temporary artefacts may survive the write.
    leftover_tmps = list(tmp_path.glob(".crypto.key.tmp.*"))
    assert leftover_tmps == [], f"leaked tmp files: {leftover_tmps}"
|
||||
|
||||
|
||||
def test_try_file_set_cleans_stale_tmp(tmp_path: Path) -> None:
    """D-07 / case 6 — a leftover `.crypto.key.tmp.<pid>` is gone after `_try_file_set`."""
    from iai_mcp.crypto import CryptoKey

    # Simulate a temp file abandoned by an earlier, crashed run.
    orphan = tmp_path / ".crypto.key.tmp.99999"
    orphan.write_bytes(b"GARBAGE-FROM-CRASHED-PRIOR-RUN")

    fresh_key = b"\x01" * 32
    CryptoKey(user_id="t", store_root=tmp_path)._try_file_set(fresh_key)

    assert not orphan.exists(), "stale tmp must be cleaned up before the new write"
    final = tmp_path / ".crypto.key"
    assert final.exists()
    assert final.read_bytes() == fresh_key
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- get_or_create priority
|
||||
|
||||
def test_get_or_create_prefers_file_over_passphrase(
    tmp_path: Path, monkeypatch
) -> None:
    """D-11 case 7 — the key file takes priority over the passphrase env var.

    A random 32-byte key is written to `.crypto.key` and
    IAI_MCP_CRYPTO_PASSPHRASE is set at the same time; `get_or_create()`
    must hand back the bytes from the file.
    """
    from iai_mcp.crypto import CryptoKey

    file_key = secrets.token_bytes(32)
    on_disk = tmp_path / ".crypto.key"
    on_disk.write_bytes(file_key)
    os.chmod(on_disk, 0o600)

    monkeypatch.setenv("IAI_MCP_CRYPTO_PASSPHRASE", "hunter2")

    resolved = CryptoKey(user_id="t", store_root=tmp_path).get_or_create()
    assert resolved == file_key, "file-backed key must win over passphrase fallback"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- migrate-to-file CLI
|
||||
|
||||
def test_cmd_crypto_migrate_to_file_happy_path(
    tmp_path: Path, monkeypatch
) -> None:
    """D-11 case 8 — migrate-to-file copies the keyring key into a 0o600 file.

    `keyring.get_password` / `keyring.delete_password` are replaced on the
    module object before the command is imported, so an `import keyring`
    performed later resolves to the patched attributes.
    """
    import argparse
    import base64
    import keyring as _keyring

    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))

    keyring_key = secrets.token_bytes(32)
    # The fake keyring entry is the URL-safe base64 text of the raw key.
    keyring_blob = base64.urlsafe_b64encode(keyring_key).decode("ascii")

    monkeypatch.setattr(
        _keyring, "get_password", lambda service, username: keyring_blob
    )
    monkeypatch.setattr(
        _keyring, "delete_password", lambda service, username: None
    )

    from iai_mcp.cli import cmd_crypto_migrate_to_file

    namespace = argparse.Namespace(
        user_id="default", keep_keychain=True, delete_keychain=False
    )
    assert cmd_crypto_migrate_to_file(namespace) == 0

    migrated = tmp_path / ".crypto.key"
    assert migrated.exists()
    assert stat.S_IMODE(os.stat(migrated).st_mode) == 0o600
    assert migrated.read_bytes() == keyring_key, (
        "file contents must equal the round-tripped keyring key bytes"
    )
|
||||
|
||||
|
||||
def test_cmd_crypto_migrate_to_file_idempotent(
    tmp_path: Path, monkeypatch
) -> None:
    """D-11 case 9 — with a key file already present the command succeeds untouched.

    Both keyring accessors are replaced with a function that raises
    AssertionError, so any keyring access on this path fails the test with a
    specific message.
    """
    import argparse
    import keyring as _keyring

    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))

    # A valid key file on disk steers the command down the no-op branch.
    existing_key = secrets.token_bytes(32)
    key_file = tmp_path / ".crypto.key"
    key_file.write_bytes(existing_key)
    os.chmod(key_file, 0o600)

    def forbid_keyring(*args, **kwargs):
        raise AssertionError(
            "keyring touched on idempotent path — migrate-to-file must "
            "skip keyring entirely when the file is already present"
        )

    for accessor in ("get_password", "delete_password"):
        monkeypatch.setattr(_keyring, accessor, forbid_keyring)

    from iai_mcp.cli import cmd_crypto_migrate_to_file

    namespace = argparse.Namespace(
        user_id="default", keep_keychain=True, delete_keychain=False
    )
    assert cmd_crypto_migrate_to_file(namespace) == 0
    # The pre-existing bytes must survive unchanged.
    assert key_file.read_bytes() == existing_key
|
||||
52
tests/test_crypto_key_watch.py
Normal file
52
tests/test_crypto_key_watch.py
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
"""Tests for crypto_key_watch baseline + rotation detection."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import secrets
|
||||
from pathlib import Path
|
||||
|
||||
from iai_mcp.crypto_key_watch import (
|
||||
check_crypto_key_file_rotation_event,
|
||||
sync_crypto_key_watcher_to_disk,
|
||||
)
|
||||
from iai_mcp.events import query_events
|
||||
from iai_mcp.store import MemoryStore
|
||||
|
||||
|
||||
def test_watcher_baseline_then_rotation_emits_event(tmp_path: Path) -> None:
    """First check emits nothing; rewriting the key yields exactly one rotation event.

    A third check without touching the file must not add another event.
    """
    root = tmp_path / "w"
    root.mkdir()
    kpath = root / ".crypto.key"

    def write_new_key() -> None:
        kpath.write_bytes(secrets.token_bytes(32))
        os.chmod(kpath, 0o600)

    def rotation_count() -> int:
        # Run the watcher once, then count the rotation events recorded so far.
        check_crypto_key_file_rotation_event(store)
        return len(query_events(store, kind="crypto_key_rotated", limit=10))

    write_new_key()
    store = MemoryStore(path=root, user_id="default")

    assert rotation_count() == 0

    write_new_key()
    assert rotation_count() == 1

    assert rotation_count() == 1
|
||||
|
||||
|
||||
def test_sync_watcher_without_event(tmp_path: Path) -> None:
    """`sync_crypto_key_watcher_to_disk` writes a JSON file with `mtime_ns` and `size`."""
    root = tmp_path / "s"
    root.mkdir()

    key_file = root / ".crypto.key"
    key_file.write_bytes(secrets.token_bytes(32))
    os.chmod(key_file, 0o600)

    sync_crypto_key_watcher_to_disk(MemoryStore(path=root, user_id="default"))

    watcher_file = root / ".crypto-key-watcher.json"
    assert watcher_file.is_file()
    snapshot = json.loads(watcher_file.read_text(encoding="utf-8"))
    assert "mtime_ns" in snapshot
    assert "size" in snapshot
|
||||
251
tests/test_curiosity.py
Normal file
251
tests/test_curiosity.py
Normal file
|
|
@ -0,0 +1,251 @@
|
|||
"""Tests for LEARN-04 curiosity (D-23, D-24).
|
||||
|
||||
D-23 trigger: entropy > 0.7 bits, 3-turn cooldown.
|
||||
D-24 tiered style:
|
||||
- low entropy (0.4-0.7): silent log via events table (curiosity_silent_log)
|
||||
- mid entropy (0.7-0.9): inline hint in next response
|
||||
- high entropy (>0.9): direct clarifying question
|
||||
|
||||
compute_entropy operates in base-2 (bits) consistent with "0.7 bits".
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
from datetime import datetime, timezone
|
||||
from uuid import UUID, uuid4
|
||||
|
||||
import pytest
|
||||
|
||||
from iai_mcp.store import MemoryStore
|
||||
from iai_mcp.types import EMBED_DIM, MemoryRecord
|
||||
|
||||
|
||||
def _rec(vec=None, tags=None):
    """Build a minimal episodic ``MemoryRecord`` for tests.

    Args:
        vec: Embedding to store. When ``None`` or empty, a unit vector of
            length ``EMBED_DIM`` along the first axis is used.
        tags: Optional iterable of tags; copied into a new list.

    Returns:
        A ``MemoryRecord`` with a random id and identical
        ``created_at`` / ``updated_at`` timestamps (UTC).
    """
    # `vec or default` raises for multi-element array-like inputs whose truth
    # value is ambiguous (e.g. numpy arrays), so test emptiness explicitly.
    if vec is None or len(vec) == 0:
        vec = [1.0] + [0.0] * (EMBED_DIM - 1)
    now = datetime.now(timezone.utc)
    return MemoryRecord(
        id=uuid4(),
        tier="episodic",
        literal_surface="r",
        aaak_index="",
        embedding=vec,
        community_id=None,
        centrality=0.0,
        detail_level=2,
        pinned=False,
        stability=0.0,
        difficulty=0.0,
        last_reviewed=None,
        never_decay=False,
        never_merge=False,
        provenance=[],
        created_at=now,
        updated_at=now,
        tags=list(tags or []),
        language="en",
    )
|
||||
|
||||
|
||||
class _Hit:
|
||||
def __init__(self, rid: UUID, score: float):
|
||||
self.record_id = rid
|
||||
self.score = score
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- constants
|
||||
|
||||
|
||||
def test_curiosity_thresholds():
    """Pin the published curiosity constants (entropy tiers and cooldown length)."""
    from iai_mcp import curiosity

    pinned = {
        "ENTROPY_LOW": 0.4,
        "ENTROPY_MID": 0.7,
        "ENTROPY_HIGH": 0.9,
        "COOLDOWN_TURNS": 3,
    }
    for constant, value in pinned.items():
        assert getattr(curiosity, constant) == value
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- compute_entropy
|
||||
|
||||
|
||||
def test_compute_entropy_uniform():
    """A 50/50 split carries one bit of Shannon entropy (within 1e-6)."""
    from iai_mcp.curiosity import compute_entropy

    assert abs(compute_entropy([0.5, 0.5]) - 1.0) < 1e-6
|
||||
|
||||
|
||||
def test_compute_entropy_skewed():
    """A 90/10 split must score below 0.5 bits."""
    from iai_mcp.curiosity import compute_entropy

    # Analytically: -(0.9*log2(0.9) + 0.1*log2(0.1)) is roughly 0.469.
    assert compute_entropy([0.9, 0.1]) < 0.5
|
||||
|
||||
|
||||
def test_compute_entropy_degenerate():
    """A single-outcome distribution has zero entropy."""
    from iai_mcp.curiosity import compute_entropy

    single_outcome = [1.0]
    assert compute_entropy(single_outcome) == 0.0
|
||||
|
||||
|
||||
def test_compute_entropy_empty():
    """An empty score list yields 0.0."""
    from iai_mcp.curiosity import compute_entropy

    no_scores = []
    assert compute_entropy(no_scores) == 0.0
|
||||
|
||||
|
||||
def test_compute_entropy_zero_scores_handled():
    """A negative score in the input must not crash and must give entropy >= 0."""
    from iai_mcp.curiosity import compute_entropy

    # presumably negatives are clamped via max(0, s) before normalisation — confirm in compute_entropy
    with_negative = [-1.0, 0.5, 0.5]
    assert compute_entropy(with_negative) >= 0.0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- fire_curiosity
|
||||
|
||||
|
||||
def test_fire_curiosity_below_threshold_silent(tmp_path):
    """Entropy 0.5 produces no question but records a `curiosity_silent_log` event."""
    from iai_mcp.curiosity import fire_curiosity
    from iai_mcp.events import query_events

    store = MemoryStore(path=tmp_path)
    record = _rec()
    store.insert(record)

    question = fire_curiosity(
        store,
        [_Hit(record.id, 0.8)],
        cue="ambiguous",
        entropy=0.5,
        session_id="s1",
        turn=1,
    )

    assert question is None
    assert len(query_events(store, kind="curiosity_silent_log")) >= 1
|
||||
|
||||
|
||||
def test_fire_curiosity_below_ENTROPY_LOW_returns_none(tmp_path):
    """Entropy 0.1 (under ENTROPY_LOW=0.4) with no hits returns None.

    NOTE(review): only the return value is checked; the absence of a log
    event is not asserted here.
    """
    from iai_mcp.curiosity import fire_curiosity

    outcome = fire_curiosity(
        MemoryStore(path=tmp_path),
        [],
        cue="x",
        entropy=0.1,
        session_id="s-silent",
        turn=1,
    )
    assert outcome is None
|
||||
|
||||
|
||||
def test_fire_curiosity_mid_entropy_inline_hint(tmp_path):
    """Entropy 0.8 returns a question whose tier is 'inline'."""
    from iai_mcp.curiosity import fire_curiosity

    store = MemoryStore(path=tmp_path)
    record = _rec()
    store.insert(record)

    question = fire_curiosity(
        store,
        [_Hit(record.id, 0.6)],
        cue="maybe",
        entropy=0.8,
        session_id="s2",
        turn=1,
    )

    assert question is not None
    assert question.tier == "inline"
|
||||
|
||||
|
||||
def test_fire_curiosity_high_entropy_direct_question(tmp_path):
    """Entropy 0.95 returns a question whose tier is 'question'."""
    from iai_mcp.curiosity import fire_curiosity

    store = MemoryStore(path=tmp_path)
    record = _rec()
    store.insert(record)

    question = fire_curiosity(
        store,
        [_Hit(record.id, 0.5)],
        cue="unknown",
        entropy=0.95,
        session_id="s3",
        turn=1,
    )

    assert question is not None
    assert question.tier == "question"
|
||||
|
||||
|
||||
def test_fire_curiosity_cooldown_3_turns(tmp_path):
    """After firing on turn 1, turns 2 and 3 of the same session return None."""
    from iai_mcp.curiosity import fire_curiosity

    store = MemoryStore(path=tmp_path)
    record = _rec()
    store.insert(record)
    hits = [_Hit(record.id, 0.5)]

    assert fire_curiosity(store, hits, "x", 0.95, "s4", turn=1) is not None

    # Both follow-up turns fall inside the cooldown window.
    for blocked_turn in (2, 3):
        assert fire_curiosity(store, hits, "x", 0.95, "s4", turn=blocked_turn) is None
|
||||
|
||||
|
||||
def test_fire_curiosity_cooldown_releases(tmp_path):
    """A question fired on turn 1 does not block another one on turn 4."""
    from iai_mcp.curiosity import fire_curiosity

    store = MemoryStore(path=tmp_path)
    record = _rec()
    store.insert(record)
    hits = [_Hit(record.id, 0.5)]

    for allowed_turn in (1, 4):
        assert fire_curiosity(store, hits, "x", 0.95, "s5", turn=allowed_turn) is not None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- pending_questions
|
||||
|
||||
|
||||
def test_pending_questions_empty(tmp_path):
    """A store with nothing fired reports an empty pending list."""
    from iai_mcp.curiosity import pending_questions

    untouched_store = MemoryStore(path=tmp_path)
    assert pending_questions(untouched_store) == []
|
||||
|
||||
|
||||
def test_pending_questions_filter_resolved(tmp_path):
    """Fire five questions, resolve three, expect two left pending."""
    from iai_mcp.curiosity import fire_curiosity, pending_questions
    from iai_mcp.events import write_event

    store = MemoryStore(path=tmp_path)
    record = _rec()
    store.insert(record)
    hits = [_Hit(record.id, 0.5)]

    # One session per question so the per-session cooldown never applies.
    fired_ids: list = []
    for index in range(5):
        question = fire_curiosity(
            store, hits, f"cue{index}", 0.95, f"session-{index}", turn=1
        )
        assert question is not None
        fired_ids.append(question.id)

    # Mark the first three as resolved through `curiosity_resolved` events.
    for resolved_id in fired_ids[:3]:
        write_event(
            store,
            kind="curiosity_resolved",
            data={"question_id": str(resolved_id)},
            severity="info",
        )

    assert len(pending_questions(store)) == 2
|
||||
|
||||
|
||||
def test_pending_questions_by_session(tmp_path):
    """`pending_questions(session_id=...)` returns only that session's question."""
    from iai_mcp.curiosity import fire_curiosity, pending_questions

    store = MemoryStore(path=tmp_path)
    record = _rec()
    store.insert(record)
    hits = [_Hit(record.id, 0.5)]

    sessions = ("sA", "sB")
    for session in sessions:
        fire_curiosity(store, hits, "c", 0.95, session, turn=1)

    # Query both sessions first, then assert, mirroring one question each.
    per_session = [pending_questions(store, session_id=s) for s in sessions]
    for questions in per_session:
        assert len(questions) == 1
|
||||
121
tests/test_curiosity_bridge_edges.py
Normal file
121
tests/test_curiosity_bridge_edges.py
Normal file
|
|
@ -0,0 +1,121 @@
|
|||
"""Tests for curiosity_bridge edges.
|
||||
|
||||
curiosity_bridge edges:
|
||||
- Created when fire_curiosity surfaces a mid/high-entropy question.
|
||||
- Weight proportional to entropy.
|
||||
- Persist in the edges table with edge_type='curiosity_bridge'.
|
||||
- adds fading on resolution.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from uuid import UUID, uuid4
|
||||
|
||||
import pytest
|
||||
|
||||
from iai_mcp.store import EDGES_TABLE, MemoryStore
|
||||
from iai_mcp.types import EMBED_DIM, MemoryRecord
|
||||
|
||||
|
||||
def _rec(vec=None, tags=None):
    """Build a minimal episodic ``MemoryRecord`` for tests.

    Args:
        vec: Embedding to store. When ``None`` or empty, a unit vector of
            length ``EMBED_DIM`` along the first axis is used.
        tags: Optional iterable of tags; copied into a new list.

    Returns:
        A ``MemoryRecord`` with a random id and identical
        ``created_at`` / ``updated_at`` timestamps (UTC).
    """
    # `vec or default` raises for multi-element array-like inputs whose truth
    # value is ambiguous (e.g. numpy arrays), so test emptiness explicitly.
    if vec is None or len(vec) == 0:
        vec = [1.0] + [0.0] * (EMBED_DIM - 1)
    now = datetime.now(timezone.utc)
    return MemoryRecord(
        id=uuid4(),
        tier="episodic",
        literal_surface="r",
        aaak_index="",
        embedding=vec,
        community_id=None,
        centrality=0.0,
        detail_level=2,
        pinned=False,
        stability=0.0,
        difficulty=0.0,
        last_reviewed=None,
        never_decay=False,
        never_merge=False,
        provenance=[],
        created_at=now,
        updated_at=now,
        tags=list(tags or []),
        language="en",
    )
|
||||
|
||||
|
||||
class _Hit:
|
||||
def __init__(self, rid: UUID, score: float):
|
||||
self.record_id = rid
|
||||
self.score = score
|
||||
|
||||
|
||||
def test_curiosity_bridge_edge_on_fire(tmp_path):
    """Firing on three hits leaves at least three `curiosity_bridge` rows in the edges table."""
    from iai_mcp.curiosity import fire_curiosity

    store = MemoryStore(path=tmp_path)
    records = [_rec() for _ in range(3)]
    for record in records:
        store.insert(record)

    question = fire_curiosity(
        store,
        [_Hit(record.id, 0.5) for record in records],
        "ambiguous",
        entropy=0.85,
        session_id="s-bridge",
        turn=1,
    )
    assert question is not None

    all_edges = store.db.open_table(EDGES_TABLE).to_pandas()
    bridges = all_edges[all_edges["edge_type"] == "curiosity_bridge"]
    # Expect one bridge edge per triggering record, possibly more.
    assert len(bridges) >= 3
|
||||
|
||||
|
||||
def test_curiosity_bridge_edge_weight_proportional_entropy(tmp_path):
    """Fire at entropy 0.75 and 0.95; every resulting bridge edge has positive weight.

    Intent per the test name: higher entropy should give a larger edge delta.
    NOTE(review): only positivity is asserted; the relative ordering of the
    two weights is not checked here.
    """
    from iai_mcp.curiosity import fire_curiosity

    store = MemoryStore(path=tmp_path)
    low_rec, high_rec = _rec(), _rec()
    store.insert(low_rec)
    store.insert(high_rec)

    low_question = fire_curiosity(
        store, [_Hit(low_rec.id, 0.5)], "a", 0.75, session_id="s-a", turn=1
    )
    assert low_question is not None
    # A separate session keeps the cooldown from suppressing the second fire.
    high_question = fire_curiosity(
        store, [_Hit(high_rec.id, 0.5)], "b", 0.95, session_id="s-b", turn=1
    )
    assert high_question is not None

    all_edges = store.db.open_table(EDGES_TABLE).to_pandas()
    bridges = all_edges[all_edges["edge_type"] == "curiosity_bridge"]
    assert (bridges["weight"] > 0).all()
|
||||
|
||||
|
||||
def test_curiosity_bridge_edge_never_decays_in_sweep(tmp_path):
    """An aged, near-zero-weight `curiosity_bridge` edge is still present after `_decay_edges`."""
    from datetime import timedelta

    from iai_mcp.curiosity import fire_curiosity
    from iai_mcp.sleep import _decay_edges

    store = MemoryStore(path=tmp_path)
    record = _rec()
    store.insert(record)
    fire_curiosity(store, [_Hit(record.id, 0.5)], "c", 0.9, "s-never", turn=1)

    table = store.db.open_table(EDGES_TABLE)
    # Back-date the bridge edges by 500 days and shrink their weight so a
    # sweep that touched them would have every reason to drop them.
    long_ago = datetime.now(timezone.utc) - timedelta(days=500)
    table.update(
        where="edge_type = 'curiosity_bridge'",
        values={"updated_at": long_ago, "weight": 0.0001},
    )

    _decay_edges(store)

    remaining = table.to_pandas()
    bridges = remaining[remaining["edge_type"] == "curiosity_bridge"]
    assert len(bridges) >= 1
|
||||
465
tests/test_daemon.py
Normal file
465
tests/test_daemon.py
Normal file
|
|
@ -0,0 +1,465 @@
|
|||
"""Tests for iai_mcp.daemon -- Task 3.
|
||||
|
||||
Covers 10 behaviours:
|
||||
1. main() completes cleanly when shutdown event is set externally.
|
||||
2. State-machine transitions: valid edges succeed, illegal edges raise ValueError.
|
||||
3. Scheduler tick body gets called repeatedly; exceptions caught, daemon continues.
|
||||
4. bge-m3 prewarm invoked exactly once at boot.
|
||||
5. Graceful shutdown cancels scheduler + socket tasks; lock fd closed.
|
||||
5b. mid-night MCP shared-lock acquisition surfaces via holds_exclusive_nb=False.
|
||||
6. Empty-store shortcut: _tick_body records `empty_store` reason without REM work.
|
||||
7. launchd plist is valid XML + has required Label/KeepAlive/ThrottleInterval keys.
|
||||
8. systemd unit has Type=simple + Restart=on-failure + WantedBy=default.target +
|
||||
python3 -m iai_mcp.daemon + TimeoutStopSec=60.
|
||||
9. Neither plist nor systemd unit contains ANTHROPIC_API_KEY (C3 guard).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import plistlib
|
||||
import signal
|
||||
import subprocess
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
PROJECT_ROOT = Path(__file__).resolve().parent.parent
|
||||
PLIST_PATH = PROJECT_ROOT / "deploy" / "launchd" / "com.iai-mcp.daemon.plist"
|
||||
SERVICE_PATH = PROJECT_ROOT / "deploy" / "systemd" / "iai-mcp-daemon.service"
|
||||
|
||||
|
||||
def _module_child_take_shared(path_str: str, ready_flag: str, release_flag: str) -> None:
|
||||
"""Module-level helper (spawn context requires top-level serialisation)."""
|
||||
import fcntl
|
||||
import os
|
||||
import time
|
||||
from pathlib import Path
|
||||
fd = os.open(path_str, os.O_RDWR | os.O_CREAT, 0o600)
|
||||
try:
|
||||
fcntl.flock(fd, fcntl.LOCK_SH)
|
||||
Path(ready_flag).write_text("ok")
|
||||
rel = Path(release_flag)
|
||||
for _ in range(300):
|
||||
if rel.exists():
|
||||
break
|
||||
time.sleep(0.1)
|
||||
finally:
|
||||
try:
|
||||
fcntl.flock(fd, fcntl.LOCK_UN)
|
||||
except OSError:
|
||||
pass
|
||||
os.close(fd)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _fresh_store(tmp_path, monkeypatch):
    """Set IAI_MCP_STORE / IAI_MCP_EMBED_DIM for this test and return a default MemoryStore.

    The store is constructed without arguments; presumably it resolves its
    location from IAI_MCP_STORE — confirm in MemoryStore.
    """
    env = (
        ("IAI_MCP_STORE", str(tmp_path / "iai")),
        ("IAI_MCP_EMBED_DIM", "384"),
    )
    for name, value in env:
        monkeypatch.setenv(name, value)

    from iai_mcp.store import MemoryStore

    return MemoryStore()
|
||||
|
||||
|
||||
def _short_socket_paths(tmp_path, monkeypatch):
    """Redirect concurrency LOCK_PATH + SOCKET_PATH to short paths (AF_UNIX 104-char limit).

    The lock file lives under *tmp_path*; the socket lives in a newly created
    private directory directly under ``/tmp`` so its path stays short.

    Returns:
        ``(lock_path, sock_path, sock_dir)`` as ``Path`` objects.
    """
    import os
    import tempfile

    from iai_mcp import concurrency

    lock_path = tmp_path / ".lock"
    # mkdtemp gives a unique, owner-only directory. The earlier predictable
    # name (pid + id(tmp_path)) combined with exist_ok=True could silently
    # reuse a stale directory, including a leftover socket file.
    # dir="/tmp" is explicit because the platform default temp dir can be
    # long enough to overflow the AF_UNIX path limit.
    sock_dir = Path(
        tempfile.mkdtemp(prefix=f"iai-daemon-{os.getpid()}-", dir="/tmp")
    )
    sock_path = sock_dir / "d.sock"
    monkeypatch.setattr(concurrency, "LOCK_PATH", lock_path)
    monkeypatch.setattr(concurrency, "SOCKET_PATH", sock_path)
    # NOTE: sock_dir is not removed here; callers receive it and may clean up.
    return lock_path, sock_path, sock_dir
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 1: clean shutdown via signal-like event trigger
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_main_clean_shutdown(tmp_path, monkeypatch):
    """main() yields 0 (or is cancelled cleanly) when stopped shortly after boot."""
    from iai_mcp import daemon as daemon_mod
    from iai_mcp import daemon_state as ds_mod

    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path / "iai"))
    monkeypatch.setenv("IAI_MCP_EMBED_DIM", "384")
    monkeypatch.setattr(ds_mod, "STATE_PATH", tmp_path / ".daemon-state.json")
    _short_socket_paths(tmp_path, monkeypatch)

    # Stub embedder: avoids instantiating (and possibly downloading) a real model.
    class _Stub:
        def embed(self, text):
            return [0.0]

    def _fake_embedder(store):
        return _Stub()

    monkeypatch.setattr("iai_mcp.embed.embedder_for_store", _fake_embedder)

    async def runner():
        daemon_task = asyncio.create_task(daemon_mod.main())
        # Let the daemon boot briefly, then stop it by cancelling its task.
        # A CancelledError surfacing here counts as a clean stop.
        await asyncio.sleep(0.2)
        daemon_task.cancel()
        try:
            return await daemon_task
        except asyncio.CancelledError:
            return 0

    assert asyncio.run(runner()) == 0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 2: state-machine transitions
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_state_machine_transitions(tmp_path, monkeypatch):
    """Walk the daemon FSM through legal edges and confirm illegal ones raise ValueError.

    Starts from an empty state dict and finally checks that the persisted
    state reloads with the last legal state (WAKE).
    """
    from iai_mcp import daemon as daemon_mod
    from iai_mcp import daemon_state as ds_mod

    monkeypatch.setattr(ds_mod, "STATE_PATH", tmp_path / ".daemon-state.json")

    state: dict = {}  # no fsm_state key yet

    def advance(target):
        # Legal edge: the call succeeds and the dict reflects the new state.
        daemon_mod.transition(state, target)
        assert state["fsm_state"] == target

    # WAKE -> TRANSITIONING -> SLEEP -> DREAMING are all legal.
    advance(daemon_mod.STATE_TRANSITIONING)
    advance(daemon_mod.STATE_SLEEP)
    advance(daemon_mod.STATE_DREAMING)

    # DREAMING -> TRANSITIONING is rejected and leaves the state untouched.
    with pytest.raises(ValueError, match="Illegal transition"):
        daemon_mod.transition(state, daemon_mod.STATE_TRANSITIONING)
    assert state["fsm_state"] == daemon_mod.STATE_DREAMING

    # DREAMING -> SLEEP -> WAKE are legal.
    advance(daemon_mod.STATE_SLEEP)
    advance(daemon_mod.STATE_WAKE)

    # WAKE -> SLEEP is rejected; it has to pass through TRANSITIONING.
    with pytest.raises(ValueError):
        daemon_mod.transition(state, daemon_mod.STATE_SLEEP)

    # The last successful transition is what load_state() reads back.
    assert ds_mod.load_state()["fsm_state"] == daemon_mod.STATE_WAKE
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 3: scheduler tick loop continues after exceptions
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_scheduler_tick_survives_exceptions(tmp_path, monkeypatch):
    """A tick body that raises once must not stop the scheduler loop.

    The first invocation raises RuntimeError; the loop is expected to call
    the body again and to have recorded a `tick_error` event for the failure.
    """
    from iai_mcp import daemon as daemon_mod

    store = _fresh_store(tmp_path, monkeypatch)

    # Zero interval so several ticks fit into the short run window.
    monkeypatch.setattr(daemon_mod, "TICK_INTERVAL_SEC", 0)

    from iai_mcp.concurrency import ProcessLock

    lock = ProcessLock(tmp_path / ".lock")
    state: dict = {}

    calls = 0

    async def flaky_body(store, lock, state):
        nonlocal calls
        calls += 1
        if calls == 1:
            raise RuntimeError("simulated tick failure")

    async def runner():
        tick_task = asyncio.create_task(
            daemon_mod._scheduler_tick(store, lock, state, tick_body=flaky_body)
        )
        # Give the loop time for multiple ticks, then stop it.
        await asyncio.sleep(0.1)
        tick_task.cancel()
        try:
            await tick_task
        except asyncio.CancelledError:
            pass

    asyncio.run(runner())
    lock.close()

    assert calls >= 2, (
        f"tick loop did not continue past first exception; only {calls} calls"
    )

    # The failing call must have been recorded as a tick_error event.
    from iai_mcp.events import query_events

    errors = query_events(store, kind="tick_error", limit=5)
    assert len(errors) >= 1
    assert "simulated tick failure" in errors[0]["data"].get("error", "")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 4: bge-m3 prewarm called exactly once at boot
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_prewarm_called_once_at_boot(tmp_path, monkeypatch):
    """During a short boot window the stub embedder's `embed` is called exactly once."""
    from iai_mcp import daemon as daemon_mod
    from iai_mcp import daemon_state as ds_mod

    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path / "iai"))
    monkeypatch.setenv("IAI_MCP_EMBED_DIM", "384")
    monkeypatch.setattr(ds_mod, "STATE_PATH", tmp_path / ".daemon-state.json")
    _short_socket_paths(tmp_path, monkeypatch)

    embed_calls = 0

    class _StubEmbedder:
        def embed(self, text):
            nonlocal embed_calls
            embed_calls += 1
            return [0.0]

    def _fake_embedder(store):
        return _StubEmbedder()

    monkeypatch.setattr("iai_mcp.embed.embedder_for_store", _fake_embedder)

    async def runner():
        daemon_task = asyncio.create_task(daemon_mod.main())
        await asyncio.sleep(0.15)
        daemon_task.cancel()
        try:
            await daemon_task
        except asyncio.CancelledError:
            pass

    asyncio.run(runner())
    assert embed_calls == 1, (
        f"prewarm expected once, got {embed_calls}"
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 5: graceful shutdown cancels both tasks + closes lock fd
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_graceful_shutdown_cancels_tasks_and_closes_lock(tmp_path, monkeypatch):
    """Cancelling main() after boot must result in at least one ProcessLock.close() call.

    `ProcessLock.close` is wrapped with a counting shim that delegates to the
    real method, so the test observes the call without changing behaviour.
    """
    from iai_mcp import daemon as daemon_mod
    from iai_mcp import daemon_state as ds_mod
    from iai_mcp import concurrency

    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path / "iai"))
    monkeypatch.setenv("IAI_MCP_EMBED_DIM", "384")
    monkeypatch.setattr(ds_mod, "STATE_PATH", tmp_path / ".daemon-state.json")
    _short_socket_paths(tmp_path, monkeypatch)

    def _fake_embedder(store):
        class _S:
            def embed(self, text):
                return [0.0] * 384

        return _S()

    monkeypatch.setattr("iai_mcp.embed.embedder_for_store", _fake_embedder)

    close_calls = {"n": 0}
    real_close = concurrency.ProcessLock.close

    def _tracked_close(self):
        close_calls["n"] += 1
        real_close(self)

    monkeypatch.setattr(concurrency.ProcessLock, "close", _tracked_close)

    # Startup grace period: poll in 50 ms steps for about one second. The
    # earlier loop declared a 5 s deadline but always broke out at 1 s, and
    # guarded the early exit with an always-true `close_calls["n"] >= 0`;
    # this expresses the effective behaviour directly.
    poll_step_sec = 0.05
    max_polls = 20

    async def runner():
        task = asyncio.create_task(daemon_mod.main())
        # Give main() time to get through its startup steps and park on its
        # shutdown wait, so the cancellation is delivered at an await point
        # and the daemon's cleanup path can run. Stop early if main() has
        # already finished on its own.
        for _ in range(max_polls):
            await asyncio.sleep(poll_step_sec)
            if task.done():
                break
        task.cancel()
        try:
            await task
        except asyncio.CancelledError:
            pass

    asyncio.run(runner())
    assert close_calls["n"] >= 1, "lock.close() was never called on shutdown"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 5b: holds_exclusive_nb returns False when a shared holder appears
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_d06_holds_exclusive_nb_yields_to_mcp(tmp_path, monkeypatch):
    """Check the cooperative-yield probe on ``ProcessLock``.

    The daemon-side lock first takes EX and confirms the probe reports it.
    It then releases, a spawned child (``_module_child_take_shared``) takes
    the lock, and ``holds_exclusive_nb()`` must report False -- the signal
    downstream plans (04-02) use to abort mid-cycle.
    """
    import multiprocessing
    import time
    from iai_mcp.concurrency import ProcessLock

    lock_path = tmp_path / ".lock"
    ready_flag = tmp_path / ".ready"
    release_flag = tmp_path / ".release"
    mp_ctx = multiprocessing.get_context("spawn")

    daemon_lock = ProcessLock(lock_path)
    try:
        assert daemon_lock.try_acquire_exclusive() is True
        assert daemon_lock.holds_exclusive_nb() is True

        # Simulate the gap between REM cycles: the daemon intentionally
        # yields so the child is able to grab the lock.
        daemon_lock.release()

        child = mp_ctx.Process(
            target=_module_child_take_shared,
            args=(str(lock_path), str(ready_flag), str(release_flag)),
        )
        child.start()
        try:
            # Poll (max 15 s) for the child's "ready" marker file.
            give_up_at = time.time() + 15
            while not ready_flag.exists():
                if time.time() >= give_up_at:
                    break
                time.sleep(0.05)
            assert ready_flag.exists()

            # The daemon must now see "we do not hold EX; MCP is active".
            assert daemon_lock.holds_exclusive_nb() is False
        finally:
            # Always let the child go, escalating to terminate() if it
            # does not exit on its own.
            release_flag.write_text("go")
            child.join(timeout=10)
            if child.is_alive():
                child.terminate()
                child.join(timeout=2)
    finally:
        daemon_lock.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 6: empty-store shortcut in _tick_body
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_empty_store_shortcut(tmp_path, monkeypatch):
    """One ``_tick_body`` pass over a fresh store must take the shortcut.

    Expectations:
      * ``state["last_tick_skipped_reason"]`` is set to ``"empty_store"``;
      * no ``rem_cycle_started`` event is recorded in the store.

    The lock is closed in a ``finally`` so a failing tick does not leave
    the lock handle open for the rest of the test session.
    """
    from iai_mcp import daemon as daemon_mod

    store = _fresh_store(tmp_path, monkeypatch)
    from iai_mcp.concurrency import ProcessLock
    lock = ProcessLock(tmp_path / ".lock")
    state: dict = {"fsm_state": "WAKE"}

    try:
        asyncio.run(daemon_mod._tick_body(store, lock, state))
    finally:
        lock.close()

    assert state.get("last_tick_skipped_reason") == "empty_store"

    # No `rem_cycle_started` event emitted on empty store.
    from iai_mcp.events import query_events
    rem = query_events(store, kind="rem_cycle_started", limit=5)
    assert rem == []
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 7: launchd plist valid XML + required keys
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_launchd_plist_valid_xml_with_required_keys():
    """Parse the launchd plist and check every key the daemon relies on."""
    assert PLIST_PATH.exists(), f"missing plist at {PLIST_PATH}"

    with PLIST_PATH.open("rb") as handle:
        data = plistlib.load(handle)

    # Identity + launch behaviour.
    assert data["Label"] == "com.iai-mcp.daemon"
    assert data["ProgramArguments"][-1] == "iai_mcp.daemon"
    assert data["RunAtLoad"] is True

    # Plan 10.6-01 Task 1.7: the KeepAlive policy is `Crashed=true` only.
    # The legacy `SuccessfulExit=false` was paired with the 75/0 exit-code
    # branching; the lifecycle state machine now exits 0 uniformly on
    # graceful shutdown, so SuccessfulExit=false would cause a respawn loop.
    keepalive = data["KeepAlive"]
    assert isinstance(keepalive, dict)
    assert keepalive.get("Crashed") is True
    assert "SuccessfulExit" not in keepalive

    assert data["ThrottleInterval"] == 5
    for path_key in ("StandardOutPath", "StandardErrorPath", "WorkingDirectory"):
        assert path_key in data

    env = data["EnvironmentVariables"]
    for required_key in ("PATH", "IAI_MCP_STORE", "HOME", "LANG"):
        assert required_key in env, f"missing env key {required_key}"

    # C3 guard (redundant with Test 9, but checked locally as well).
    assert "ANTHROPIC_API_KEY" not in env
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 8: systemd unit required keys
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_systemd_unit_required_keys():
    """Check that the systemd unit file contains every required directive."""
    assert SERVICE_PATH.exists(), f"missing unit file at {SERVICE_PATH}"
    text = SERVICE_PATH.read_text()

    # Checked in file order: section header first, then its directives.
    required_fragments = (
        "[Unit]",
        "Description=",
        "[Service]",
        "Type=simple",
        "Restart=on-failure",
        "RestartSec=30",
        "StartLimitIntervalSec=60",
        "StartLimitBurst=3",
        "python3 -m iai_mcp.daemon",
        "StandardOutput=journal",
        "StandardError=journal",
        "SyslogIdentifier=iai-mcp-daemon",
        "TimeoutStopSec=60",
        "KillSignal=SIGTERM",
        "[Install]",
        "WantedBy=default.target",
    )
    for fragment in required_fragments:
        assert fragment in text
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 9: C3 guard -- no ANTHROPIC_API_KEY anywhere
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_c3_no_anthropic_api_key_in_artifacts():
    """C3 guard: the daemon source, plist and unit file never mention the key."""
    artifacts = {
        "daemon.py": (PROJECT_ROOT / "src" / "iai_mcp" / "daemon.py").read_text(),
        "plist": PLIST_PATH.read_text(),
        "service": SERVICE_PATH.read_text(),
    }

    for name, src in artifacts.items():
        assert "ANTHROPIC_API_KEY" not in src, (
            f"C3 VIOLATION: ANTHROPIC_API_KEY found in {name}"
        )
|
||||
556
tests/test_daemon_dispatcher.py
Normal file
556
tests/test_daemon_dispatcher.py
Normal file
|
|
@ -0,0 +1,556 @@
|
|||
"""End-to-end round-trip tests for the daemon socket dispatcher (Plan 04-gap-1).
|
||||
|
||||
Unlike tests/test_core_bedtime_inject.py (which uses _ThreadedFakeDaemon that
|
||||
echoes canned OK replies), these tests spin up the REAL serve_control_socket
|
||||
with the REAL _dispatch_socket_request bound to a REAL state dict + real
|
||||
ProcessLock on a tmp directory. They send each of the 6 message types as
|
||||
real NDJSON over a real AF_UNIX socket and assert:
|
||||
- correct response shape per message type
|
||||
- state mutations actually persisted to ~/.iai-mcp/.daemon-state.json
|
||||
(scoped to tmp_path via monkeypatch of daemon_state.STATE_PATH)
|
||||
- invalid messages rejected with invalid_message reason code
|
||||
- unknown types rejected with unknown_message_type reason code
|
||||
- version field present in status response
|
||||
- concurrent clients handled without corruption
|
||||
|
||||
This closes the verifier-identified test gap that masked the dispatcher
|
||||
blocker throughout execution.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fixtures
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture
def short_socket_paths(tmp_path, monkeypatch):
    """Redirect LOCK_PATH + SOCKET_PATH + STATE_PATH away from the real store.

    AF_UNIX on macOS caps socket paths at ~104 bytes; pytest's tmp_path can
    be too long under xdist. The socket therefore lives in a short
    ``/tmp/iai-disp-<pid>-<n>/`` directory, while the lock and state files
    stay under ``tmp_path`` (regular filesystem, no length limit).

    Yields:
        ``(lock_path, sock_path, state_path)``.

    Teardown removes the whole socket directory. ``_with_real_dispatcher``
    places a ``.lock_inline`` path in that same directory, so a plain
    ``rmdir()`` could fail on a non-empty directory and -- with the error
    swallowed -- leave one stale ``/tmp/iai-disp-*`` directory per test.
    """
    import shutil

    from iai_mcp import concurrency, daemon_state

    lock_path = tmp_path / ".lock"
    sock_dir = Path(f"/tmp/iai-disp-{os.getpid()}-{id(tmp_path)}")
    sock_dir.mkdir(parents=True, exist_ok=True)
    sock_path = sock_dir / "d.sock"
    state_path = tmp_path / ".daemon-state.json"

    monkeypatch.setattr(concurrency, "LOCK_PATH", lock_path)
    monkeypatch.setattr(concurrency, "SOCKET_PATH", sock_path)
    monkeypatch.setattr(daemon_state, "STATE_PATH", state_path)

    try:
        yield lock_path, sock_path, state_path
    finally:
        # Best-effort: removes the socket plus anything else left behind;
        # never raises, so teardown cannot mask the test's own failure.
        shutil.rmtree(sock_dir, ignore_errors=True)
|
||||
|
||||
|
||||
async def _send_ndjson(sock_path: Path, message: dict, *, timeout: float = 5.0) -> dict:
|
||||
"""Connect, send one NDJSON line, read one line back, close."""
|
||||
reader, writer = await asyncio.wait_for(
|
||||
asyncio.open_unix_connection(path=str(sock_path)),
|
||||
timeout=timeout,
|
||||
)
|
||||
try:
|
||||
writer.write((json.dumps(message) + "\n").encode("utf-8"))
|
||||
await writer.drain()
|
||||
line = await asyncio.wait_for(reader.readline(), timeout=timeout)
|
||||
finally:
|
||||
writer.close()
|
||||
try:
|
||||
await writer.wait_closed()
|
||||
except Exception:
|
||||
pass
|
||||
if not line:
|
||||
raise AssertionError("daemon closed without reply")
|
||||
return json.loads(line.decode("utf-8"))
|
||||
|
||||
|
||||
async def _with_real_dispatcher(sock_path: Path, state: dict, coro_fn):
    """Boot the real control socket, run ``coro_fn``, tear down cleanly.

    Starts ``serve_control_socket`` as a background task bound to
    ``sock_path`` with the given ``state`` dict and a ``ProcessLock`` placed
    next to the socket, waits (up to ~2.5 s) for the socket file to appear,
    then awaits ``coro_fn(sock_path, state)`` and returns its result.

    Raises:
        AssertionError: if the socket never appears. When the server task
            has already failed, its exception is chained as the cause.

    Teardown (shutdown event, bounded wait for the server task, lock close)
    runs on every exit path, including the bind-failure path.
    """
    from iai_mcp.concurrency import ProcessLock, serve_control_socket

    lock = ProcessLock(sock_path.parent / ".lock_inline")
    shutdown = asyncio.Event()
    server_task = asyncio.create_task(
        serve_control_socket(
            store=None,
            lock=lock,
            state=state,
            shutdown=shutdown,
            socket_path=sock_path,
        ),
    )

    try:
        # Wait for bind; stop polling early if the server task already died.
        for _ in range(250):
            if sock_path.exists() or server_task.done():
                break
            await asyncio.sleep(0.01)

        if not sock_path.exists():
            cause = None
            if server_task.done() and not server_task.cancelled():
                cause = server_task.exception()
            raise AssertionError("socket never bound") from cause

        return await coro_fn(sock_path, state)
    finally:
        shutdown.set()
        try:
            await asyncio.wait_for(server_task, timeout=5)
        except Exception:
            # Best-effort: a server-side error or timeout during teardown
            # must not replace the test's own result or failure.
            pass
        lock.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 1: status returns version + fsm_state + uptime + pending_digest shape
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_status_returns_version_and_full_snapshot(short_socket_paths):
    """A ``status`` request returns the full snapshot with a trimmed digest."""
    _, sock_path, _ = short_socket_paths
    from iai_mcp import __version__ as pkg_version

    digest = {
        "rem_cycles_completed": 2,
        "episodes_processed": 15,
        "schemas_induced_tier0": 3,
        "claude_call_used": True,
        "main_insight_text": "deeply long verbose insight text " * 50,
    }
    state = {
        "fsm_state": "WAKE",
        "daemon_started_at": "2026-04-18T00:00:00+00:00",
        "last_tick_at": "2026-04-18T12:30:00+00:00",
        "quiet_window": [44, 16],
        "pending_digest": digest,
        "scheduler_paused": False,
    }

    async def _ask_status(path, _state):
        return await _send_ndjson(path, {"type": "status"})

    resp = asyncio.run(_with_real_dispatcher(sock_path, state, _ask_status))

    assert resp["ok"] is True
    # Backwards-compat keys.
    assert resp["state"] == "WAKE"
    assert isinstance(resp["uptime_sec"], (int, float))

    # Plan 04-gap-1 additions.
    expected_fields = {
        "version": pkg_version,
        "fsm_state": "WAKE",
        "last_tick_at": "2026-04-18T12:30:00+00:00",
        "quiet_window": [44, 16],
        "daemon_started_at": "2026-04-18T00:00:00+00:00",
    }
    for field, wanted in expected_fields.items():
        assert resp[field] == wanted
    assert resp["scheduler_paused"] is False

    # pending_digest is truncated to top-level counters (no main_insight_text).
    pd = resp["pending_digest"]
    expected_counters = {
        "rem_cycles_completed": 2,
        "episodes_processed": 15,
        "schemas_induced_tier0": 3,
    }
    for counter, wanted in expected_counters.items():
        assert pd[counter] == wanted
    assert pd["claude_call_used"] is True
    assert "main_insight_text" not in pd, (
        "truncated digest leaked verbose text over the socket"
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 2: user_initiated_sleep persists state AND respects already_sleeping
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_user_initiated_sleep_sets_pending_flag(short_socket_paths):
    """``user_initiated_sleep`` from WAKE is accepted and persisted to disk."""
    _, sock_path, _ = short_socket_paths
    state = {"fsm_state": "WAKE"}
    request = {
        "type": "user_initiated_sleep",
        "reason": "I am going to bed",
        "ts": "2026-04-18T23:00:00+00:00",
    }

    async def _exchange(path, _state):
        return await _send_ndjson(path, request)

    resp = asyncio.run(_with_real_dispatcher(sock_path, state, _exchange))

    assert resp == {"ok": True, "state": "TRANSITIONING"}

    # The mutation must be visible in the on-disk state record.
    from iai_mcp.daemon_state import load_state
    persisted = load_state()["user_sleep_request"]
    assert persisted["pending"] is True
    assert persisted["reason"] == "I am going to bed"
    assert persisted["ts"] == "2026-04-18T23:00:00+00:00"
|
||||
|
||||
|
||||
def test_user_initiated_sleep_rejects_when_already_sleeping(short_socket_paths):
    """A sleep request while DREAMING is refused and leaves no request flag."""
    _, sock_path, _ = short_socket_paths
    state = {"fsm_state": "DREAMING"}
    request = {
        "type": "user_initiated_sleep",
        "reason": "redundant",
        "ts": "2026-04-18T23:00:00+00:00",
    }

    async def _exchange(path, _state):
        return await _send_ndjson(path, request)

    resp = asyncio.run(_with_real_dispatcher(sock_path, state, _exchange))

    assert resp == {"ok": False, "reason": "already_sleeping"}

    # State was NOT mutated (no user_sleep_request written). The dispatcher
    # doesn't touch state in the already_sleeping branch, so the file may
    # not exist (no prior save_state call). Either way: no flag.
    from iai_mcp.daemon_state import load_state
    on_disk = load_state()
    assert "user_sleep_request" not in on_disk
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 3: force_wake / force_rem set pending flags + persist
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_force_wake_queues_flag(short_socket_paths):
    """``force_wake`` is acknowledged and its pending flag + ts are persisted."""
    _, sock_path, _ = short_socket_paths
    state = {"fsm_state": "DREAMING"}
    request = {"type": "force_wake", "ts": "2026-04-18T23:45:00+00:00"}

    async def _exchange(path, _state):
        return await _send_ndjson(path, request)

    resp = asyncio.run(_with_real_dispatcher(sock_path, state, _exchange))
    assert resp == {"ok": True, "reason": "wake_queued"}

    from iai_mcp.daemon_state import load_state
    queued = load_state()["force_wake_request"]
    assert queued["pending"] is True
    assert queued["ts"] == "2026-04-18T23:45:00+00:00"
|
||||
|
||||
|
||||
def test_force_rem_queues_flag(short_socket_paths):
    """``force_rem`` is acknowledged and its pending flag + ts are persisted."""
    _, sock_path, _ = short_socket_paths
    state = {"fsm_state": "WAKE"}
    request = {"type": "force_rem", "ts": "2026-04-18T10:00:00+00:00"}

    async def _exchange(path, _state):
        return await _send_ndjson(path, request)

    resp = asyncio.run(_with_real_dispatcher(sock_path, state, _exchange))
    assert resp == {"ok": True, "reason": "rem_queued"}

    from iai_mcp.daemon_state import load_state
    queued = load_state()["force_rem_request"]
    assert queued["pending"] is True
    assert queued["ts"] == "2026-04-18T10:00:00+00:00"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 4: pause/resume flip scheduler_paused flag
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_pause_then_resume_flips_flag(short_socket_paths):
    """``pause`` followed by ``resume`` leaves ``scheduler_paused`` False."""
    _, sock_path, _ = short_socket_paths
    state = {"fsm_state": "WAKE"}

    async def _pause_then_resume(path, _state):
        paused = await _send_ndjson(path, {"type": "pause"})
        resumed = await _send_ndjson(path, {"type": "resume"})
        return paused, resumed

    pause_resp, resume_resp = asyncio.run(
        _with_real_dispatcher(sock_path, state, _pause_then_resume)
    )

    assert pause_resp == {"ok": True, "paused": True}
    assert resume_resp == {"ok": True, "paused": False}

    from iai_mcp.daemon_state import load_state
    on_disk = load_state()
    # After resume, scheduler_paused must be False (the LAST value written).
    assert on_disk["scheduler_paused"] is False
|
||||
|
||||
|
||||
def test_pause_persists_True_before_resume(short_socket_paths):
    """With only a pause sent (no resume), the persisted flag reads True."""
    _, sock_path, _ = short_socket_paths
    state = {"fsm_state": "WAKE"}

    async def _pause_only(path, _state):
        return await _send_ndjson(path, {"type": "pause"})

    resp = asyncio.run(_with_real_dispatcher(sock_path, state, _pause_only))
    assert resp == {"ok": True, "paused": True}

    from iai_mcp.daemon_state import load_state
    on_disk = load_state()
    assert on_disk["scheduler_paused"] is True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 5: unknown type returns structured error
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_unknown_message_type_returns_error(short_socket_paths):
    """An unrecognised ``type`` yields a structured error echoing that type."""
    _, sock_path, _ = short_socket_paths
    state = {"fsm_state": "WAKE"}
    request = {"type": "nuke_from_orbit", "ts": "whatever"}

    async def _exchange(path, _state):
        return await _send_ndjson(path, request)

    resp = asyncio.run(_with_real_dispatcher(sock_path, state, _exchange))

    assert resp["ok"] is False
    assert resp["reason"] == "unknown_message_type"
    assert resp["type"] == "nuke_from_orbit"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 6: invalid messages rejected with ASVS V5 reason code
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_invalid_message_missing_ts_on_force_wake(short_socket_paths):
    """``force_wake`` without ``ts`` is rejected; the error names ``ts``."""
    _, sock_path, _ = short_socket_paths
    state = {"fsm_state": "WAKE"}

    async def _exchange(path, _state):
        return await _send_ndjson(path, {"type": "force_wake"})

    resp = asyncio.run(_with_real_dispatcher(sock_path, state, _exchange))

    assert resp["ok"] is False
    assert resp["reason"] == "invalid_message"
    assert "ts" in resp["error"]
|
||||
|
||||
|
||||
def test_invalid_message_wrong_type_user_sleep(short_socket_paths):
    """A non-string ``reason`` is rejected; the error names ``reason``."""
    _, sock_path, _ = short_socket_paths
    state = {"fsm_state": "WAKE"}
    request = {"type": "user_initiated_sleep", "reason": 42, "ts": "x"}

    async def _exchange(path, _state):
        return await _send_ndjson(path, request)

    resp = asyncio.run(_with_real_dispatcher(sock_path, state, _exchange))

    assert resp["ok"] is False
    assert resp["reason"] == "invalid_message"
    assert "reason" in resp["error"]
|
||||
|
||||
|
||||
def test_invalid_message_non_string_type(short_socket_paths):
    """A message whose ``type`` is not a string is rejected as invalid."""
    _, sock_path, _ = short_socket_paths
    state = {"fsm_state": "WAKE"}

    async def _exchange(path, _state):
        return await _send_ndjson(path, {"type": 42})

    resp = asyncio.run(_with_real_dispatcher(sock_path, state, _exchange))
    assert resp["ok"] is False
    assert resp["reason"] == "invalid_message"
|
||||
|
||||
|
||||
def test_invalid_message_pause_wrong_seconds_type(short_socket_paths):
    """``pause`` with a string ``seconds`` is rejected; the error names it."""
    _, sock_path, _ = short_socket_paths
    state = {"fsm_state": "WAKE"}
    request = {"type": "pause", "seconds": "forever"}

    async def _exchange(path, _state):
        return await _send_ndjson(path, request)

    resp = asyncio.run(_with_real_dispatcher(sock_path, state, _exchange))
    assert resp["ok"] is False
    assert resp["reason"] == "invalid_message"
    assert "seconds" in resp["error"]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 7: C2 guard -- dispatcher never transitions FSM directly
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_dispatcher_does_not_transition_fsm_directly(short_socket_paths):
    """C2: handling a socket request must not change ``fsm_state``.

    ``user_initiated_sleep`` only sets a pending flag; the FSM stays at
    WAKE until the scheduler tick picks the flag up. Without this
    invariant the dispatcher and the scheduler would race on FSM state.
    """
    _, sock_path, _ = short_socket_paths
    state = {"fsm_state": "WAKE"}
    request = {
        "type": "user_initiated_sleep",
        "reason": "night",
        "ts": "2026-04-18T23:00:00+00:00",
    }

    async def _send_and_read_fsm(path, live_state):
        await _send_ndjson(path, request)
        # Read the FSM value from the very dict the dispatcher was given.
        return live_state["fsm_state"]

    fsm_after = asyncio.run(
        _with_real_dispatcher(sock_path, state, _send_and_read_fsm)
    )
    # The dispatcher MUST leave fsm_state at WAKE; only the scheduler
    # transitions it (under the fcntl exclusive lock).
    assert fsm_after == "WAKE"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 8: reason string clipped to 500 chars (ASVS V5 output hardening)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_user_initiated_sleep_reason_clipped(short_socket_paths):
    """A 5000-character ``reason`` is accepted but stored as 500 characters."""
    _, sock_path, _ = short_socket_paths
    state = {"fsm_state": "WAKE"}
    request = {
        "type": "user_initiated_sleep",
        "reason": "x" * 5000,
        "ts": "2026-04-18T23:00:00+00:00",
    }

    async def _exchange(path, _state):
        return await _send_ndjson(path, request)

    resp = asyncio.run(_with_real_dispatcher(sock_path, state, _exchange))
    assert resp == {"ok": True, "state": "TRANSITIONING"}

    from iai_mcp.daemon_state import load_state
    stored_reason = load_state()["user_sleep_request"]["reason"]
    assert len(stored_reason) == 500
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 9: concurrent clients handled without data races
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_concurrent_clients_both_succeed(short_socket_paths):
    """Two parallel clients are both served and both mutations persist.

    The dispatcher must serve both without corrupting the state file or
    double-writing.
    """
    _, sock_path, _ = short_socket_paths
    state = {"fsm_state": "WAKE"}

    async def _fire_both(path, _state):
        # Both requests are in flight at the same time.
        return await asyncio.gather(
            _send_ndjson(
                path,
                {"type": "force_rem", "ts": "2026-04-18T01:00:00+00:00"},
            ),
            _send_ndjson(path, {"type": "pause"}),
        )

    rem_resp, pause_resp = asyncio.run(
        _with_real_dispatcher(sock_path, state, _fire_both)
    )

    # Both responses well-formed; dispatcher handled each independently.
    assert rem_resp == {"ok": True, "reason": "rem_queued"}
    assert pause_resp == {"ok": True, "paused": True}

    # Both state mutations persisted.
    from iai_mcp.daemon_state import load_state
    on_disk = load_state()
    assert on_disk["force_rem_request"]["pending"] is True
    assert on_disk["scheduler_paused"] is True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 10: full suite hitting all 6 message types against one daemon
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_full_message_type_matrix_end_to_end(short_socket_paths):
    """One live dispatcher serves all 6 message types, one after another.

    Mirrors what the CLI + MCP wrapper do in production.
    """
    _, sock_path, _ = short_socket_paths
    state = {
        "fsm_state": "WAKE",
        "daemon_started_at": "2026-04-18T00:00:00+00:00",
    }

    # Sent strictly in this order; replies are keyed by message type.
    requests = [
        {"type": "status"},
        {
            "type": "user_initiated_sleep",
            "reason": "bedtime",
            "ts": "2026-04-18T23:30:00+00:00",
        },
        {"type": "force_rem", "ts": "2026-04-18T23:31:00+00:00"},
        {"type": "force_wake", "ts": "2026-04-18T23:32:00+00:00"},
        {"type": "pause"},
        {"type": "resume"},
    ]

    async def _send_all(path, _state):
        replies = {}
        for message in requests:
            replies[message["type"]] = await _send_ndjson(path, message)
        return replies

    results = asyncio.run(_with_real_dispatcher(sock_path, state, _send_all))

    assert results["status"]["ok"] is True
    assert results["status"]["fsm_state"] == "WAKE"
    expected_replies = {
        "user_initiated_sleep": {"ok": True, "state": "TRANSITIONING"},
        "force_rem": {"ok": True, "reason": "rem_queued"},
        "force_wake": {"ok": True, "reason": "wake_queued"},
        "pause": {"ok": True, "paused": True},
        "resume": {"ok": True, "paused": False},
    }
    for kind, wanted in expected_replies.items():
        assert results[kind] == wanted

    # All mutations land in the ONE state file.
    from iai_mcp.daemon_state import load_state
    on_disk = load_state()
    for flag in ("user_sleep_request", "force_rem_request", "force_wake_request"):
        assert on_disk[flag]["pending"] is True
    # scheduler_paused was toggled last via resume -> False.
    assert on_disk["scheduler_paused"] is False
|
||||
281
tests/test_daemon_no_silent_zero_exit.py
Normal file
281
tests/test_daemon_no_silent_zero_exit.py
Normal file
|
|
@ -0,0 +1,281 @@
|
|||
"""Phase 10.6 Plan 10.6-01 Task 1.8 -- rewritten contract tests.
|
||||
|
||||
Old contract (Phase 07.8 + bug-fix 2026-05-01):
|
||||
Every non-RSS, non-user shutdown path returned exit 75. The
|
||||
`user_requested_shutdown` sentinel + `_resolve_shutdown_exit_code`
|
||||
helper differentiated explicit `iai-mcp daemon stop` (exit 0,
|
||||
plist suppresses respawn) from every other shutdown path
|
||||
(exit 75, plist respawns).
|
||||
|
||||
New contract:
|
||||
Daemon main() exits 0 uniformly on graceful shutdown, regardless
|
||||
of who triggered it. The plist's `KeepAlive={"Crashed": true}`
|
||||
ensures graceful exit 0 stays DEAD until wrapper kickstart fires.
|
||||
Only path returning a non-zero exit is `LifecycleLockConflict`
|
||||
(a same-host live-PID conflict) which returns 1.
|
||||
|
||||
Cross-process invariant PRESERVED from 541c874:
|
||||
The CLI `iai-mcp daemon stop` runs in a SEPARATE process from
|
||||
the daemon. CLI writes the `user_requested_shutdown=True`
|
||||
sentinel to `.daemon-state.json` BEFORE sending SIGTERM. The
|
||||
daemon's main() finally block calls
|
||||
`_clear_user_shutdown_sentinel(state)` which:
|
||||
1. Reads the on-disk state file (the source of truth, since
|
||||
the in-memory state was loaded at boot).
|
||||
2. Pops the sentinel from disk + memory.
|
||||
3. Re-saves the cleaned state record.
|
||||
|
||||
The sentinel is now informational rather than control: its presence
|
||||
on disk no longer changes the exit code. Tests E + F still verify
|
||||
the CLI write-before-SIGTERM ordering -- that ordering is what
|
||||
makes the daemon's later cleanup symmetric across boots.
|
||||
|
||||
Validates: WAKE-14.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import platform
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from iai_mcp import cli as cli_mod
|
||||
from iai_mcp import daemon as daemon_mod
|
||||
from iai_mcp import daemon_state as state_mod
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test A -- _clear_user_shutdown_sentinel: clean state -> in-memory pop only
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_clear_sentinel_no_disk_flag(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
) -> None:
    """With no sentinel on disk or in memory the helper changes nothing.

    Guards the clean-shutdown case (no explicit `iai-mcp daemon stop`):
    the helper must not raise and must leave the in-memory record exactly
    as it found it.
    """
    monkeypatch.setattr(
        state_mod, "STATE_PATH", tmp_path / ".daemon-state.json", raising=True
    )

    before = {"fsm_state": "WAKE", "daemon_pid": 12345}
    state: dict = dict(before)
    daemon_mod._clear_user_shutdown_sentinel(state)
    # In-memory dict shape is preserved (no spurious keys / drops).
    assert state == before
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test B -- sentinel True on disk -> cleared from disk + memory
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_clear_sentinel_true_on_disk(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
) -> None:
    """A sentinel written to disk is removed by the helper.

    Models the production flow: the CLI process wrote the sentinel to the
    state file, while the daemon's in-memory dict never carried it. After
    the helper runs, neither the file nor the dict contains the flag.
    """
    monkeypatch.setattr(
        state_mod, "STATE_PATH", tmp_path / ".daemon-state.json", raising=True
    )
    on_disk_before = {"user_requested_shutdown": True, "fsm_state": "WAKE"}
    state_mod.save_state(on_disk_before)

    # No "user_requested_shutdown" key in memory -- production reality.
    daemon_in_memory: dict = {"fsm_state": "DREAMING", "daemon_pid": 999}
    daemon_mod._clear_user_shutdown_sentinel(daemon_in_memory)

    # Disk-side sentinel is gone.
    assert "user_requested_shutdown" not in state_mod.load_state()
    # In-memory dict picked up no spurious flag.
    assert "user_requested_shutdown" not in daemon_in_memory
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test C -- helper does not mutate unrelated keys
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_clear_sentinel_preserves_unrelated_keys(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
) -> None:
    """Only the sentinel key may disappear from the in-memory state.

    The helper is expected to do exactly one in-memory mutation
    (`state.pop(_USER_SHUTDOWN_FLAG, None)`). A future refactor that adds
    drive-by mutations would silently drop fields like daemon_pid /
    fsm_state / pending_digest, which main()'s finally block depends on
    for the doctor / next-boot pipeline.
    """
    monkeypatch.setattr(
        state_mod, "STATE_PATH", tmp_path / ".daemon-state.json", raising=True
    )
    state_mod.save_state({"user_requested_shutdown": True, "fsm_state": "WAKE"})

    before = {
        "fsm_state": "DREAMING",
        "daemon_pid": 42,
        "pending_digest": {"rem_cycles_completed": 79},
        "user_requested_shutdown": True,
        "fsm_transition_at": "2026-05-01T10:17:54+00:00",
    }
    state = before.copy()
    daemon_mod._clear_user_shutdown_sentinel(state)

    # Everything except the sentinel must survive untouched.
    expected = dict(before)
    del expected["user_requested_shutdown"]
    assert state == expected
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test D -- read failure during shutdown is fail-safe (in-memory pop only)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_clear_sentinel_disk_read_failure_is_fail_safe(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A failing load_state() must not stop the shutdown path.

    When the disk read raises (transient FS error / corrupt file) the
    helper must NOT propagate the exception, and the in-memory sentinel
    must still be removed.
    """

    def _raise_read_error() -> dict:
        raise OSError("simulated transient read error")

    # Patch the name as the daemon module sees it.
    monkeypatch.setattr(daemon_mod, "load_state", _raise_read_error)

    in_memory: dict = {"fsm_state": "WAKE", "user_requested_shutdown": True}
    daemon_mod._clear_user_shutdown_sentinel(in_memory)

    # The in-memory pop happens even though the disk read failed.
    assert "user_requested_shutdown" not in in_memory
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test E -- cmd_daemon_stop writes the sentinel BEFORE launchctl (macOS)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_e_cmd_daemon_stop_writes_sentinel_before_launchctl(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
) -> None:
    """macOS: the sentinel is persisted before launchctl is invoked.

    Cross-process invariant from 541c874 PRESERVED: `iai-mcp daemon stop`
    writes user_requested_shutdown=True to .daemon-state.json BEFORE
    sending SIGTERM, and the daemon's later `_clear_user_shutdown_sentinel`
    cleans it up. Phase 10.6 no longer branches the exit code on the
    sentinel, but the write-before-SIGTERM ordering is still part of the
    wakeup-safe shutdown protocol (a hung CLI write must not delay the
    SIGTERM the user expects).
    """
    monkeypatch.setattr(platform, "system", lambda: "Darwin")

    state_path = tmp_path / ".daemon-state.json"
    monkeypatch.setattr(state_mod, "STATE_PATH", state_path, raising=True)

    # Ordered record of save_state / subprocess.run invocations.
    call_log: list[str] = []
    original_save = state_mod.save_state

    def tracking_save_state(state: dict) -> None:
        call_log.append(f"save_state:{state.get('user_requested_shutdown')}")
        original_save(state)

    def fake_run(argv, **_kwargs):
        call_log.append(f"subprocess.run:{argv[0]}:{argv[1]}")
        return type("R", (), {"returncode": 0})()

    monkeypatch.setattr(state_mod, "save_state", tracking_save_state)
    monkeypatch.setattr(cli_mod.subprocess, "run", fake_run)

    def first_index(prefix: str) -> int:
        # Position of the first log entry starting with *prefix*.
        return next(i for i, e in enumerate(call_log) if e.startswith(prefix))

    rc = cli_mod.main(["daemon", "stop"])
    assert rc == 0

    import json as json_mod

    on_disk = json_mod.loads(state_path.read_text())
    assert on_disk.get("user_requested_shutdown") is True

    assert call_log[0].startswith("save_state:True"), call_log
    assert any(
        entry.startswith("subprocess.run:launchctl") for entry in call_log
    ), call_log
    save_idx = first_index("save_state:")
    launchctl_idx = first_index("subprocess.run:launchctl")
    assert save_idx < launchctl_idx, call_log
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test F -- cmd_daemon_stop writes the sentinel BEFORE systemctl (Linux)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_f_cmd_daemon_stop_writes_sentinel_before_systemctl(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
) -> None:
    """Linux variant of Test E.

    Same ordering invariant (sentinel write first), different
    process-supervisor command (systemctl).
    """
    monkeypatch.setattr(platform, "system", lambda: "Linux")

    state_path = tmp_path / ".daemon-state.json"
    monkeypatch.setattr(state_mod, "STATE_PATH", state_path, raising=True)

    # Ordered record of save_state / subprocess.run invocations.
    call_log: list[str] = []
    original_save = state_mod.save_state

    def tracking_save_state(state: dict) -> None:
        call_log.append(f"save_state:{state.get('user_requested_shutdown')}")
        original_save(state)

    def fake_run(argv, **_kwargs):
        call_log.append(f"subprocess.run:{argv[0]}")
        return type("R", (), {"returncode": 0})()

    monkeypatch.setattr(state_mod, "save_state", tracking_save_state)
    monkeypatch.setattr(cli_mod.subprocess, "run", fake_run)

    def first_index(prefix: str) -> int:
        # Position of the first log entry starting with *prefix*.
        return next(i for i, e in enumerate(call_log) if e.startswith(prefix))

    rc = cli_mod.main(["daemon", "stop"])
    assert rc == 0

    import json as json_mod

    on_disk = json_mod.loads(state_path.read_text())
    assert on_disk.get("user_requested_shutdown") is True

    save_idx = first_index("save_state:")
    systemctl_idx = first_index("subprocess.run:systemctl")
    assert save_idx < systemctl_idx, call_log
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test G -- _USER_SHUTDOWN_FLAG constant pinned (cross-process protocol)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_g_user_shutdown_flag_constant_is_stable() -> None:
    """Pin the literal value of ``_USER_SHUTDOWN_FLAG``.

    The CLI (separate process) and the daemon both reference this string
    literal in different code paths; renaming it would silently break the
    cross-process protocol from 541c874.
    """
    pinned_value = "user_requested_shutdown"
    assert daemon_mod._USER_SHUTDOWN_FLAG == pinned_value
|
||||
207
tests/test_daemon_s4_first_iter_defer.py
Normal file
207
tests/test_daemon_s4_first_iter_defer.py
Normal file
|
|
@ -0,0 +1,207 @@
|
|||
"""Phase 07.6 W1 / tests for the startup grace before the first
|
||||
`_s4_offline_loop` iteration.
|
||||
|
||||
Defends against the regression where a freshly-spawned daemon immediately
|
||||
runs the heavy S4 viability scan (sigma.compute_and_emit ->
|
||||
retrieve.build_runtime_graph -> runtime_graph_cache.save -> json.dumps),
|
||||
materialising a multi-GB intermediate Python string (CONTEXT.md D-01:
|
||||
py-spy 2026-04-29 PID 7959 RSS 7.6GB).
|
||||
|
||||
Project async-test idiom (mandatory): sync `def test_X(...)` body wraps
|
||||
`asyncio.run(_async_body(...))`. The project does NOT depend on
|
||||
`pytest-asyncio`; `@pytest.mark.asyncio` markers silently pass without
|
||||
running. See tests/test_cpu_watchdog.py:12, tests/test_cascade_no_block.py:11
|
||||
for the canonical pattern. The plan template prescribed pytest-asyncio
|
||||
markers; this file deviates (Rule 1 — fake-GREEN avoidance) per project
|
||||
precedent.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import time
|
||||
from types import SimpleNamespace
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _fake_store():
|
||||
"""_s4_offline_loop only forwards `store` to s4.run_offline_pass and
|
||||
write_event; both are stubbed in these tests, so a SimpleNamespace
|
||||
placeholder is enough — never touches LanceDB.
|
||||
"""
|
||||
return SimpleNamespace()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 1: grace=0 fast-path — first iter runs within ≤100ms
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_grace_zero_runs_first_iter_within_100ms(monkeypatch):
    """D-06 (a): grace=0 => stubbed run_offline_pass invoked within ≤100ms.

    Sync wrapper: the async body is driven with asyncio.run because the
    project does not use pytest-asyncio.
    """
    body = _grace_zero_fast_path_body(monkeypatch)
    asyncio.run(body)
|
||||
|
||||
|
||||
async def _grace_zero_fast_path_body(monkeypatch):
    """Async body of the grace=0 fast-path test.

    Sets ``S4_FIRST_ITER_GRACE_SEC`` to 0, swaps ``s4.run_offline_pass``
    for a counting stub, starts ``_s4_offline_loop`` and requires the stub
    to be invoked before the 100 ms ``wait_for`` budget expires. The loop
    is always asked to shut down (and cancelled as a last resort) before
    the final call-count check.
    """
    import iai_mcp.daemon as daemon_mod

    monkeypatch.setattr(daemon_mod, "S4_FIRST_ITER_GRACE_SEC", 0.0)
    loop = asyncio.get_running_loop()
    called = asyncio.Event()
    call_count = {"n": 0}

    def _stub_run_offline_pass(_store):
        call_count["n"] += 1
        # asyncio.Event is not thread-safe, and the loop under test
        # presumably runs this stub via to_thread (see the slack note in
        # the assertion message below), so hand the set() to the event
        # loop instead of calling it from a worker thread.
        loop.call_soon_threadsafe(called.set)

    monkeypatch.setattr(daemon_mod.s4, "run_offline_pass", _stub_run_offline_pass)
    shutdown = asyncio.Event()
    store = _fake_store()
    t0 = time.monotonic()
    task = asyncio.create_task(daemon_mod._s4_offline_loop(store, shutdown))
    try:
        # Raises TimeoutError if the first pass does not start in time.
        await asyncio.wait_for(called.wait(), timeout=0.1)
        elapsed = time.monotonic() - t0
        assert elapsed <= 0.15, (
            f"first run_offline_pass took {elapsed*1000:.1f}ms; expected <=100ms "
            f"(plus ~50ms slack for to_thread schedule)"
        )
    finally:
        # Always stop the loop so no task outlives the test.
        shutdown.set()
        try:
            await asyncio.wait_for(task, timeout=1.0)
        except asyncio.TimeoutError:
            task.cancel()
            try:
                await task
            except (asyncio.CancelledError, Exception):
                # Best-effort cleanup: whatever the cancelled loop raises
                # is irrelevant to what this test asserts.
                pass
    assert call_count["n"] >= 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 2: grace>0 deferred-path — no call before grace, ≥1 call after
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_grace_positive_defers_first_iter(monkeypatch):
    """D-06 (b): grace=0.5 => no call before 0.4s; ≥1 call after 0.7s.

    Sync wrapper around the async body (no pytest-asyncio in this project).
    """
    body = _grace_positive_deferred_body(monkeypatch)
    asyncio.run(body)
|
||||
|
||||
|
||||
async def _grace_positive_deferred_body(monkeypatch):
    """Async body of the deferred-start test.

    With a 0.5 s grace the stubbed ``run_offline_pass`` must not have been
    called 0.4 s after the loop starts, and must have been called at least
    once by roughly 0.7 s. The loop is shut down in all cases.
    """
    import iai_mcp.daemon as daemon_mod

    monkeypatch.setattr(daemon_mod, "S4_FIRST_ITER_GRACE_SEC", 0.5)
    invocations = {"n": 0}

    def _stub_run_offline_pass(_store):
        invocations["n"] += 1

    monkeypatch.setattr(daemon_mod.s4, "run_offline_pass", _stub_run_offline_pass)
    shutdown = asyncio.Event()
    task = asyncio.create_task(
        daemon_mod._s4_offline_loop(_fake_store(), shutdown)
    )
    try:
        # Still inside the grace window: nothing may have run yet.
        await asyncio.sleep(0.4)
        assert invocations["n"] == 0, (
            f"S4 ran before 0.5s grace elapsed: call_count={invocations['n']}"
        )
        # Total ~0.7s — past 0.5s grace + to_thread schedule slack.
        await asyncio.sleep(0.3)
        assert invocations["n"] >= 1, (
            f"S4 did not run after grace elapsed: call_count={invocations['n']}"
        )
    finally:
        shutdown.set()
        try:
            await asyncio.wait_for(task, timeout=1.0)
        except asyncio.TimeoutError:
            # Loop ignored the shutdown event; cancel it as a last resort.
            task.cancel()
            try:
                await task
            except (asyncio.CancelledError, Exception):
                pass
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 3: shutdown during grace — clean return, no run, no exception
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_shutdown_during_grace_returns_cleanly(monkeypatch):
    """shutdown set during grace => loop returns cleanly, 0 calls.

    Sync wrapper around the async body (no pytest-asyncio in this project).
    """
    body = _shutdown_during_grace_body(monkeypatch)
    asyncio.run(body)
|
||||
|
||||
|
||||
async def _shutdown_during_grace_body(monkeypatch):
    """Async body of the shutdown-during-grace test.

    Uses a 5 s grace, signals shutdown after 50 ms, and requires the loop
    task to finish within 1 s without ever calling ``run_offline_pass``
    and without raising.
    """
    import iai_mcp.daemon as daemon_mod

    monkeypatch.setattr(daemon_mod, "S4_FIRST_ITER_GRACE_SEC", 5.0)
    invocations = {"n": 0}

    def _stub_run_offline_pass(_store):
        invocations["n"] += 1

    monkeypatch.setattr(daemon_mod.s4, "run_offline_pass", _stub_run_offline_pass)
    shutdown = asyncio.Event()
    task = asyncio.create_task(
        daemon_mod._s4_offline_loop(_fake_store(), shutdown)
    )
    # Let the loop enter its grace wait, then request shutdown.
    await asyncio.sleep(0.05)
    shutdown.set()
    # raises if loop did not return cleanly within 1s.
    await asyncio.wait_for(task, timeout=1.0)

    assert invocations["n"] == 0, (
        f"S4 ran despite shutdown during grace: call_count={invocations['n']}"
    )
    assert task.done(), "loop task did not finish"
    assert task.exception() is None, (
        f"loop raised during shutdown-in-grace: {task.exception()!r}"
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 4: existing s4_offline_pass_error event-emit preserved
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_run_offline_pass_error_still_emits_event(monkeypatch):
    """Existing layered-defense preserved.

    When run_offline_pass raises, write_event must be called with
    kind='s4_offline_pass_error' + severity='warning'. Sync wrapper around
    the async body (no pytest-asyncio in this project).
    """
    body = _error_event_preserved_body(monkeypatch)
    asyncio.run(body)
|
||||
|
||||
|
||||
async def _error_event_preserved_body(monkeypatch):
    """Async body of the error-event test.

    ``run_offline_pass`` is stubbed to raise RuntimeError("boom") and
    ``write_event`` is replaced with a recorder. After letting the loop
    run for 100 ms and shutting it down, at least one recorded event must
    have kind ``s4_offline_pass_error``, severity ``warning`` and a payload
    mentioning "boom".
    """
    import iai_mcp.daemon as daemon_mod

    monkeypatch.setattr(daemon_mod, "S4_FIRST_ITER_GRACE_SEC", 0.0)
    recorded: list[tuple[str, dict, str]] = []

    def _stub_run_offline_pass(_store):
        raise RuntimeError("boom")

    def _stub_write_event(_store, kind, payload, severity="info", **_kwargs):
        # Copy dict payloads so later mutation cannot alter the record.
        snapshot = dict(payload) if isinstance(payload, dict) else payload
        recorded.append((kind, snapshot, severity))

    monkeypatch.setattr(daemon_mod.s4, "run_offline_pass", _stub_run_offline_pass)
    monkeypatch.setattr(daemon_mod, "write_event", _stub_write_event)
    shutdown = asyncio.Event()
    task = asyncio.create_task(
        daemon_mod._s4_offline_loop(_fake_store(), shutdown)
    )
    # Give the loop time to: enter while-body, hit run_offline_pass raise,
    # emit s4_offline_pass_error, then await the inter-iteration wait_for.
    await asyncio.sleep(0.1)
    shutdown.set()
    try:
        await asyncio.wait_for(task, timeout=1.0)
    except asyncio.TimeoutError:
        # Loop ignored the shutdown event; cancel it as a last resort.
        task.cancel()
        try:
            await task
        except (asyncio.CancelledError, Exception):
            pass

    def _is_expected(event) -> bool:
        kind, payload, severity = event
        return (
            kind == "s4_offline_pass_error"
            and severity == "warning"
            and "boom" in str(payload)
        )

    matching = [event for event in recorded if _is_expected(event)]
    assert matching, f"expected s4_offline_pass_error event with severity=warning + 'boom' payload, got: {recorded}"
|
||||
213
tests/test_daemon_state.py
Normal file
213
tests/test_daemon_state.py
Normal file
|
|
@ -0,0 +1,213 @@
|
|||
"""Tests for iai_mcp.daemon_state -- Task 2.
|
||||
|
||||
Covers:
|
||||
1. save_state atomically persists and load_state round-trips.
|
||||
2. File mode is 0o600.
|
||||
3. save_state is atomic under simulated mid-write failure (temp file unlinked).
|
||||
4. get_pending_digest returns + clears digest when > threshold elapsed.
|
||||
5. get_pending_digest returns None when <18h since last shown.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture
def isolated_state_path(tmp_path, monkeypatch):
    """Point ``daemon_state.STATE_PATH`` at a file under tmp_path.

    Returns the redirected path so tests can inspect the file directly.
    """
    from iai_mcp import daemon_state

    redirected = tmp_path / ".daemon-state.json"
    monkeypatch.setattr(daemon_state, "STATE_PATH", redirected)
    return redirected
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 1 + 2: roundtrip + 0o600
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_save_and_load_roundtrip_with_0600_mode(isolated_state_path):
    """save_state persists with mode 0o600 and load_state round-trips it."""
    from iai_mcp.daemon_state import load_state, save_state

    # Fresh load -> {}.
    assert load_state() == {}

    payload = {
        "fsm_state": "WAKE",
        "daemon_started_at": "2026-04-18T00:00:00+00:00",
        "pending_digest": {"cycles": 4, "insight": "test"},
    }
    save_state(payload)

    # File exists, mode is 0o600.
    assert isolated_state_path.exists()
    mode = isolated_state_path.stat().st_mode & 0o777
    assert mode == 0o600, f"expected 0o600, got {oct(mode)}"

    # load returns identical dict.
    assert load_state() == payload
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 3: atomic write via tempfile + os.replace
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_save_state_atomic_rename_preserves_old_on_failure(isolated_state_path, monkeypatch):
    """A failed atomic swap leaves the old file intact and no temp file.

    If os.replace raises, the target file must remain untouched and the
    temp file must be cleaned up.
    """
    from iai_mcp.daemon_state import load_state, save_state

    import iai_mcp.daemon_state as ds

    # Seed a known-good file.
    original = {"fsm_state": "WAKE", "version": 1}
    save_state(original)
    assert load_state() == original

    def _boom(src, dst):
        raise OSError("simulated swap failure")

    # Patch os.replace (reached through the module under test) so the
    # atomic swap fails; monkeypatch restores it at teardown.
    monkeypatch.setattr(ds.os, "replace", _boom)

    with pytest.raises(OSError):
        save_state({"fsm_state": "SLEEP", "version": 2})

    # Original file preserved (atomic rename never happened).
    loaded = load_state()
    assert loaded == original

    # Temp file cleaned up -- no leftover .tmp files in the directory.
    leftovers = list(isolated_state_path.parent.glob(".daemon-state.*.tmp"))
    assert leftovers == [], f"temp files not cleaned: {leftovers}"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 4: pending digest returned after threshold window
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_pending_digest_returned_after_18h(isolated_state_path):
    """A digest last shown 20h ago is returned, cleared and persisted."""
    from iai_mcp.daemon_state import (
        DIGEST_SHOW_THRESHOLD_HOURS,
        get_pending_digest,
        load_state,
        save_state,
    )

    assert DIGEST_SHOW_THRESHOLD_HOURS == 18

    now = datetime(2026, 4, 18, 20, 0, tzinfo=timezone.utc)
    shown_20h_ago = (now - timedelta(hours=20)).isoformat()
    state = {
        "last_digest_shown_at": shown_20h_ago,
        "pending_digest": {"cycles": 4, "insight": "after-threshold"},
    }
    save_state(state)

    assert get_pending_digest(state, now) == {
        "cycles": 4,
        "insight": "after-threshold",
    }

    # State mutated and persisted: pending_digest cleared, last_digest_shown_at bumped.
    now_iso = now.isoformat()
    assert "pending_digest" not in state
    assert state["last_digest_shown_at"] == now_iso

    # Persisted to disk.
    on_disk = load_state()
    assert "pending_digest" not in on_disk
    assert on_disk["last_digest_shown_at"] == now_iso
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 5: digest withheld when <18h since last shown
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_pending_digest_withheld_before_18h(isolated_state_path):
    """A digest last shown 4h ago is withheld and the state is untouched."""
    from iai_mcp.daemon_state import get_pending_digest

    now = datetime(2026, 4, 18, 20, 0, tzinfo=timezone.utc)
    shown_4h_ago = (now - timedelta(hours=4)).isoformat()
    state = {
        "last_digest_shown_at": shown_4h_ago,
        "pending_digest": {"cycles": 4, "insight": "too-early"},
    }

    assert get_pending_digest(state, now) is None

    # State preserved (digest still pending for later).
    assert state["pending_digest"] == {"cycles": 4, "insight": "too-early"}
    assert state["last_digest_shown_at"] == shown_4h_ago
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Extra: no digest when state has no pending_digest
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_pending_digest_none_when_not_set(isolated_state_path):
    """An empty state yields no digest."""
    from iai_mcp.daemon_state import get_pending_digest

    empty_state: dict = {}
    moment = datetime(2026, 4, 18, 20, 0, tzinfo=timezone.utc)
    result = get_pending_digest(empty_state, moment)
    assert result is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# prune_stale_first_turn: evicts legacy bool + aged ISO entries
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_prune_evicts_legacy_bool_first_turn_pending():
    """Legacy bool-valued entries are all evicted on the first prune.

    Legacy {sid: True} entries have no recoverable timestamp, so they
    cannot be aged sensibly and are dropped outright.
    """
    from iai_mcp.daemon_state import prune_stale_first_turn

    legacy = {"sess-1": True, "sess-2": False, "sess-3": True}
    state = {"first_turn_pending": legacy}

    evicted = prune_stale_first_turn(state)

    assert evicted == 3
    assert state["first_turn_pending"] == {}
|
||||
|
||||
|
||||
def test_prune_keeps_fresh_iso_entries_and_evicts_aged():
    """ISO timestamps within TTL survive; older than TTL get evicted."""
    from iai_mcp.daemon_state import prune_stale_first_turn

    now = datetime(2026, 4, 23, 12, 0, tzinfo=timezone.utc)
    state = {
        "first_turn_pending": {
            "fresh": (now - timedelta(hours=1)).isoformat(),
            "stale": (now - timedelta(hours=48)).isoformat(),
        }
    }

    evicted = prune_stale_first_turn(state, now=now, ttl_hours=24)

    assert evicted == 1
    survivors = state["first_turn_pending"]
    assert "fresh" in survivors
    assert "stale" not in survivors
|
||||
|
||||
|
||||
def test_prune_caps_max_entries_keeps_newest():
    """Secondary cap: keep newest max_entries entries by timestamp."""
    from iai_mcp.daemon_state import prune_stale_first_turn

    now = datetime(2026, 4, 23, 12, 0, tzinfo=timezone.utc)
    # sess-i was recorded i minutes before `now`, so sess-0 is the newest.
    pending = {}
    for minutes_ago in range(10):
        stamp = now - timedelta(minutes=minutes_ago)
        pending[f"sess-{minutes_ago}"] = stamp.isoformat()
    state = {"first_turn_pending": pending}

    evicted = prune_stale_first_turn(state, now=now, ttl_hours=24, max_entries=3)

    assert evicted == 7
    kept = state["first_turn_pending"]
    assert len(kept) == 3
    # Newest three minutes (0, 1, 2) survive.
    assert set(kept.keys()) == {"sess-0", "sess-1", "sess-2"}
|
||||
|
||||
|
||||
def test_prune_handles_empty_and_missing_pending():
    """Idempotent on empty / missing first_turn_pending."""
    from iai_mcp.daemon_state import prune_stale_first_turn

    # Missing key, empty mapping, and explicit None all prune nothing.
    degenerate_states = (
        {},
        {"first_turn_pending": {}},
        {"first_turn_pending": None},
    )
    for state in degenerate_states:
        assert prune_stale_first_turn(state) == 0
|
||||
403
tests/test_daemon_tick_flags.py
Normal file
403
tests/test_daemon_tick_flags.py
Normal file
|
|
@ -0,0 +1,403 @@
|
|||
"""Tests for _tick_body honoring socket control flags (Plan 04-gap-1).
|
||||
|
||||
The dispatcher (tests/test_daemon_dispatcher.py) proves the flags are
|
||||
SET correctly on the daemon state. These tests prove the scheduler
|
||||
READS those flags and acts on them:
|
||||
|
||||
- scheduler_paused=True -> _tick_body emits daemon_tick_skipped and
|
||||
returns without acquiring the lock.
|
||||
- user_sleep_request.pending=True + empty quiet_window -> _tick_body
|
||||
still bypasses the gate, enters SLEEP,
|
||||
clears the flag.
|
||||
- force_rem_request.pending=True -> ONE REM cycle runs out of schedule
|
||||
(total_cycles=1), flag cleared.
|
||||
- force_wake_request.pending=True set mid-night -> REM loop breaks
|
||||
early with daemon_yielded reason=
|
||||
force_wake_requested; flag cleared.
|
||||
|
||||
All REM cycles are mocked with a coroutine that sleeps 0.01s to avoid
|
||||
the real 15-minute cap + real consolidation pipeline.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from unittest.mock import AsyncMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fixtures
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture
def tick_env(tmp_path, monkeypatch):
    """Build an isolated store and process lock for ``_tick_body`` tests.

    Redirects ``concurrency.LOCK_PATH`` and ``daemon_state.STATE_PATH``
    into tmp_path, sets ``IAI_MCP_STORE`` / ``IAI_MCP_EMBED_DIM`` for the
    test, and seeds one record so the store is not empty.

    Yields (store, lock, state_path, tmp_path).

    `state_path` points at the tmp_path state file so tests can verify
    flag persistence via load_state(). The REM cycle is NOT mocked here;
    each test patches ``daemon_mod.run_rem_cycle`` itself.

    On teardown the lock is released (best effort) and then closed.
    """
    from iai_mcp import concurrency, daemon_state
    from iai_mcp.concurrency import ProcessLock
    from iai_mcp.store import MemoryStore

    lock_path = tmp_path / ".lock"
    state_path = tmp_path / ".daemon-state.json"

    # Environment and path redirects are applied before the store is built.
    monkeypatch.setattr(concurrency, "LOCK_PATH", lock_path)
    monkeypatch.setattr(daemon_state, "STATE_PATH", state_path)
    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path / "iai"))
    monkeypatch.setenv("IAI_MCP_EMBED_DIM", "384")

    store = MemoryStore()

    # Seed a single record so _store_is_empty returns False (we want the
    # scheduler to reach the flag-gate, not the empty-store shortcut).
    from iai_mcp.types import MemoryRecord
    from uuid import uuid4
    rec = MemoryRecord(
        id=uuid4(),
        tier="semantic",
        literal_surface="seed record so the store is not empty",
        aaak_index="",
        embedding=[0.0] * store.embed_dim,
        community_id=None,
        centrality=0.0,
        detail_level=1,
        pinned=False,
        stability=0.0,
        difficulty=0.0,
        last_reviewed=None,
        never_decay=False,
        never_merge=False,
        provenance=[],
        created_at=datetime.now(timezone.utc),
        updated_at=datetime.now(timezone.utc),
        tags=[],
        language="en",
    )
    store.insert(rec)

    lock = ProcessLock(lock_path)
    yield store, lock, state_path, tmp_path
    # Teardown: a failing release is ignored on purpose so close() still
    # runs (presumably release raises when the lock is not held -- TODO
    # confirm against ProcessLock).
    try:
        lock.release()
    except Exception:
        pass
    lock.close()
|
||||
|
||||
|
||||
async def _fast_rem_cycle(
|
||||
store, cycle_num, total_cycles, session_id, *, is_last, claude_enabled,
|
||||
):
|
||||
"""Stand-in for dream.run_rem_cycle -- completes in 0.01s."""
|
||||
await asyncio.sleep(0.01)
|
||||
return {
|
||||
"cycle": cycle_num,
|
||||
"summaries_created": 1,
|
||||
"schemas_induced": 0,
|
||||
"schema_candidates": 0,
|
||||
"claude_call_used": False,
|
||||
"main_insight_text": None,
|
||||
"timed_out": False,
|
||||
}
|
||||
|
||||
|
||||
def _window_covering_now() -> list[int]:
    """Return a quiet_window [start_bucket, duration] containing local now.

    The current local time is mapped to one of 48 half-hour buckets; the
    window starts two buckets earlier (wrapping past midnight) and has a
    duration of 8 (presumably also in half-hour buckets -- TODO confirm).
    """
    from iai_mcp.tz import load_user_tz

    user_tz = load_user_tz()
    local_now = datetime.now(timezone.utc).astimezone(user_tz)
    minutes_since_midnight = local_now.hour * 60 + local_now.minute
    current_bucket = minutes_since_midnight // 30
    return [(current_bucket - 2) % 48, 8]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 1: scheduler_paused=True short-circuits the tick
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_scheduler_paused_emits_skip_event_and_returns(tick_env, monkeypatch):
    """scheduler_paused=True makes _tick_body skip without running REM.

    The tick must record ``last_tick_skipped_reason == "paused"`` on the
    state, emit one ``daemon_tick_skipped`` event with reason "paused",
    and leave the FSM at WAKE.
    """
    from iai_mcp import daemon as daemon_mod
    from iai_mcp.events import query_events

    # Only the store and lock are needed; the paths are unused here.
    store, lock, _state_path, _tmp_path = tick_env

    state = {
        "fsm_state": "WAKE",
        "scheduler_paused": True,
        "quiet_window": _window_covering_now(),
    }

    # If the body reaches the REM loop, this mock fails the test.
    monkeypatch.setattr(daemon_mod, "run_rem_cycle", AsyncMock(
        side_effect=AssertionError("REM loop must not run when paused")
    ))

    asyncio.run(daemon_mod._tick_body(store, lock, state))

    # State reports the pause reason.
    assert state.get("last_tick_skipped_reason") == "paused"
    # Event recorded.
    events = query_events(store, kind="daemon_tick_skipped", limit=1)
    assert len(events) == 1
    assert events[0]["data"]["reason"] == "paused"
    # FSM stayed at WAKE.
    assert state["fsm_state"] == "WAKE"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 2: user_sleep_request bypasses quiet-window gate
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_user_sleep_request_bypasses_quiet_window(tick_env, monkeypatch):
    """A pending user_sleep_request overrides the quiet-window gate.

    Empty quiet_window + no recent sessions should normally skip the tick
    (outside_window). With the request pending, the REM loop must actually
    run and the flag must be cleared and persisted.
    """
    from iai_mcp import daemon as daemon_mod
    from iai_mcp.daemon_state import load_state

    store, lock, _state_path, _tmp_path = tick_env

    sleep_request = {
        "reason": "I am going to bed now",
        "ts": "2026-04-18T23:00:00+00:00",
        "pending": True,
    }
    state = {
        "fsm_state": "WAKE",
        # Empty quiet window -- gate would normally skip.
        "quiet_window": None,
        "user_sleep_request": sleep_request,
        # Ensure the bootstrap idle check ALSO fails (recent session marker).
        "last_session_ts": datetime.now(timezone.utc).isoformat(),
    }

    monkeypatch.setattr(daemon_mod, "run_rem_cycle", _fast_rem_cycle)
    # Skip quiet-window relearn path entirely.
    monkeypatch.setattr(daemon_mod, "should_relearn", lambda last, now: False)

    asyncio.run(daemon_mod._tick_body(store, lock, state))

    # Flag cleared after honoring the request.
    honored = state["user_sleep_request"]
    assert honored["pending"] is False
    assert "honored_at" in honored
    # FSM returned to WAKE after the full cycle loop.
    assert state["fsm_state"] == "WAKE"
    # At least one cycle completed.
    assert state.get("last_completed_cycles", 0) >= 1

    # State was persisted.
    persisted = load_state()
    assert persisted["user_sleep_request"]["pending"] is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 3: force_rem_request runs EXACTLY ONE REM cycle out of schedule
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_force_rem_request_runs_single_cycle(tick_env, monkeypatch):
    """A pending ``force_rem_request`` runs exactly one REM cycle.

    ``rem_cycle_count`` is set to 4 on purpose: the forced request must bound
    the loop to a single cycle regardless, then clear its flag.
    """
    from iai_mcp import daemon as daemon_mod

    store, lock, _state_path, _tmp_path = tick_env

    state = {
        "fsm_state": "WAKE",
        "quiet_window": None,
        "force_rem_request": {
            "ts": "2026-04-18T10:00:00+00:00",
            "pending": True,
        },
        # Deliberately 4: force_rem has to override this with a single cycle.
        "rem_cycle_count": 4,
        "last_session_ts": datetime.now(timezone.utc).isoformat(),
    }

    seen_cycles: list[int] = []

    async def _tracking_rem(
        store, cycle_num, total_cycles, session_id, *, is_last, claude_enabled,
    ):
        # Record which cycle numbers the daemon asked for.
        seen_cycles.append(cycle_num)
        await asyncio.sleep(0.005)
        report = {
            "cycle": cycle_num,
            "summaries_created": 0,
            "schemas_induced": 0,
            "schema_candidates": 0,
            "claude_call_used": False,
            "main_insight_text": None,
            "timed_out": False,
        }
        return report

    monkeypatch.setattr(daemon_mod, "should_relearn", lambda last, now: False)
    monkeypatch.setattr(daemon_mod, "run_rem_cycle", _tracking_rem)

    asyncio.run(daemon_mod._tick_body(store, lock, state))

    # One cycle only, even though rem_cycle_count asked for four.
    assert seen_cycles == [1], (
        f"force_rem must bound the loop to 1 cycle, got {seen_cycles}"
    )
    # The request flag is cleared and the FSM is back in WAKE.
    assert state["force_rem_request"]["pending"] is False
    assert state["fsm_state"] == "WAKE"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 4: force_wake_request mid-night breaks the REM loop early
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_force_wake_request_breaks_rem_loop_early(tick_env, monkeypatch):
    """A ``force_wake_request`` raised mid-night stops the REM loop early.

    The stand-in REM cycle raises the flag during cycle 2 of 5. The test then
    expects no further cycles, a cleared and stamped flag, a
    ``daemon_yielded`` event with reason ``force_wake_requested``, and the
    FSM back in WAKE.
    """
    from iai_mcp import daemon as daemon_mod
    from iai_mcp.events import query_events

    store, lock, _state_path, _tmp_path = tick_env

    state = {
        "fsm_state": "WAKE",
        "quiet_window": _window_covering_now(),
        "rem_cycle_count": 5,
    }

    seen_cycles: list[int] = []

    async def _rem_sets_force_wake_on_second_cycle(
        store, cycle_num, total_cycles, session_id, *, is_last, claude_enabled,
    ):
        seen_cycles.append(cycle_num)
        await asyncio.sleep(0.005)
        # Simulate the dispatcher flipping the flag halfway into the night.
        # The _tick_body loop is expected to look at
        # force_wake_request.pending after each cycle completes, so raising
        # it during cycle 2 should stop the loop before cycle 3.
        if cycle_num == 2:
            state["force_wake_request"] = {
                "ts": datetime.now(timezone.utc).isoformat(),
                "pending": True,
            }
        report = {
            "cycle": cycle_num,
            "summaries_created": 0,
            "schemas_induced": 0,
            "schema_candidates": 0,
            "claude_call_used": False,
            "main_insight_text": None,
            "timed_out": False,
        }
        return report

    monkeypatch.setattr(daemon_mod, "should_relearn", lambda last, now: False)
    monkeypatch.setattr(
        daemon_mod, "run_rem_cycle", _rem_sets_force_wake_on_second_cycle
    )

    asyncio.run(daemon_mod._tick_body(store, lock, state))

    # Cycles 3, 4 and 5 must never have started.
    assert seen_cycles == [1, 2], (
        f"force_wake must break the loop after cycle 2, got {seen_cycles}"
    )
    # The flag is cleared and stamped as honored.
    wake_request = state["force_wake_request"]
    assert wake_request["pending"] is False
    assert "honored_at" in wake_request
    # A daemon_yielded event carries the matching reason.
    yield_events = query_events(store, kind="daemon_yielded", limit=5)
    reasons = [event["data"].get("reason") for event in yield_events]
    assert "force_wake_requested" in reasons, (
        f"expected force_wake_requested in {reasons}"
    )
    # The FSM ended up cleanly back in WAKE.
    assert state["fsm_state"] == "WAKE"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 5: flags work under concurrent state changes (realistic race)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_user_sleep_plus_force_rem_still_bounds_one_cycle(tick_env, monkeypatch):
    """Both requests pending at once: one cycle runs and both flags clear.

    Models the user sending both MCP messages in quick succession. The
    ``force_rem_request`` still limits the loop to a single cycle even though
    ``rem_cycle_count`` is 4.
    """
    from iai_mcp import daemon as daemon_mod

    store, lock, _state_path, _tmp_path = tick_env

    state = {
        "fsm_state": "WAKE",
        "quiet_window": None,
        "user_sleep_request": {
            "reason": "bedtime",
            "ts": "2026-04-18T23:00:00+00:00",
            "pending": True,
        },
        "force_rem_request": {
            "ts": "2026-04-18T23:00:01+00:00",
            "pending": True,
        },
        "rem_cycle_count": 4,
    }

    seen_cycles: list[int] = []

    async def _tracking_rem(
        store, cycle_num, total_cycles, session_id, *, is_last, claude_enabled,
    ):
        # Record which cycle numbers the daemon asked for.
        seen_cycles.append(cycle_num)
        await asyncio.sleep(0.005)
        report = {
            "cycle": cycle_num,
            "summaries_created": 0,
            "schemas_induced": 0,
            "schema_candidates": 0,
            "claude_call_used": False,
            "main_insight_text": None,
            "timed_out": False,
        }
        return report

    monkeypatch.setattr(daemon_mod, "should_relearn", lambda last, now: False)
    monkeypatch.setattr(daemon_mod, "run_rem_cycle", _tracking_rem)

    asyncio.run(daemon_mod._tick_body(store, lock, state))

    # A single cycle despite rem_cycle_count=4.
    assert seen_cycles == [1]
    # Neither request is left pending.
    assert state["user_sleep_request"]["pending"] is False
    assert state["force_rem_request"]["pending"] is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 6: paused=True state persisted AND surfaced via load_state
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_paused_skip_persists_to_disk(tick_env, monkeypatch):
    """A paused tick writes its skip reason and pause flag to disk.

    Reloading the state afterwards must show ``scheduler_paused`` and
    ``last_tick_skipped_reason`` so a restarted daemon sees the same picture.
    """
    from iai_mcp import daemon as daemon_mod
    from iai_mcp.daemon_state import load_state

    store, lock, _state_path, _tmp_path = tick_env

    state = {"fsm_state": "WAKE", "scheduler_paused": True}

    asyncio.run(daemon_mod._tick_body(store, lock, state))

    on_disk = load_state()
    assert on_disk["last_tick_skipped_reason"] == "paused"
    assert on_disk["scheduler_paused"] is True
    # Parsing raises if last_tick_at is not an ISO-format string.
    datetime.fromisoformat(on_disk["last_tick_at"])
|
||||
315
tests/test_data_integrity_soak.py
Normal file
315
tests/test_data_integrity_soak.py
Normal file
|
|
@ -0,0 +1,315 @@
|
|||
"""Phase 07.9 W5 / — cross-cut data-integrity integration soak.
|
||||
|
||||
Exercises the W1-W4 hardening fixes *together* under load shapes that no
|
||||
per-wave unit test reaches. Each case maps 1:1 to the four CONTEXT.md
|
||||
D-05 sub-requirements:
|
||||
|
||||
1. provenance overflow round-trip under sustained load (W1 / D-01)
|
||||
2. capture drain partial-failure preserves evidence (W2 / D-02)
|
||||
3. graph-cache encryption round-trip + plaintext absence (W3 / D-03)
|
||||
4. anti-hits malformed edge does not crash recall (W4 / D-04)
|
||||
|
||||
All cases run against a real ``MemoryStore`` in tmp_path with a
|
||||
deterministic passphrase fallback (no keyring required).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import threading
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from uuid import UUID, uuid4
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# Deterministic passphrase so encryption paths work without a keyring
|
||||
# backend on this construction host.
|
||||
# setdefault keeps a passphrase that is already exported and only fills in
# this fixed test value when the variable is absent.
os.environ.setdefault("IAI_MCP_CRYPTO_PASSPHRASE", "test-soak-w5-passphrase")
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _isolated_keyring(monkeypatch: pytest.MonkeyPatch):
    """Replace the ``keyring`` module functions with an in-memory dict.

    Lookups for entries that were never stored return ``None``, which is what
    lets the passphrase fallback fire. Yields the backing dict, keyed by the
    two positional arguments the keyring functions receive.
    """
    import keyring as _keyring

    fake: dict[tuple[str, str], str] = {}

    def _get(s, u):
        return fake.get((s, u))

    def _set(s, u, p):
        fake[(s, u)] = p

    def _delete(s, u):
        return fake.pop((s, u), None)

    for attr, replacement in (
        ("get_password", _get),
        ("set_password", _set),
        ("delete_password", _delete),
    ):
        monkeypatch.setattr(_keyring, attr, replacement)

    yield fake
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Case 1 — provenance overflow round-trip under sustained load (W1 / D-01)
|
||||
# ============================================================================
|
||||
|
||||
|
||||
def test_w5_provenance_overflow_sustained_load(tmp_path, monkeypatch):
    """W5 / case 1: provenance overflow round-trip under sustained load.

    Drives 10 single-pair batches into a queue sized for 2 in-memory slots
    while the worker is throttled. Asserts that at least one batch spills to
    disk, that every cue is flushed exactly once, and that the spill
    directory is empty again after release + flush.
    """
    from iai_mcp.provenance_queue import ProvenanceWriteQueue
    from iai_mcp.store import MemoryStore
    from tests.test_store import _make as _make_record

    # Init store BEFORE redirecting HOME so MemoryStore uses the real
    # keyring resolver path (then falls through to the passphrase since
    # the keyring fail-backend is monkeypatched). Spill dir under HOME
    # is exactly what we want isolated to tmp.
    store = MemoryStore(path=tmp_path / "store")
    r = _make_record()
    store.insert(r)

    monkeypatch.setenv("HOME", str(tmp_path))
    # Computed once; both the mid-test and the final assertions look here.
    overflow_dir = tmp_path / ".iai-mcp" / ".provenance-overflow"

    flushed: list = []
    release = threading.Event()
    real_batch = store.append_provenance_batch

    def slow_batch(pairs, records_cache=None):
        # Hold the worker until the test releases it, then record what was
        # flushed and delegate to the real implementation.
        release.wait(timeout=15.0)
        flushed.extend(pairs)
        return real_batch(pairs, records_cache=records_cache)

    store.append_provenance_batch = slow_batch  # type: ignore[method-assign]

    q = ProvenanceWriteQueue(
        store, coalesce_ms=10, max_queue_size=2, max_batch_pairs=1,
    )
    q.start()
    try:
        for i in range(10):
            q.enqueue([(r.id, {
                "ts": f"t{i}", "cue": f"sustained-{i}", "session_id": "soak",
            })])
        # Some spilled by now.
        time.sleep(0.15)
        spilled = list(overflow_dir.glob("*.jsonl"))
        assert len(spilled) >= 1, (
            f"expected ≥1 spilled file under sustained overload; got {spilled}"
        )

        # Release the worker — drains in-memory items first.
        release.set()

        # Production: the worker's idle-poll picks up the spill dir
        # every _WORKER_IDLE_POLL_S (5s) when _q is empty. For test
        # speed we drive the drain explicitly via the internal helper
        # — same code path the worker uses on its idle tick.
        deadline = time.time() + 15.0
        while time.time() < deadline:
            # First let the worker drain whatever's currently in _q.
            q.flush(timeout=2.0)
            # Then explicitly re-enqueue any spilled files. The worker
            # will pull them on the next get() in its outer loop.
            q._drain_overflow_dir()
            q.flush(timeout=2.0)
            if not list(overflow_dir.glob("*.jsonl")):
                break
            time.sleep(0.05)
    finally:
        # If an assertion above failed before release.set(), the worker is
        # still parked in slow_batch for up to 15 s. Release it first so
        # teardown does not stall (assumes q.stop() waits for the worker;
        # TODO confirm). Event.set() is idempotent on the success path.
        release.set()
        q.stop()

    cues = [p[1]["cue"] for p in flushed]
    assert sorted(cues) == [f"sustained-{i}" for i in range(10)], (
        f"MEM-05 violated: expected all 10 cues exactly once; got {sorted(cues)}"
    )
    assert list(overflow_dir.glob("*.jsonl")) == []
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Case 2 — capture drain partial-failure preserves evidence (W2 / D-02)
|
||||
# ============================================================================
|
||||
|
||||
|
||||
def test_w5_capture_drain_partial_failure_preserves_evidence(tmp_path, monkeypatch):
    """W5 / case 2: a partly failing deferred file is kept as evidence.

    The deferred transcript holds a header plus three events, the middle one
    rigged to fail on insert. The file must be renamed to
    ``.failed-<ts>.jsonl`` rather than unlinked (pre-07.9 it was unlinked
    and the events were lost), and the drain counters must show 2 inserted
    and 1 failed.
    """
    from iai_mcp.capture import drain_deferred_captures
    from iai_mcp.store import MemoryStore

    monkeypatch.setenv("HOME", str(tmp_path))
    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path / ".iai-mcp" / "lance"))

    deferred = tmp_path / ".iai-mcp" / ".deferred-captures"
    deferred.mkdir(parents=True)
    fpath = deferred / "soak-mixed-1.jsonl"

    def _event(cue, text):
        return {"cue": cue, "text": text, "tier": "episodic", "role": "user"}

    rows = [
        {
            "version": 1,
            "deferred_at": "2026-04-30T00:00:00Z",
            "session_id": "soak-2",
            "cwd": "/tmp",
        },
        _event("good a", "first valid event with ample length here"),
        _event("poison", "INSERT_FAIL_SENTINEL_W5_SOAK middle event"),
        _event("good b", "third valid event with sufficient text"),
    ]
    # One JSON document per line, each newline-terminated.
    fpath.write_text("".join(json.dumps(row) + "\n" for row in rows))

    real_insert = MemoryStore.insert

    def insert_or_fail(self, rec):
        # Only the sentinel-bearing record fails; the rest insert normally.
        if "INSERT_FAIL_SENTINEL_W5_SOAK" not in rec.literal_surface:
            return real_insert(self, rec)
        raise RuntimeError("simulated lance failure at soak")

    monkeypatch.setattr(MemoryStore, "insert", insert_or_fail)

    store = MemoryStore()
    counts = drain_deferred_captures(store)

    assert not fpath.exists()
    failed = list(deferred.glob("soak-mixed-1.failed-*.jsonl"))
    assert len(failed) == 1, (
        f"expected 1 .failed-* file; got {failed} "
        f"(deferred contents: {list(deferred.iterdir())})"
    )
    assert counts["events_inserted"] == 2, counts
    assert counts["events_skipped_insert_failed"] == 1, counts
    assert counts["files_drained"] == 0, counts
    assert counts["files_failed"] == 1, counts
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Case 3 — graph-cache encryption round-trip + plaintext absence (W3 / D-03)
|
||||
# ============================================================================
|
||||
|
||||
|
||||
def test_w5_graph_cache_encryption_no_plaintext_canary(tmp_path):
    """W5 / case 3: the graph-cache sidecar never holds the surface in clear.

    Saves a payload whose ``surface`` is a canary string, checks that the
    canary bytes are absent from the on-disk file and that the file starts
    with the ``iai:enc:v1:`` prefix, then confirms ``try_load`` returns the
    original surface.
    """
    from iai_mcp import runtime_graph_cache
    from iai_mcp.community import CommunityAssignment
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path / "lancedb")
    store.root = tmp_path  # cache file under tmp_path

    canary = "PLAINTEXT_CANARY_W5_SOAK_aaak_07_9"
    rid = uuid4()
    key = str(rid)

    node_payload = {
        key: {
            "embedding": [0.1] * 384,
            "surface": canary,
            "centrality": 0.3,
            "tier": "episodic",
            "pinned": False,
            "tags": [],
            "language": "en",
        }
    }
    assignment = CommunityAssignment(
        node_to_community={rid: rid},
        community_centroids={rid: [0.1] * 384},
        modularity=0.4,
        backend="leiden",
        top_communities=[rid],
        mid_regions={rid: [rid]},
    )
    rich_club = [rid]

    saved = runtime_graph_cache.save(
        store, assignment, rich_club,
        node_payload=node_payload, max_degree=2,
    )
    assert saved is True

    raw_bytes = (tmp_path / "runtime_graph_cache.json").read_bytes()
    assert canary.encode("utf-8") not in raw_bytes, (
        "plaintext canary leaked into the on-disk sidecar"
    )
    assert raw_bytes.startswith(b"iai:enc:v1:")

    loaded = runtime_graph_cache.try_load(store)
    assert loaded is not None
    _, _, payload, _ = loaded
    assert payload[key]["surface"] == canary
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Case 4 — anti-hits malformed edge does not crash recall (W4 / D-04)
|
||||
# ============================================================================
|
||||
|
||||
|
||||
def test_w5_recall_survives_malformed_anti_edge(tmp_path):
    """W5 / case 4: a malformed ``contradicts`` edge must not crash recall.

    Inserts two records plus one valid and one malformed contradicts edge
    (destination ``not-a-uuid-soak``), then calls ``_find_anti_hits``. The
    valid anti-hit must come back and a message containing
    ``anti_hits_skip_malformed_edge`` must be logged on ``iai_mcp.pipeline``.
    """
    from iai_mcp.graph import MemoryGraph
    from iai_mcp.pipeline import _find_anti_hits
    from iai_mcp.store import MemoryStore
    from iai_mcp.types import EMBED_DIM, MemoryHit, MemoryRecord

    store = MemoryStore(path=tmp_path / "lancedb")

    rid_hit = uuid4()
    rid_anti = uuid4()
    now = datetime.now(timezone.utc)

    for rid, surface in ((rid_hit, "primary"), (rid_anti, "anti")):
        record = MemoryRecord(
            id=rid, tier="episodic", literal_surface=surface,
            aaak_index="", embedding=[0.1] * EMBED_DIM,
            community_id=None, centrality=0.0, detail_level=2,
            pinned=False, stability=0.0, difficulty=0.0,
            last_reviewed=None, never_decay=False, never_merge=False,
            provenance=[], created_at=now, updated_at=now,
            tags=[], language="en",
        )
        store.insert(record)

    # One well-formed edge followed by one whose dst is not a UUID.
    edge_rows = [
        {
            "src": str(rid_hit), "dst": dst,
            "edge_type": "contradicts", "weight": 1.0,
            "updated_at": now,
        }
        for dst in (str(rid_anti), "not-a-uuid-soak")
    ]
    store.db.open_table("edges").add(edge_rows)

    hit = MemoryHit(
        record_id=rid_hit, score=0.9, reason="soak",
        literal_surface="primary", adjacent_suggestions=[],
    )

    caplog_records: list = []

    class _Capture(logging.Handler):
        def emit(self, record):
            caplog_records.append(record.getMessage())

    handler = _Capture(level=logging.WARNING)
    pipeline_logger = logging.getLogger("iai_mcp.pipeline")
    pipeline_logger.addHandler(handler)
    try:
        anti = _find_anti_hits(
            [hit], store, MemoryGraph(), k=3, records_cache=None,
        )
    finally:
        # Detach the handler whether or not the call raised.
        pipeline_logger.removeHandler(handler)

    assert len(anti) == 1
    assert anti[0].record_id == rid_anti
    assert any("anti_hits_skip_malformed_edge" in m for m in caplog_records), (
        f"expected log line; got {caplog_records}"
    )
|
||||
108
tests/test_delta_encoding.py
Normal file
108
tests/test_delta_encoding.py
Normal file
|
|
@ -0,0 +1,108 @@
|
|||
"""Tests for TOK-08 delta encoding (Plan 02-04 Task 2, D-28).
|
||||
|
||||
Hash each session-start component (L0, L1, L2, rich_club). Subsequent turns
|
||||
send only changed components; unchanged ones are represented by their hash.
|
||||
On hash miss, fall back to full payload.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
def test_hash_component_deterministic():
    """Equal inputs hash equal; a one-character change alters the hash."""
    from iai_mcp.delta import hash_component

    first = hash_component("hello world")
    repeat = hash_component("hello world")
    changed = hash_component("hello world!")

    assert first == repeat
    assert first != changed
|
||||
|
||||
|
||||
def test_hash_component_returns_hex_string():
    """``hash_component`` returns a 16-character hexadecimal ``str``."""
    from iai_mcp.delta import hash_component

    digest = hash_component("test")

    assert isinstance(digest, str)
    # The plan truncates the sha256 to 16 characters.
    assert len(digest) == 16
    # int() raises ValueError on anything that is not valid hex.
    int(digest, 16)
|
||||
|
||||
|
||||
def test_build_delta_first_session_returns_full_payload():
    """With no prior hashes, the delta and the hash map cover every component."""
    from iai_mcp.delta import build_delta

    components = ("l0", "l1", "l2", "rich_club")
    payload = {
        "l0": "identity",
        "l1": "critical facts",
        "l2": ["community a", "community b"],
        "rich_club": "hubs",
    }

    delta, new_hashes = build_delta({}, payload)

    # First session: every component is sent in full...
    for name in components:
        assert name in delta
    # ...and every component gets a hash.
    for name in components:
        assert name in new_hashes
|
||||
|
||||
|
||||
def test_build_delta_unchanged_is_empty():
    """Re-sending an identical payload against its own hashes yields ``{}``."""
    # hash_component was imported here but never used; only build_delta is
    # needed.
    from iai_mcp.delta import build_delta

    payload = {
        "l0": "identity",
        "l1": "critical facts",
        "l2": ["community a"],
        "rich_club": "hubs",
    }
    _first, hashes = build_delta({}, payload)
    # Second call with the same payload: nothing changed, so nothing is sent.
    delta2, _hashes2 = build_delta(hashes, payload)
    assert delta2 == {}
|
||||
|
||||
|
||||
def test_build_delta_partial_change():
    """Only the component whose content changed appears in the delta."""
    from iai_mcp.delta import build_delta

    payload_a = {
        "l0": "identity",
        "l1": "critical facts",
        "l2": ["community a"],
        "rich_club": "hubs",
    }
    _, hashes = build_delta({}, payload_a)

    # Same payload except for a grown l2 list.
    payload_b = {**payload_a, "l2": ["community a", "community b"]}
    delta, _new_hashes = build_delta(hashes, payload_b)

    assert "l2" in delta
    for untouched in ("l0", "l1", "rich_club"):
        assert untouched not in delta
|
||||
|
||||
|
||||
def test_apply_delta_reconstructs():
    """Applying a delta to the base payload reproduces the new payload."""
    from iai_mcp.delta import apply_delta, build_delta

    base = {"l0": "a", "l1": "b", "l2": ["x"], "rich_club": "c"}
    _, base_hashes = build_delta({}, base)

    # Second payload differs from the base in l0 only.
    new = {"l0": "z", "l1": "b", "l2": ["x"], "rich_club": "c"}
    delta, _ = build_delta(base_hashes, new)

    assert apply_delta(base, delta) == new
|
||||
|
||||
|
||||
def test_delta_on_hash_miss_returns_full_component():
    """A stale caller-side hash makes the delta carry the full component."""
    from iai_mcp.delta import build_delta

    stale_hashes = {"l0": "deadbeef00000000", "l1": "cafebabe00000000"}
    payload = {"l0": "new", "l1": "facts", "l2": [], "rich_club": ""}

    delta, _ = build_delta(stale_hashes, payload)

    assert "l0" in delta
    assert delta["l0"] == "new"
    assert "l1" in delta
|
||||
453
tests/test_doctor.py
Normal file
453
tests/test_doctor.py
Normal file
|
|
@ -0,0 +1,453 @@
|
|||
"""Phase 10.4 — regression tests for doctor rows (m) and (n).
|
||||
|
||||
Tests cover:
|
||||
- (m) heartbeat scanner row with fresh wrappers + empty wrappers dir.
|
||||
- (n) HID idle source row in the macOS-tools-available case + the
|
||||
fallback case where ``ioreg`` is missing (cross-OS portability).
|
||||
|
||||
The CONTEXT 10.4 specification requires:
|
||||
- Row (m): PASS if wrappers dir readable; display "n=X fresh, Y stale,
|
||||
Z orphan".
|
||||
- Row (n): PASS if ``available_signals`` includes ``"HIDIdleTime"``;
|
||||
WARN otherwise; display includes HID idle seconds + pmset state.
|
||||
|
||||
All subprocess interactions in this file are mocked so the suite is
|
||||
deterministic and runs on non-macOS hosts as well (real ioreg / pmset
|
||||
calls would make the suite host-dependent).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from iai_mcp.idle_detector import IdleStatus
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- fixtures
|
||||
|
||||
|
||||
@pytest.fixture
def wrappers_dir(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path:
    """Point ``IAI_MCP_STORE`` at ``tmp_path`` and create ``wrappers/`` in it.

    Returns the ``wrappers`` subdirectory so a test can drop heartbeat
    fixture files straight into it.
    """
    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    created = tmp_path / "wrappers"
    created.mkdir(parents=True)
    return created
|
||||
|
||||
|
||||
def _write_fresh_heartbeat(wrappers_dir: Path, pid: int, uuid: str) -> Path:
    """Write ``heartbeat-<pid>-<uuid>.json`` stamped with the current UTC time.

    ``started_at`` and ``last_refresh`` share one timestamp, rendered in ISO
    format with a trailing ``Z`` instead of ``+00:00``. The caller chooses
    the PID to record. Returns the path of the file written.
    """
    stamp = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
    payload = {
        "pid": pid,
        "uuid": uuid,
        "started_at": stamp,
        "last_refresh": stamp,
        "wrapper_version": "1.0.0",
        "schema_version": 1,
    }
    target = wrappers_dir / f"heartbeat-{pid}-{uuid}.json"
    target.write_text(json.dumps(payload))
    return target
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- row (m)
|
||||
|
||||
|
||||
def test_doctor_row_m_heartbeat_scanner_with_fresh_wrappers(
    wrappers_dir: Path,
) -> None:
    """Row (m) is PASS and reports two fresh wrappers, none stale or orphan."""
    # Both heartbeats carry this process's PID.
    own_pid = os.getpid()
    for tag in ("uuid-aaa", "uuid-bbb"):
        _write_fresh_heartbeat(wrappers_dir, own_pid, tag)

    from iai_mcp.doctor import check_m_heartbeat_scanner

    result = check_m_heartbeat_scanner()

    assert result.status == "PASS"
    assert result.passed is True
    for fragment in ("n=2 fresh", "0 stale", "0 orphan"):
        assert fragment in result.detail
|
||||
|
||||
|
||||
def test_doctor_row_m_heartbeat_scanner_empty(wrappers_dir: Path) -> None:
    """Row (m) is PASS and shows ``n=0 fresh`` for an empty wrappers dir."""
    from iai_mcp.doctor import check_m_heartbeat_scanner

    outcome = check_m_heartbeat_scanner()

    assert outcome.status == "PASS"
    assert outcome.passed is True
    assert "n=0 fresh" in outcome.detail
|
||||
|
||||
|
||||
def test_doctor_row_m_heartbeat_scanner_dir_absent(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """Row (m) is PASS with 'not present yet' when ``wrappers/`` is missing.

    A fresh install has no wrappers directory until a wrapper first
    refreshes. That is a healthy state and must not be reported as FAIL.
    """
    # The store root exists but wrappers/ is intentionally never created.
    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))

    from iai_mcp.doctor import check_m_heartbeat_scanner

    outcome = check_m_heartbeat_scanner()

    assert outcome.status == "PASS"
    assert outcome.passed is True
    assert "not present yet" in outcome.detail
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- row (n)
|
||||
|
||||
|
||||
def test_doctor_row_n_hid_idle_source_macos() -> None:
    """Row (n) is PASS when the idle detector reports ``HIDIdleTime``.

    ``IdleDetector.status`` is patched to return a synthetic ``IdleStatus``
    with both signals available, so no real ioreg/pmset call is made and the
    test behaves the same on non-macOS hosts.
    """
    fake_status = IdleStatus(
        hid_idle_sec=612,
        pmset_recent_sleep=False,
        available_signals=["HIDIdleTime", "pmset"],
    )
    status_patch = patch(
        "iai_mcp.idle_detector.IdleDetector.status",
        return_value=fake_status,
    )

    with status_patch:
        from iai_mcp.doctor import check_n_hid_idle_source

        result = check_n_hid_idle_source()

    assert result.status == "PASS"
    assert result.passed is True
    for fragment in ("HIDIdleTime: 612s", "pmset: clean", "HIDIdleTime"):
        assert fragment in result.detail
|
||||
|
||||
|
||||
def test_doctor_row_n_hid_idle_source_missing() -> None:
    """Row (n) is WARN when no hardware idle signal is available.

    ``IdleDetector.status`` is patched to return an empty signal list, which
    simulates ioreg and pmset both missing. WARN is advisory here, so
    ``passed`` must stay ``True``.
    """
    fake_status = IdleStatus(
        hid_idle_sec=None,
        pmset_recent_sleep=False,
        available_signals=[],
    )
    status_patch = patch(
        "iai_mcp.idle_detector.IdleDetector.status",
        return_value=fake_status,
    )

    with status_patch:
        from iai_mcp.doctor import check_n_hid_idle_source

        result = check_n_hid_idle_source()

    assert result.status == "WARN"
    # A WARN row must not flip the gate, so passed stays True.
    assert result.passed is True
    for fragment in (
        "HIDIdleTime: unavailable",
        "available: none",
        "fall back to heartbeat-idle only",
    ):
        assert fragment in result.detail
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- run_diagnosis wire-in
|
||||
|
||||
|
||||
def test_run_diagnosis_includes_rows_m_and_n(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """``run_diagnosis()`` lists one (m) row and one (n) row, (m) first."""
    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    from iai_mcp.doctor import run_diagnosis

    results = run_diagnosis()
    names = [row.name for row in results]

    m_rows = [row for row in results if "(m)" in row.name]
    n_rows = [row for row in results if "(n)" in row.name]
    assert len(m_rows) == 1, f"expected exactly one (m) row, got {names}"
    assert len(n_rows) == 1, f"expected exactly one (n) row, got {names}"

    # Checklist order: (m) has to precede (n).
    m_position = names.index(m_rows[0].name)
    n_position = names.index(n_rows[0].name)
    assert m_position < n_position
|
||||
|
||||
|
||||
# ----------------- Plan 10.6-01 Task 1.3: rows (j), (k), (l) ------
|
||||
|
||||
|
||||
@pytest.fixture
def lifecycle_state_root(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch,
) -> Path:
    """Set ``IAI_MCP_STORE`` to ``tmp_path`` and return that directory."""
    root = tmp_path
    monkeypatch.setenv("IAI_MCP_STORE", str(root))
    return root
|
||||
|
||||
|
||||
def test_doctor_row_j_lifecycle_state_default_when_absent(
    lifecycle_state_root: Path,
) -> None:
    """Row (j) is PASS and reports WAKE when no lifecycle_state.json exists."""
    from iai_mcp.doctor import check_j_lifecycle_current_state

    outcome = check_j_lifecycle_current_state()

    assert outcome.status == "PASS"
    assert outcome.passed is True
    assert "WAKE" in outcome.detail
    # The shadow_run value itself is irrelevant here; the row only has to
    # include the field.
    assert "shadow_run=" in outcome.detail
|
||||
|
||||
|
||||
def test_doctor_row_j_lifecycle_state_reports_drowsy(
    lifecycle_state_root: Path,
) -> None:
    """Row (j) echoes a persisted DROWSY state and its ``shadow_run`` flag."""
    from iai_mcp.lifecycle_state import save_state

    stamp = "2026-05-02T15:00:00+00:00"
    persisted = {
        "current_state": "DROWSY",
        "since_ts": stamp,
        "last_activity_ts": stamp,
        "wrapper_event_seq": 7,
        "sleep_cycle_progress": None,
        "quarantine": None,
        "shadow_run": False,
    }
    state_file = lifecycle_state_root / "lifecycle_state.json"
    save_state(persisted, state_file)

    from iai_mcp.doctor import check_j_lifecycle_current_state

    row = check_j_lifecycle_current_state()
    assert row.status == "PASS"
    for fragment in ("DROWSY", "shadow_run=false"):
        assert fragment in row.detail
|
||||
|
||||
|
||||
def test_doctor_row_k_lifecycle_history_24h_no_log(
    lifecycle_state_root: Path,
) -> None:
    """Row (k) is PASS and says 'no event log' when the store has no logs dir."""
    from iai_mcp.doctor import check_k_lifecycle_history_24h

    row = check_k_lifecycle_history_24h()
    status, detail = row.status, row.detail
    assert status == "PASS"
    assert "no event log" in detail
|
||||
|
||||
|
||||
def test_doctor_row_k_lifecycle_history_24h_zero_transitions(
    lifecycle_state_root: Path,
) -> None:
    """Row (k) is PASS and reports '0 transitions' for an empty logs dir."""
    empty_log_dir = lifecycle_state_root / "logs"
    empty_log_dir.mkdir()
    from iai_mcp.doctor import check_k_lifecycle_history_24h

    row = check_k_lifecycle_history_24h()
    status, detail = row.status, row.detail
    assert status == "PASS"
    assert "0 transitions" in detail
|
||||
|
||||
|
||||
def test_doctor_row_k_lifecycle_history_24h_counts_transitions(
    lifecycle_state_root: Path,
) -> None:
    """Row (k) counts only ``state_transition`` events and names their destinations."""
    from iai_mcp.lifecycle_event_log import LifecycleEventLog

    log = LifecycleEventLog(log_dir=lifecycle_state_root / "logs")

    # Three state_transition events, written in this order.
    transitions = (
        ("WAKE", "DROWSY", "idle_5min"),
        ("DROWSY", "WAKE", "heartbeat_refresh"),
        ("DROWSY", "SLEEP", "idle_30min"),
    )
    for source, destination, trigger in transitions:
        log.append(
            {"event": "state_transition", "from": source, "to": destination,
             "trigger": trigger}
        )
    # A non-transition event that must be excluded from the count.
    log.append({"event": "wrapper_event", "kind": "boot"})

    from iai_mcp.doctor import check_k_lifecycle_history_24h

    row = check_k_lifecycle_history_24h()
    assert row.status == "PASS"
    assert "3 transitions" in row.detail
    # The bucket summary lists each destination state.
    for bucket in ("DROWSY=", "WAKE=", "SLEEP="):
        assert bucket in row.detail
|
||||
|
||||
|
||||
def test_doctor_row_l_quarantine_none_passes(
    lifecycle_state_root: Path,
) -> None:
    """Row (l) is PASS and says 'no quarantine' when nothing has been recorded."""
    from iai_mcp.doctor import check_l_sleep_cycle_status

    row = check_l_sleep_cycle_status()
    status, detail = row.status, row.detail
    assert status == "PASS"
    assert "no quarantine" in detail
|
||||
|
||||
|
||||
def test_doctor_row_l_quarantine_active_short_warns(
    lifecycle_state_root: Path,
) -> None:
    """Row (l) is an advisory WARN for an active quarantine that began 2 hours ago."""
    from datetime import datetime as _datetime
    from datetime import timedelta as _timedelta
    from datetime import timezone as _timezone

    from iai_mcp.lifecycle_state import save_state

    now = _datetime.now(_timezone.utc)
    now_iso = now.isoformat()
    # Quarantine window: opened 2h ago, closes 22h from now (still active).
    quarantine = {
        "since_ts": (now - _timedelta(hours=2)).isoformat(),
        "until_ts": (now + _timedelta(hours=22)).isoformat(),
        "reason": "sleep step 3 (DREAM_DECAY) failed 3x",
    }
    save_state(
        {
            "current_state": "WAKE",
            "since_ts": now_iso,
            "last_activity_ts": now_iso,
            "wrapper_event_seq": 0,
            "sleep_cycle_progress": None,
            "quarantine": quarantine,
            "shadow_run": False,
        },
        lifecycle_state_root / "lifecycle_state.json",
    )

    from iai_mcp.doctor import check_l_sleep_cycle_status

    row = check_l_sleep_cycle_status()
    assert row.status == "WARN"
    assert row.passed is True  # WARN advisory only
    for fragment in ("quarantined", "DREAM_DECAY"):
        assert fragment in row.detail
|
||||
|
||||
|
||||
def test_doctor_row_l_quarantine_active_long_fails(
    lifecycle_state_root: Path,
) -> None:
    """Row (l) is FAIL for an active quarantine that began 14 hours ago."""
    from datetime import datetime as _datetime
    from datetime import timedelta as _timedelta
    from datetime import timezone as _timezone

    from iai_mcp.lifecycle_state import save_state

    now = _datetime.now(_timezone.utc)
    now_iso = now.isoformat()
    # Quarantine window: opened 14h ago, closes 10h from now (still active).
    quarantine = {
        "since_ts": (now - _timedelta(hours=14)).isoformat(),
        "until_ts": (now + _timedelta(hours=10)).isoformat(),
        "reason": "sleep step 4 (OPTIMIZE_LANCE) failed 3x",
    }
    save_state(
        {
            "current_state": "WAKE",
            "since_ts": now_iso,
            "last_activity_ts": now_iso,
            "wrapper_event_seq": 0,
            "sleep_cycle_progress": None,
            "quarantine": quarantine,
            "shadow_run": False,
        },
        lifecycle_state_root / "lifecycle_state.json",
    )

    from iai_mcp.doctor import check_l_sleep_cycle_status

    row = check_l_sleep_cycle_status()
    assert row.status == "FAIL"
    assert row.passed is False  # FAIL flips the exit code
    assert "reset-quarantine" in row.detail
|
||||
|
||||
|
||||
def test_doctor_row_l_quarantine_expired_passes(
    lifecycle_state_root: Path,
) -> None:
    """Row (l) is PASS and says 'expired' once the quarantine's until_ts has passed."""
    from datetime import datetime as _datetime
    from datetime import timedelta as _timedelta
    from datetime import timezone as _timezone

    from iai_mcp.lifecycle_state import save_state

    now = _datetime.now(_timezone.utc)
    now_iso = now.isoformat()
    # Quarantine window: opened 25h ago and closed 1h ago (already expired).
    quarantine = {
        "since_ts": (now - _timedelta(hours=25)).isoformat(),
        "until_ts": (now - _timedelta(hours=1)).isoformat(),
        "reason": "sleep step 5 (COMPACT_RECORDS) failed 3x",
    }
    save_state(
        {
            "current_state": "WAKE",
            "since_ts": now_iso,
            "last_activity_ts": now_iso,
            "wrapper_event_seq": 0,
            "sleep_cycle_progress": None,
            "quarantine": quarantine,
            "shadow_run": False,
        },
        lifecycle_state_root / "lifecycle_state.json",
    )

    from iai_mcp.doctor import check_l_sleep_cycle_status

    row = check_l_sleep_cycle_status()
    assert row.status == "PASS"
    assert "expired" in row.detail
|
||||
|
||||
|
||||
def test_run_diagnosis_includes_rows_j_k_l_in_order(
    lifecycle_state_root: Path,
) -> None:
    """Phase 10.6 wire-in: ``run_diagnosis`` returns 14 rows with j < k < l < m."""
    from iai_mcp.doctor import run_diagnosis

    report = run_diagnosis()
    names = [row.name for row in report]

    # 14 rows expected: a..i (9), j/k/l (3), m/n (2).
    assert len(report) == 14, f"expected 14 rows, got {len(report)}: {names}"

    def _first_index(tag: str) -> int:
        """Return the position of the first row whose name contains ``tag``."""
        return next(pos for pos, row in enumerate(report) if tag in row.name)

    # The new rows must be present...
    j_pos = _first_index("(j)")
    k_pos = _first_index("(k)")
    l_pos = _first_index("(l)")
    m_pos = _first_index("(m)")

    # ...and strictly ordered j < k < l < m.
    assert j_pos < k_pos < l_pos < m_pos, (
        f"row order broken: j={j_pos} k={k_pos} l={l_pos} m={m_pos}"
    )
|
||||
361
tests/test_doctor_apply_recovery.py
Normal file
361
tests/test_doctor_apply_recovery.py
Normal file
|
|
@ -0,0 +1,361 @@
|
|||
"""Plan 07-05 Wave 5 R9/A11 acceptance — `iai-mcp doctor --apply --yes`
|
||||
recovers from `kill -9 <daemon_pid>`.
|
||||
|
||||
Flow:
|
||||
1. Spawn a real `python -m iai_mcp.daemon` against an isolated tmp socket
|
||||
(HIGH-4 LOCK pattern: IAI_DAEMON_SOCKET_PATH + IAI_MCP_STORE + HOME
|
||||
env propagation isolates state file too).
|
||||
2. Wait for socket bind + state file with daemon_pid populated.
|
||||
3. SIGKILL the daemon.
|
||||
4. Run `cmd_doctor(args)` with apply=True, yes=True.
|
||||
5. Assert: rc=0, post-recovery checks all PASS, doctor_action events
|
||||
written to the events ledger, total elapsed time within budget.
|
||||
|
||||
A11 budget: SPEC says ≤5 s recovery on warm cache. Test uses 15 s safety
|
||||
budget to absorb cold-cache bge-small load (~3-10 s) + LanceDB store open
|
||||
(~1 s) + harness overhead — same precedent as cold-start tests.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import signal
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import psutil
|
||||
import pytest
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fixture: full HIGH-4 LOCK isolation including HOME for state file
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture
def isolated_daemon_paths(tmp_path, monkeypatch):
    """Isolate the daemon under test via HOME, socket, and store overrides.

    HOME is pointed at ``tmp_path`` so that this process and any subprocess
    spawned from it resolve ``~/.iai-mcp/`` to ``tmp_path/.iai-mcp/``.
    ``daemon_state.STATE_PATH``, ``cli.LOCK_PATH`` and ``cli.SOCKET_PATH`` are
    also patched in-process because those module attributes were bound
    before the environment was overridden.

    Yields ``(sock_path, state_path, store_dir, lock_path)``. On teardown the
    fixture kills any daemon whose environment carries this test's socket
    path, removes the socket and its directory, and clears keyring's cached
    backend.
    """
    # Parallel ``.iai-mcp`` tree under tmp; the real one is never touched.
    iai_dir = tmp_path / ".iai-mcp"
    iai_dir.mkdir(parents=True, exist_ok=True)

    state_path = iai_dir / ".daemon-state.json"
    lock_path = iai_dir / ".lock"
    store_dir = iai_dir / "store"
    store_dir.mkdir(parents=True, exist_ok=True)

    # Socket lives under /tmp/iai-rec-<pid>-<n>/ (AF_UNIX 104-byte cap).
    sock_dir = Path(f"/tmp/iai-rec-{os.getpid()}-{id(tmp_path)}")
    sock_dir.mkdir(parents=True, exist_ok=True)
    sock_path = sock_dir / "d.sock"

    # CRITICAL: resolve the user's real Hugging Face cache BEFORE HOME is
    # overridden. Otherwise the spawned daemon's prewarm step sees an empty
    # cache under the tmp HOME and tries to download the model, which can
    # hang for 60+ seconds.
    #
    # An already-exported HF_HOME wins; then XDG_CACHE_HOME/huggingface;
    # then ~/.cache/huggingface. Unconditionally forcing the last one would
    # point the daemon at an empty directory on machines that relocate
    # their cache.
    real_hf_home = os.environ.get("HF_HOME")
    if not real_hf_home:
        xdg_cache_home = os.environ.get("XDG_CACHE_HOME")
        cache_root = Path(xdg_cache_home) if xdg_cache_home else Path.home() / ".cache"
        real_hf_home = str(cache_root / "huggingface")

    # These env vars reach subprocesses through os.environ.copy() in
    # _spawn_daemon, so the daemon writes its state under tmp_path.
    monkeypatch.setenv("HOME", str(tmp_path))
    monkeypatch.setenv("HF_HOME", real_hf_home)
    monkeypatch.setenv("IAI_DAEMON_SOCKET_PATH", str(sock_path))
    monkeypatch.setenv("IAI_MCP_STORE", str(store_dir))
    monkeypatch.setenv("IAI_DAEMON_IDLE_SHUTDOWN_SECS", "99999")
    # Force the keyring "fail" backend and supply a passphrase in the test
    # process as well, so the doctor's in-process audit-event write takes the
    # passphrase path instead of blocking on an interactive keychain prompt.
    # NOTE(review): conftest's module docstring says the keyring backend was
    # removed from the crypto path in Phase 07.10 — confirm whether the
    # keyring handling here is still required.
    monkeypatch.setenv(
        "PYTHON_KEYRING_BACKEND", "keyring.backends.fail.Keyring"
    )
    monkeypatch.setenv("IAI_MCP_CRYPTO_PASSPHRASE", "test-recovery-passphrase")
    # keyring resolves and caches its backend on first access; clearing the
    # cache makes PYTHON_KEYRING_BACKEND take effect in this process.
    import keyring.core

    keyring.core._keyring_backend = None

    # In-process overrides so the doctor (running in this process) reads the
    # same state/lock/socket paths that the spawned daemon uses.
    from iai_mcp import cli, daemon_state

    monkeypatch.setattr(daemon_state, "STATE_PATH", state_path)
    monkeypatch.setattr(cli, "LOCK_PATH", lock_path)
    monkeypatch.setattr(cli, "SOCKET_PATH", sock_path)

    try:
        yield sock_path, state_path, store_dir, lock_path
    finally:
        # Kill test-spawned daemons by env match only, so a daemon using a
        # different socket path is left alone.
        _kill_test_daemons(sock_path)
        try:
            if sock_path.exists():
                sock_path.unlink()
        except OSError:
            pass
        try:
            sock_dir.rmdir()
        except OSError:
            pass
        # Clear keyring's cached backend again so the fail backend does not
        # leak into later tests in the same pytest process.
        import keyring.core

        keyring.core._keyring_backend = None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _spawn_daemon(sock_path: Path, store_dir: Path, home: Path) -> subprocess.Popen:
    """Launch ``python -m iai_mcp.daemon`` as a child with an isolated environment.

    The child inherits the current environment, then has HOME, the socket
    path, the store path and the idle-shutdown timeout overridden. It also
    gets the keyring "fail" backend and a fixed crypto passphrase so its key
    lookup takes the passphrase path rather than a keychain prompt.

    Returns the ``Popen`` handle; the child's stdout and stderr are discarded.
    """
    overrides = {
        "HOME": str(home),
        "IAI_DAEMON_SOCKET_PATH": str(sock_path),
        "IAI_MCP_STORE": str(store_dir),
        "IAI_DAEMON_IDLE_SHUTDOWN_SECS": "99999",
        # Force fail-backend → passphrase fallback in the daemon subprocess.
        "PYTHON_KEYRING_BACKEND": "keyring.backends.fail.Keyring",
        "IAI_MCP_CRYPTO_PASSPHRASE": "test-recovery-passphrase",
    }
    child_env = {**os.environ, **overrides}
    command = [sys.executable, "-m", "iai_mcp.daemon"]
    return subprocess.Popen(
        command,
        env=child_env,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
|
||||
|
||||
|
||||
def _wait_for_socket_and_pid(
    sock_path: Path, state_path: Path, expected_pid: int, timeout_sec: float = 30.0
) -> bool:
    """Wait for the socket file plus a state file whose daemon_pid matches.

    Polls every 0.1 s until ``timeout_sec`` elapses. Returns True as soon as
    both files exist and the state JSON's ``daemon_pid`` equals
    ``expected_pid``; returns False on timeout.
    """
    give_up_at = time.monotonic() + timeout_sec
    while time.monotonic() < give_up_at:
        if sock_path.exists() and state_path.exists():
            try:
                payload = json.loads(state_path.read_text())
            except (OSError, json.JSONDecodeError):
                # Unreadable or unparsable state file: retry on the next poll.
                pass
            else:
                if payload.get("daemon_pid") == expected_pid:
                    return True
        time.sleep(0.1)
    return False
|
||||
|
||||
|
||||
def _wait_for_socket_only(sock_path: Path, timeout_sec: float = 15.0) -> bool:
    """Wait for the socket file to exist (used after respawn to detect a new daemon).

    Polls every 0.1 s; returns True once ``sock_path`` exists, False if
    ``timeout_sec`` elapses first.
    """
    give_up_at = time.monotonic() + timeout_sec
    bound = False
    while not bound and time.monotonic() < give_up_at:
        bound = sock_path.exists()
        if not bound:
            time.sleep(0.1)
    return bound
|
||||
|
||||
|
||||
def _kill_test_daemons(sock_path: Path) -> None:
    """Stop every ``iai_mcp.daemon`` process that is bound to this test's socket.

    A process qualifies only when its command line mentions
    ``iai_mcp.daemon`` AND its environment has ``IAI_DAEMON_SOCKET_PATH``
    equal to ``sock_path``. Daemons with a different or absent socket
    override are left alone. Each match gets SIGTERM, then SIGKILL if it has
    not exited within 3 seconds.
    """
    wanted_socket = str(sock_path)
    for candidate in psutil.process_iter(["pid", "cmdline"]):
        try:
            command = " ".join(candidate.info.get("cmdline") or [])
            if "iai_mcp.daemon" in command:
                try:
                    child_env = candidate.environ()
                except (psutil.AccessDenied, psutil.NoSuchProcess):
                    continue
                if child_env.get("IAI_DAEMON_SOCKET_PATH") != wanted_socket:
                    continue
                try:
                    candidate.send_signal(signal.SIGTERM)
                    candidate.wait(timeout=3)
                except (psutil.NoSuchProcess, psutil.TimeoutExpired):
                    # Polite shutdown did not complete: escalate.
                    try:
                        candidate.send_signal(signal.SIGKILL)
                    except psutil.NoSuchProcess:
                        pass
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            continue
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 1: kill -9 → --apply --yes recovers within budget, all PASS, exit 0
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_apply_yes_recovers_from_kill(isolated_daemon_paths):
    """R9/A11 acceptance: ``doctor --apply --yes`` revives a SIGKILLed daemon.

    Boots a real daemon, kills it with SIGKILL, then runs
    ``cmd_doctor(apply=True, yes=True)`` and checks that it exits 0 within
    the 15 s budget, that the state file carries a new daemon_pid, that
    ``run_diagnosis`` reports no failing rows, and that a ``respawn_daemon``
    ``doctor_action`` event was written to the events ledger.
    """
    sock_path, state_path, store_dir, _ = isolated_daemon_paths

    # Boot daemon #1.
    proc = _spawn_daemon(sock_path, store_dir, home=Path(os.environ["HOME"]))
    try:
        booted = _wait_for_socket_and_pid(
            sock_path, state_path, proc.pid, timeout_sec=30
        )
        assert booted, (
            f"daemon never bound socket + stamped daemon_pid={proc.pid} within 30s"
        )

        original_pid = proc.pid

        # Kill the daemon; afterwards a plain diagnosis must report at least
        # checks (a) and (b) as FAIL (others may fail too).
        proc.send_signal(signal.SIGKILL)
        proc.wait(timeout=5)
        time.sleep(0.5)  # let psutil reflect death

        from iai_mcp.doctor import cmd_doctor, run_diagnosis

        failing_before = [row.name for row in run_diagnosis() if not row.passed]
        for label, tag in (
            ("(a) daemon process alive", "(a)"),
            ("(b) socket file fresh", "(b)"),
        ):
            assert label in failing_before, (
                f"after kill, check {tag} should FAIL; got fails: {failing_before}"
            )

        # Run the recovery and time it.
        started = time.monotonic()
        args = argparse.Namespace(apply=True, yes=True)
        rc = cmd_doctor(args)
        elapsed = time.monotonic() - started

        assert rc == 0, (
            f"doctor recovery returned rc={rc}, elapsed={elapsed:.2f}s "
            "— expected exit 0 (all PASS after recovery)"
        )
        # The 15s safety budget absorbs cold-cache model load, store open and
        # harness overhead; the SPEC A11 5s budget is verified separately.
        assert elapsed < 15.0, (
            f"doctor recovery took {elapsed:.2f}s, exceeds 15s safety budget"
        )

        # Post-condition: the state file carries a NEW daemon_pid, i.e. the
        # daemon really was respawned rather than left as-is.
        assert state_path.exists(), "respawned daemon never wrote state file"
        new_pid = json.loads(state_path.read_text()).get("daemon_pid")
        assert new_pid is not None, "respawned daemon did not stamp daemon_pid"
        assert new_pid != original_pid, (
            f"daemon was not actually respawned: same PID {new_pid} after recovery"
        )

        failing_after = [row.name for row in run_diagnosis() if not row.passed]
        assert failing_after == [], f"post-recovery FAILs remain: {failing_after}"

        # Audit events: at least one doctor_action event for the respawn.
        from iai_mcp.events import query_events
        from iai_mcp.store import MemoryStore

        store = MemoryStore()
        recent = query_events(store, kind="doctor_action", limit=10)
        assert len(recent) >= 1, (
            "doctor_action events not written to ledger after --apply"
        )
        # At minimum the respawn_daemon action must be present.
        seen_actions = {event["data"].get("action") for event in recent}
        assert "respawn_daemon" in seen_actions, (
            f"respawn_daemon event missing; saw actions: {seen_actions}"
        )
    finally:
        # Best-effort cleanup of the original daemon if it is somehow alive.
        still_running = proc.poll() is None
        if still_running:
            try:
                proc.send_signal(signal.SIGKILL)
                proc.wait(timeout=5)
            except (subprocess.TimeoutExpired, ProcessLookupError):
                pass
        # Any respawned daemon is reaped by the fixture's _kill_test_daemons call.
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 2: --apply WITHOUT --yes prompts for each destructive action;
|
||||
# 'n' answer skips the action and the FAIL persists → rc=2.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_apply_no_yes_skips_destructive_action_on_n_response(
    isolated_daemon_paths, monkeypatch
):
    """R9 UX: ``--apply`` without ``--yes`` prompts; answering 'n' leaves FAILs → rc=2.

    ``psutil.process_iter`` is replaced so it reports a single fabricated
    ``iai_mcp.core`` process, and ``builtins.input`` is replaced so every
    prompt is answered with 'n'. The doctor is then expected to exit 2.
    """
    _sock, _state, _store, _lock = isolated_daemon_paths

    class _FakeProc:
        """Minimal stand-in exposing only the ``info`` dict of a psutil process."""

        def __init__(self, pid: int, cmdline: list[str]):
            self.info = {"pid": pid, "cmdline": cmdline}

    # Synthetic orphan that the doctor's process scan will see.
    orphan = _FakeProc(99_999, ["python", "-m", "iai_mcp.core"])

    def _fake_process_iter(*_args, **_kwargs):
        return [orphan]

    def _always_decline(*_args, **_kwargs):
        return "n"

    monkeypatch.setattr(psutil, "process_iter", _fake_process_iter)
    # Auto-decline every input prompt.
    monkeypatch.setattr("builtins.input", _always_decline)

    from iai_mcp.doctor import cmd_doctor

    rc = cmd_doctor(argparse.Namespace(apply=True, yes=False))

    # The declined fix leaves the orphan FAIL in place, and no daemon runs in
    # the tmp env, so the re-check still has FAILs → rc=2.
    assert rc == 2, (
        f"declining destructive action should leave FAILs unfixed → rc=2; got {rc}"
    )
|
||||
166
tests/test_doctor_check_i_lance_versions.py
Normal file
166
tests/test_doctor_check_i_lance_versions.py
Normal file
|
|
@ -0,0 +1,166 @@
|
|||
"""Plan 07.14-03 [Wave2-Option-C] regression test for doctor row (i).
|
||||
|
||||
PASS: <=500 manifests. WARN: 501..2000. FAIL: >2000.
|
||||
|
||||
The check reads ``IAI_MCP_STORE/lancedb/records.lance/_versions/*.manifest``
|
||||
(env-var first, ``~/.iai-mcp`` fallback). Tests redirect ``IAI_MCP_STORE``
|
||||
at a tmp_path to avoid touching the user's real store.
|
||||
|
||||
Status mapping is asserted both via direct call and via ``run_diagnosis()``.
|
||||
The wire-in test below uses name-based lookup rather than positional / count
|
||||
assertions so future doctor-row additions (e.g. added rows m, n)
|
||||
do not break this regression test.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Fixtures
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture
def fake_versions_dir(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path:
    """Point ``IAI_MCP_STORE`` at tmp_path and return a fresh ``_versions`` dir.

    Creates ``tmp_path/lancedb/records.lance/_versions`` (parents included)
    so tests can drop manifest files straight into it.
    """
    versions = tmp_path.joinpath("lancedb", "records.lance", "_versions")
    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    versions.mkdir(parents=True)
    return versions
|
||||
|
||||
|
||||
def _seed(versions_dir: Path, count: int) -> None:
    """Write ``count`` fake manifests named ``<20-digit index>.manifest``."""
    payload = b"x" * 10
    for index in range(count):
        manifest = versions_dir / f"{index:020d}.manifest"
        manifest.write_bytes(payload)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Direct check_i tests
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_pass_at_500(fake_versions_dir: Path) -> None:
    """500 manifests -> PASS, with the count shown in the detail."""
    manifest_count = 500
    _seed(fake_versions_dir, manifest_count)
    from iai_mcp.doctor import check_i_lance_versions_count

    row = check_i_lance_versions_count()
    assert row.status == "PASS"
    assert row.passed is True
    assert "500" in row.detail
|
||||
|
||||
|
||||
def test_pass_at_low_count(fake_versions_dir: Path) -> None:
    """100 manifests -> PASS, with the count shown in the detail."""
    manifest_count = 100
    _seed(fake_versions_dir, manifest_count)
    from iai_mcp.doctor import check_i_lance_versions_count

    row = check_i_lance_versions_count()
    assert row.status == "PASS"
    assert row.passed is True
    assert "100" in row.detail
|
||||
|
||||
|
||||
def test_warn_at_1500(fake_versions_dir: Path) -> None:
    """1500 manifests -> WARN that mentions compact-records but still passes."""
    manifest_count = 1500
    _seed(fake_versions_dir, manifest_count)
    from iai_mcp.doctor import check_i_lance_versions_count

    row = check_i_lance_versions_count()
    assert row.status == "WARN"
    # WARN is advisory only: ``passed`` stays True.
    assert row.passed is True
    assert "compact-records" in row.detail
|
||||
|
||||
|
||||
def test_warn_boundary_at_2000(fake_versions_dir: Path) -> None:
    """2000 manifests -> still WARN (upper boundary inclusive)."""
    manifest_count = 2000
    _seed(fake_versions_dir, manifest_count)
    from iai_mcp.doctor import check_i_lance_versions_count

    row = check_i_lance_versions_count()
    status, passed = row.status, row.passed
    assert status == "WARN"
    assert passed is True
|
||||
|
||||
|
||||
def test_fail_at_2500(fake_versions_dir: Path) -> None:
    """2500 manifests -> FAIL whose detail names 'daemon stop' and 'compact-records'."""
    manifest_count = 2500
    _seed(fake_versions_dir, manifest_count)
    from iai_mcp.doctor import check_i_lance_versions_count

    row = check_i_lance_versions_count()
    assert row.status == "FAIL"
    assert row.passed is False
    for hint in ("daemon stop", "compact-records"):
        assert hint in row.detail
|
||||
|
||||
|
||||
def test_fail_boundary_at_2001(fake_versions_dir: Path) -> None:
    """2001 manifests -> FAIL (first count past the WARN boundary)."""
    manifest_count = 2001
    _seed(fake_versions_dir, manifest_count)
    from iai_mcp.doctor import check_i_lance_versions_count

    row = check_i_lance_versions_count()
    status, passed = row.status, row.passed
    assert status == "FAIL"
    assert passed is False
|
||||
|
||||
|
||||
def test_pass_when_dir_missing(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
    """An empty store with no ``_versions`` dir -> PASS saying 'not present'."""
    empty_store = str(tmp_path)
    monkeypatch.setenv("IAI_MCP_STORE", empty_store)
    from iai_mcp.doctor import check_i_lance_versions_count

    row = check_i_lance_versions_count()
    assert row.status == "PASS"
    assert row.passed is True
    assert "not present" in row.detail
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# run_diagnosis wire-in: row (i) is present and PASS on a clean store.
|
||||
# Tests use name-based lookup rather than positional indexing so future
|
||||
# row additions (Phase 10.4 added m + n) do not regress this check.
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_run_diagnosis_includes_lance_versions_row(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """Plan 07.14-03 wire-in: ``run_diagnosis()`` has exactly one (i) lance row."""
    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    from iai_mcp.doctor import run_diagnosis

    report = run_diagnosis()
    # Match by name so that added or reordered rows do not break this test.
    lance_rows = []
    for row in report:
        if "(i)" in row.name and "lance" in row.name.lower():
            lance_rows.append(row)
    assert len(lance_rows) == 1, (
        f"expected exactly one (i) lance versions row in run_diagnosis(); "
        f"got {len(lance_rows)} from {[row.name for row in report]}"
    )
|
||||
|
||||
|
||||
def test_run_diagnosis_lance_row_pass_on_clean_state(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """With ``IAI_MCP_STORE`` at a fresh tmp dir, the single (i) row is PASS."""
    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    from iai_mcp.doctor import run_diagnosis

    lance_rows = []
    for row in run_diagnosis():
        if "(i)" in row.name and "lance" in row.name.lower():
            lance_rows.append(row)
    assert len(lance_rows) == 1
    only_row = lance_rows[0]
    assert only_row.status == "PASS"
    assert only_row.passed is True
|
||||
316
tests/test_doctor_checklist.py
Normal file
316
tests/test_doctor_checklist.py
Normal file
|
|
@ -0,0 +1,316 @@
|
|||
"""Plan 07-05 Wave 5 R9 acceptance — doctor 6-row PASS/FAIL checklist.
|
||||
|
||||
Each individual failure scenario produces a FAIL on the matching check
|
||||
and the doctor exits with the documented code (D7-13: 0=all pass,
|
||||
1=any FAIL no --apply, 2=--apply but FAIL persists).
|
||||
|
||||
Checks (D7-11 ordering):
|
||||
(a) daemon process alive — daemon_pid in .daemon-state.json
|
||||
(b) socket file fresh — connect+status round-trip <250ms
|
||||
(c) lock file healthy — fcntl probe doesn't error
|
||||
(d) no orphan iai_mcp.core procs — psutil scan returns 0
|
||||
(e) daemon state file valid — fsm_state ∈ {WAKE, SLEEPING, DREAMING}
|
||||
(f) lancedb store readable — MemoryStore() opens without error
|
||||
|
||||
Tests use monkeypatching to construct each failure scenario in isolation
|
||||
without booting a real daemon (test_doctor_apply_recovery.py covers the
|
||||
end-to-end recovery scenario with a real subprocess daemon).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import io
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from contextlib import redirect_stdout
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fixtures: tmp socket + state + lock + store paths
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture
def short_socket_paths(tmp_path, monkeypatch):
    """Provide isolated ``(lock_path, sock_path, state_path)`` for doctor tests.

    The lock file, state file and store directory live under ``tmp_path``.
    The socket lives in a short ``/tmp/iai-doc-<pid>-<n>/`` directory because
    AF_UNIX socket paths are capped at roughly 104 bytes on macOS and
    pytest's ``tmp_path`` can be too long under xdist.

    Redirected for the duration of the test:
      - ``IAI_DAEMON_SOCKET_PATH`` env var -> the short socket path
      - ``IAI_MCP_STORE`` env var -> ``tmp_path / "store"``
      - ``iai_mcp.daemon_state.STATE_PATH`` -> ``tmp_path / ".daemon-state.json"``
      - ``iai_mcp.cli.LOCK_PATH`` -> ``tmp_path / ".lock"``
      - ``iai_mcp.cli.SOCKET_PATH`` -> the short socket path

    On teardown the socket file and its directory are removed on a
    best-effort basis (``OSError`` is ignored).
    """
    # Everything except the socket can live under pytest's tmp_path.
    lock_path = tmp_path / ".lock"
    state_path = tmp_path / ".daemon-state.json"
    store_dir = tmp_path / "store"

    # The socket gets its own short directory under /tmp.
    sock_dir = Path(f"/tmp/iai-doc-{os.getpid()}-{id(tmp_path)}")
    sock_dir.mkdir(parents=True, exist_ok=True)
    sock_path = sock_dir / "d.sock"

    store_dir.mkdir(parents=True, exist_ok=True)

    from iai_mcp import cli, daemon_state

    for env_name, env_value in (
        ("IAI_DAEMON_SOCKET_PATH", sock_path),
        ("IAI_MCP_STORE", store_dir),
    ):
        monkeypatch.setenv(env_name, str(env_value))

    monkeypatch.setattr(daemon_state, "STATE_PATH", state_path)
    monkeypatch.setattr(cli, "LOCK_PATH", lock_path)
    # Defensive fallback: the doctor prefers the env var, but patching the
    # module constant too keeps tests isolated if that ever changes.
    monkeypatch.setattr(cli, "SOCKET_PATH", sock_path)

    def _remove_socket_file():
        if sock_path.exists():
            sock_path.unlink()

    try:
        yield lock_path, sock_path, state_path
    finally:
        # Cleanup is best-effort: a leftover file or a non-empty directory
        # must not turn a passing test into an error.
        for cleanup_step in (_remove_socket_file, sock_dir.rmdir):
            try:
                cleanup_step()
            except OSError:
                pass
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_clean_environment_yields_check_a_fail_exit_1(short_socket_paths, capsys):
    """With no daemon and no state file, ``cmd_doctor`` exits with code 1.

    Check (a) is expected to print ABSENT because there is no daemon_pid.
    Other checks may or may not fail depending on the host, but any FAIL
    without ``--apply`` yields exit code 1.
    """
    from iai_mcp.doctor import cmd_doctor

    rc = cmd_doctor(argparse.Namespace(apply=False, yes=False))
    stdout = capsys.readouterr().out

    assert rc == 1, f"expected 1 (FAIL no --apply), got {rc}"
    assert "IAI-MCP Doctor" in stdout
    assert "(a) daemon process alive" in stdout
    assert "ABSENT" in stdout, "check (a) should say ABSENT when no daemon_pid"
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "scenario,expected_fail_check",
    [
        ("no_daemon_pid", "(a) daemon process alive"),
        ("dead_pid_in_state", "(a) daemon process alive"),
        ("stale_socket_unconnectable", "(b) socket file fresh"),
        ("orphan_core_procs", "(d) no orphan iai_mcp.core procs"),
        ("corrupt_state_fsm", "(e) daemon state file valid"),
    ],
)
def test_individual_failure_modes(
    scenario, expected_fail_check, short_socket_paths, monkeypatch
):
    """R9: every failure scenario yields a FAIL on its matching check.

    Additional (cascading) FAILs are tolerated; the only requirement is
    that ``expected_fail_check`` is among the failing row names.
    """
    _, sock_path, state_path = short_socket_paths

    # "no_daemon_pid" needs no setup: the fixture already leaves the state
    # file absent, which is exactly that scenario.
    if scenario == "dead_pid_in_state":
        # Record a PID that is very unlikely to be live. 2_000_000 is far
        # above macOS's PID ceiling and the classic Linux default; on Linux
        # hosts with a raised pid_max it is merely improbable. It is kept
        # well below INT_MAX so os.kill cannot raise OverflowError before
        # the ProcessLookupError path is reached.
        dead_state = {"daemon_pid": 2_000_000, "fsm_state": "WAKE"}
        state_path.write_text(json.dumps(dead_state))

    elif scenario == "stale_socket_unconnectable":
        # A regular file at the socket path cannot be connected to, so
        # check (b) is expected to FAIL.
        sock_path.write_text("")

    elif scenario == "orphan_core_procs":
        # Fake the process table instead of spawning a real
        # `python -m iai_mcp.core`, which would pollute sibling tests.
        import psutil

        class _FakeProc:
            def __init__(self, pid: int, cmdline: list[str]):
                self.info = {"pid": pid, "cmdline": cmdline}

        fake = _FakeProc(99_999, ["python", "-m", "iai_mcp.core"])

        def _fake_process_iter(*args, **kwargs):
            return [fake]

        monkeypatch.setattr(psutil, "process_iter", _fake_process_iter)

    elif scenario == "corrupt_state_fsm":
        # An unknown fsm_state value should make check (e) FAIL.
        state_path.write_text(json.dumps({"fsm_state": "INVALID_STATE_VALUE"}))

    from iai_mcp.doctor import run_diagnosis

    fail_names = [row.name for row in run_diagnosis() if not row.passed]
    assert expected_fail_check in fail_names, (
        f"Expected FAIL on '{expected_fail_check}' for scenario '{scenario}'; "
        f"got fails: {fail_names}"
    )
|
||||
|
||||
|
||||
def test_print_checklist_format_six_rows(short_socket_paths, monkeypatch, capsys):
    """R9: ``print_checklist`` renders six PASS rows under the doctor header.

    The six results are constructed directly as passing rows, so this
    covers the fully-green rendering path of the formatter.
    """
    from iai_mcp import doctor

    passing_rows = (
        ("(a) daemon process alive", "PID 99999 (iai_mcp.daemon)"),
        ("(b) socket file fresh", "connected in 5 ms"),
        ("(c) lock file healthy", "acquirable"),
        ("(d) no orphan iai_mcp.core procs", "0 found"),
        ("(e) daemon state file valid", "fsm_state=WAKE"),
        ("(f) lancedb store readable", "opens without error"),
    )
    forced_results = [
        doctor.CheckResult(name, True, detail) for name, detail in passing_rows
    ]

    doctor.print_checklist(forced_results)
    out = capsys.readouterr().out

    assert "IAI-MCP Doctor" in out
    assert out.count("[PASS]") == 6
    assert out.count("[FAIL]") == 0
|
||||
|
||||
|
||||
def test_all_pass_returns_exit_0(short_socket_paths, monkeypatch, capsys):
    """D7-13 exit 0: an all-PASS diagnosis makes ``cmd_doctor`` return 0.

    ``run_diagnosis`` itself is replaced with a stub returning six passing
    rows, so no real daemon is required.
    """
    from iai_mcp import doctor

    check_names = (
        "(a) daemon process alive",
        "(b) socket file fresh",
        "(c) lock file healthy",
        "(d) no orphan iai_mcp.core procs",
        "(e) daemon state file valid",
        "(f) lancedb store readable",
    )
    forced_pass = [
        doctor.CheckResult(check_name, True, "synthetic pass")
        for check_name in check_names
    ]
    monkeypatch.setattr(doctor, "run_diagnosis", lambda: forced_pass)

    rc = doctor.cmd_doctor(argparse.Namespace(apply=False, yes=False))
    out = capsys.readouterr().out

    assert rc == 0
    assert "All checks passed" in out
|
||||
|
||||
|
||||
def test_apply_without_yes_warns_when_yes_alone(short_socket_paths, monkeypatch, capsys):
    """R9 UX: ``--yes`` without ``--apply`` warns on stderr yet still diagnoses.

    The exit code reflects the diagnosis outcome, so both 0 and 1 are
    accepted here.
    """
    from iai_mcp import doctor

    yes_only = argparse.Namespace(apply=False, yes=True)
    rc = doctor.cmd_doctor(yes_only)
    stderr = capsys.readouterr().err

    # The warning is written to stderr, not stdout.
    assert "--yes without --apply is meaningless" in stderr
    # Diagnosis still ran; most likely 1 since no daemon runs in the tmp env.
    assert rc in (0, 1)
|
||||
|
||||
|
||||
def test_exit_code_2_when_apply_cannot_fix(short_socket_paths, monkeypatch, capsys):
    """D7-13: ``--apply`` with a FAIL that survives the re-check exits with 2.

    The state file gets a bogus ``fsm_state`` and ``run_diagnosis`` is
    stubbed to report check (e) as the only FAIL on every call, so the
    failure is still present after the repair pass.
    """
    _, _, state_path = short_socket_paths
    # Bogus fsm_state on disk, matching the synthetic FAIL reported below.
    state_path.write_text(json.dumps({"fsm_state": "TOTALLY_BOGUS"}))

    from iai_mcp import doctor

    def _forced_fail_e_only():
        # A fresh list per call: the first call and the post-apply re-check
        # both see the same single FAIL on check (e).
        rows = [
            doctor.CheckResult(name, True, "synthetic")
            for name in (
                "(a) daemon process alive",
                "(b) socket file fresh",
                "(c) lock file healthy",
                "(d) no orphan iai_mcp.core procs",
            )
        ]
        rows.append(
            doctor.CheckResult(
                "(e) daemon state file valid",
                False,
                "fsm_state='TOTALLY_BOGUS' not in [...]",
            )
        )
        rows.append(
            doctor.CheckResult("(f) lancedb store readable", True, "synthetic")
        )
        return rows

    monkeypatch.setattr(doctor, "run_diagnosis", _forced_fail_e_only)

    rc = doctor.cmd_doctor(argparse.Namespace(apply=True, yes=True))
    out = capsys.readouterr().out

    assert rc == 2, f"expected 2 (--apply tried but FAIL persists), got {rc}"
    assert "STILL BROKEN" in out
    assert "(e) daemon state file valid" in out
|
||||
|
||||
|
||||
def test_check_b_returns_fail_when_socket_missing(short_socket_paths):
    """Check (b) FAILs with a "does not exist" detail when the socket is absent."""
    sock_path = short_socket_paths[1]
    # Defensive: guarantee the socket file really is missing.
    if sock_path.exists():
        sock_path.unlink()

    from iai_mcp.doctor import check_b_socket_fresh

    outcome = check_b_socket_fresh()

    assert outcome.passed is False
    assert "does not exist" in outcome.detail
|
||||
|
||||
|
||||
def test_check_e_passes_when_state_file_absent(short_socket_paths):
    """Check (e) PASSes with a "no state file" detail when the file is absent."""
    state_path = short_socket_paths[2]
    # Defensive: guarantee the state file really is missing.
    if state_path.exists():
        state_path.unlink()

    from iai_mcp.doctor import check_e_state_file_valid

    outcome = check_e_state_file_valid()

    assert outcome.passed is True
    assert "no state file" in outcome.detail
|
||||
316
tests/test_doctor_crypto_file_backend.py
Normal file
316
tests/test_doctor_crypto_file_backend.py
Normal file
|
|
@ -0,0 +1,316 @@
|
|||
"""Phase 07.10 W3 / Plan 05: doctor `check_h_crypto_file_state` + top-of-output hint.
|
||||
|
||||
Locks the executable spec for the 8th doctor check row + the migration
|
||||
remediation hint that prints at the very top of doctor's output when the
|
||||
file-missing-but-Keychain-entry-exists state is detected (Phase 07.10 D-12).
|
||||
|
||||
Detection matrix:
|
||||
| file present + valid | keyring entry | doctor output |
|
||||
| yes | any | PASS |
|
||||
| no | yes | WARN + top-of-output hint pointing at `iai-mcp crypto migrate-to-file` |
|
||||
| no | no/error | PASS (clean fresh-install state) |
|
||||
| yes (malformed) | any | FAIL: prints the file's CryptoKeyError message |
|
||||
|
||||
These tests run independently of the existing `test_doctor_checklist.py`
|
||||
fixtures (no daemon socket, no lock file): they only exercise
|
||||
`check_h_crypto_file_state` directly + the top-of-output hint helper.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import io
|
||||
import os
|
||||
import secrets
|
||||
from contextlib import redirect_stdout
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- check_h_crypto_file_state
|
||||
|
||||
def test_check_h_pass_when_file_present_and_valid(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """D-12 case 1: a 32-byte, mode-0o600 key file yields PASS.

    ``IAI_MCP_STORE`` is pointed at ``tmp_path``, where the test writes
    ``.crypto.key`` before running the check.
    """
    from iai_mcp.doctor import check_h_crypto_file_state

    key_file = tmp_path / ".crypto.key"
    key_file.write_bytes(secrets.token_bytes(32))
    os.chmod(key_file, 0o600)

    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    result = check_h_crypto_file_state()

    assert result.status == "PASS", f"unexpected status={result.status} detail={result.detail}"
    assert result.passed is True
    assert ".crypto.key" in result.detail
|
||||
|
||||
|
||||
def test_check_h_warn_when_file_missing_and_keyring_has_key(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """D-12 case 2: no key file but a keyring entry yields WARN plus a hint.

    ``keyring.get_password`` is stubbed so the test never probes the real
    system keychain.
    """
    from iai_mcp.doctor import check_h_crypto_file_state

    # Nothing has been written to tmp_path, so the key file is absent.
    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    assert not (tmp_path / ".crypto.key").exists()

    import keyring as _keyring

    # Placeholder base64 text standing in for a stored keychain entry.
    fake_b64 = "Zm9vYmFyZm9vYmFyZm9vYmFyZm9vYmFyZm9vYmFyZm9vYmE="
    monkeypatch.setattr(
        _keyring, "get_password", lambda service, username: fake_b64
    )

    result = check_h_crypto_file_state()

    assert result.status == "WARN", f"unexpected status={result.status} detail={result.detail}"
    assert "migrate-to-file" in result.detail.lower()
    # A WARN is advisory: the row must still count as passed.
    assert result.passed is True
|
||||
|
||||
|
||||
def test_check_h_pass_when_file_missing_and_no_keyring(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """D-12 case 3: no key file and no keyring entry yields PASS.

    The detail text must mention either ``init`` or ``passphrase`` so a
    fresh-install user gets actionable guidance.
    """
    from iai_mcp.doctor import check_h_crypto_file_state

    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    assert not (tmp_path / ".crypto.key").exists()

    import keyring as _keyring

    # "No keychain entry" is simulated by get_password returning None.
    monkeypatch.setattr(_keyring, "get_password", lambda service, username: None)

    result = check_h_crypto_file_state()

    assert result.status == "PASS", f"unexpected status={result.status} detail={result.detail}"
    assert result.passed is True
    lowered_detail = result.detail.lower()
    assert "init" in lowered_detail or "passphrase" in lowered_detail
|
||||
|
||||
|
||||
def test_check_h_pass_when_keyring_backend_unavailable(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """D-12 case 3b: no key file and ``NoKeyringError`` still yields PASS.

    A host without any keyring backend is treated the same as a host with
    no stored entry: neither a failure nor a warning.
    """
    from iai_mcp.doctor import check_h_crypto_file_state

    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    assert not (tmp_path / ".crypto.key").exists()

    import keyring as _keyring
    import keyring.errors as _keyring_errors

    def raise_no_backend(service: str, username: str) -> str | None:
        # Mimic a host where no keyring backend can be loaded.
        raise _keyring_errors.NoKeyringError("no backend available (test-stub)")

    monkeypatch.setattr(_keyring, "get_password", raise_no_backend)

    result = check_h_crypto_file_state()

    assert result.status == "PASS", f"unexpected status={result.status} detail={result.detail}"
    assert result.passed is True
|
||||
|
||||
|
||||
def test_check_h_fail_when_file_malformed(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """D-12 case 4: a key file of the wrong length yields FAIL.

    The detail must mention either "wrong length" or "malformed".
    """
    from iai_mcp.doctor import check_h_crypto_file_state

    short_key = tmp_path / ".crypto.key"
    short_key.write_bytes(b"\x00" * 31)  # one byte short of the expected 32
    os.chmod(short_key, 0o600)

    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))
    result = check_h_crypto_file_state()

    assert result.status == "FAIL", f"unexpected status={result.status} detail={result.detail}"
    assert result.passed is False
    assert "wrong length" in result.detail.lower() or "malformed" in result.detail.lower()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- top-of-output hint helper
|
||||
|
||||
def test_format_top_of_output_hint_emits_line_when_check_h_warns() -> None:
    """D-12: a WARN row for check (h) produces a ``> hint:`` line.

    The hint must name ``migrate-to-file`` so the fix is visible before the
    row-by-row checklist.
    """
    from iai_mcp.doctor import CheckResult, _format_top_of_output_hint

    pass_row = CheckResult(
        "(a) daemon process alive", True, "PID 12345 (iai_mcp.daemon)", status="PASS"
    )
    warn_row = CheckResult(
        "(h) crypto key file state",
        True,
        "crypto key file missing at /tmp/x/.crypto.key, but a Keychain entry was found.\n"
        " Run `iai-mcp crypto migrate-to-file` from a Terminal to migrate the key.",
        status="WARN",
    )

    hint = _format_top_of_output_hint([pass_row, warn_row])

    assert hint is not None, "WARN row for check_h must produce a hint"
    assert hint.startswith("> hint:"), f"hint must be prefixed with `> hint:`, got: {hint!r}"
    assert "migrate-to-file" in hint, f"hint must name migrate-to-file, got: {hint!r}"
|
||||
|
||||
|
||||
def test_format_top_of_output_hint_returns_none_when_no_warn() -> None:
    """Without any WARN row the hint helper returns ``None``."""
    from iai_mcp.doctor import CheckResult, _format_top_of_output_hint

    all_pass = [
        CheckResult(name, True, detail, status="PASS")
        for name, detail in (
            ("(a) daemon process alive", "PID 12345 (iai_mcp.daemon)"),
            ("(h) crypto key file state", "key file present"),
        )
    ]

    assert _format_top_of_output_hint(all_pass) is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- run_diagnosis includes check_h
|
||||
|
||||
def test_run_diagnosis_includes_check_h(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
    """D-12 wire-in: ``run_diagnosis()`` returns exactly one (h) and one (i) row.

    Rows are looked up by name, not position, so later doctor-row additions
    do not break this contract. A key file is written under ``tmp_path``
    and ``IAI_MCP_STORE`` is pointed there before the diagnosis runs.
    """
    from iai_mcp.doctor import run_diagnosis

    key_file = tmp_path / ".crypto.key"
    key_file.write_bytes(secrets.token_bytes(32))
    os.chmod(key_file, 0o600)
    monkeypatch.setenv("IAI_MCP_STORE", str(tmp_path))

    # Other rows may FAIL in this environment; only the presence of the
    # (h) and (i) rows is asserted.
    results = run_diagnosis()

    h_rows = [r for r in results if "(h)" in r.name and "crypto" in r.name.lower()]
    i_rows = [r for r in results if "(i)" in r.name and "lance" in r.name.lower()]

    assert len(h_rows) == 1, (
        f"expected exactly one (h) crypto row in run_diagnosis(); "
        f"got {len(h_rows)} from {[r.name for r in results]}"
    )
    assert len(i_rows) == 1, (
        f"expected exactly one (i) lance versions row in run_diagnosis(); "
        f"got {len(i_rows)} from {[r.name for r in results]}"
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- cmd_doctor wire-in (advisor-driven)
|
||||
|
||||
def test_cmd_doctor_prints_hint_at_top_when_check_h_warns(
    monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture
) -> None:
    """D-12 wire-in pin: ``cmd_doctor`` prints the hint above the checklist.

    Helper-level tests only prove the helper builds the right string; this
    test pins that ``cmd_doctor`` actually emits it, and emits it before
    the checklist header.

    Strategy: stub ``doctor.run_diagnosis`` with seven PASS rows plus one
    WARN row for check (h), capture stdout, and compare the position of
    ``> hint:`` with the position of the ``IAI-MCP Doctor`` header.
    """
    import argparse

    from iai_mcp import doctor as _doctor

    passing_names = (
        "(a) daemon process alive",
        "(b) socket file fresh",
        "(c) lock file healthy",
        "(d) no orphan iai_mcp.core procs",
        "(e) daemon state file valid",
        "(f) lancedb store readable",
        "(g) no dup binders",
    )
    synthetic = [
        _doctor.CheckResult(name, True, "synthetic", status="PASS")
        for name in passing_names
    ]
    synthetic.append(
        _doctor.CheckResult(
            "(h) crypto key file state",
            True,
            (
                "crypto key file missing at /tmp/.crypto.key, but a Keychain entry was found.\n"
                " Run `iai-mcp crypto migrate-to-file` from a Terminal to migrate the key."
            ),
            status="WARN",
        )
    )
    monkeypatch.setattr(_doctor, "run_diagnosis", lambda: synthetic)

    rc = _doctor.cmd_doctor(argparse.Namespace(apply=False, yes=False))
    captured = capsys.readouterr().out

    hint_idx = captured.find("> hint:")
    header_idx = captured.find("IAI-MCP Doctor")

    assert hint_idx >= 0, f"expected `> hint:` line in stdout, got:\n{captured!r}"
    assert header_idx >= 0, f"expected checklist header in stdout, got:\n{captured!r}"
    assert hint_idx < header_idx, (
        f"hint (idx {hint_idx}) must appear BEFORE checklist header (idx {header_idx})\n"
        f"stdout was:\n{captured}"
    )
    # The actionable command must be named in the region above the header.
    assert "migrate-to-file" in captured[: header_idx], (
        f"hint must name `migrate-to-file` ABOVE the checklist header; "
        f"top-of-output region was: {captured[:header_idx]!r}"
    )
    # WARN is advisory only, so the exit code stays 0.
    assert rc == 0, f"WARN rows must not change exit code; got rc={rc}"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- CheckResult back-compat
|
||||
|
||||
def test_check_result_three_arg_constructor_still_works() -> None:
    """Phase 07.10: three-positional-arg ``CheckResult`` construction still works.

    When ``status`` is omitted it must default to a value derived from
    ``passed``: "PASS" for True and "FAIL" for False.
    """
    from iai_mcp.doctor import CheckResult

    passing = CheckResult("(x) example", True, "ok")
    failing = CheckResult("(y) example", False, "broken")

    assert passing.passed is True
    assert passing.detail == "ok"
    # The default status is one of the two legacy values ...
    assert passing.status in ("PASS", "FAIL")
    # ... and specifically mirrors the `passed` flag.
    assert passing.status == "PASS"
    assert failing.status == "FAIL"
|
||||
622
tests/test_doctor_multi_binder.py
Normal file
622
tests/test_doctor_multi_binder.py
Normal file
|
|
@ -0,0 +1,622 @@
|
|||
"""Phase 7.1 R6 / D7.1-05 — doctor.py multi-binder detection + repair.
|
||||
|
||||
Test matrix (8 tests):
|
||||
A. _extract_binder_pids parses lsof -F pn output → set[int]
|
||||
B. _extract_binder_pids skips PIDs bound to UNRELATED sockets
|
||||
C. _extract_binder_pids handles empty input → empty set
|
||||
D. check_g_no_dup_binders skips when socket file absent (PASS-with-skip)
|
||||
E. check_g_no_dup_binders PASSes with single binder (multiprocessing worker)
|
||||
F. check_g_no_dup_binders FAILs with two binders (regression-trap centerpiece)
|
||||
G. _kill_dup_binders keeps oldest, kills the rest (real subprocess daemons)
|
||||
H. iai-mcp doctor --apply --yes recovers from dup-binder scenario (e2e)
|
||||
|
||||
A-D: pure unit tests, no daemon, fast (<1s combined).
|
||||
E-F: in-process multiprocessing workers — distinct PIDs, lsof-visible.
|
||||
G-H: real iai_mcp.daemon subprocesses — required because _kill_dup_binders
|
||||
filters by 'iai_mcp.daemon' substring in psutil cmdline (wrong-PID-kill
|
||||
mitigation). Isolated by HIGH-4 LOCK env propagation pattern from
|
||||
test_doctor_apply_recovery.py:isolated_daemon_paths.
|
||||
|
||||
Skip on non-POSIX (AF_UNIX requirement).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import multiprocessing as mp
|
||||
import os
|
||||
import platform
|
||||
import signal
|
||||
import socket
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import psutil
|
||||
import pytest
|
||||
|
||||
|
||||
# Module-wide marker: every test in this file is skipped on Windows.
pytestmark = pytest.mark.skipif(
    platform.system() == "Windows",
    reason="POSIX AF_UNIX required (lsof -U + multiprocessing socket binders)",
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Section 1 — pure unit tests for _extract_binder_pids (A, B, C)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_extract_binder_pids_parses_lsof_output():
    """A: hand-written ``lsof -F pn`` text maps to the expected PID set.

    The input alternates ``p<pid>`` lines with ``n<filename>`` lines; only
    PIDs whose name line equals the target socket path should be returned.
    """
    from iai_mcp.doctor import _extract_binder_pids

    target = Path("/tmp/iai-test/d.sock")
    lsof_lines = [
        "p12345",
        f"n{target}",
        "p67890",
        f"n{target}",
        "p99999",
        "n/tmp/other-app/socket",
    ]

    pids = _extract_binder_pids("\n".join(lsof_lines), target)

    assert pids == {12345, 67890}, f"expected {{12345, 67890}}, got {pids}"
|
||||
|
||||
|
||||
def test_extract_binder_pids_skips_unrelated_sockets():
    """B: among several sockets, only PIDs holding the target path are returned."""
    from iai_mcp.doctor import _extract_binder_pids

    target = Path("/tmp/iai-test/d.sock")
    lsof_lines = [
        "p1001",
        "n/var/run/some-other-daemon.sock",
        "p2002",
        f"n{target}",
        "p3003",
        "n/tmp/X11-unix/X0",
        "p4004",
        f"n{target}",
        "n/some/extra/name/for/p4004",  # a second name entry for PID 4004
    ]

    pids = _extract_binder_pids("\n".join(lsof_lines), target)

    assert pids == {2002, 4004}, f"expected {{2002, 4004}}, got {pids}"
|
||||
|
||||
|
||||
def test_extract_binder_pids_handles_empty_output():
    """C: empty, blank-only and malformed input all produce an empty set."""
    from iai_mcp.doctor import _extract_binder_pids

    target = Path("/tmp/anywhere.sock")
    no_pids = set()

    assert _extract_binder_pids("", target) == no_pids
    assert _extract_binder_pids("\n\n\n", target) == no_pids
    # Malformed: a PID with no name line, a garbage line, and a bare `p`.
    assert _extract_binder_pids("p123\nXgarbage\np\n", target) == no_pids
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Section 2 — check_g_no_dup_binders (D, E, F) using monkeypatched socket path
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture
def short_socket_path(tmp_path, monkeypatch):
    """Yield a short AF_UNIX socket path under ``/tmp`` for check (g) tests.

    The path lives in ``/tmp/iai-mb-<pid>-<n>/`` to stay within the
    104-byte AF_UNIX cap on macOS, and is exported through the
    ``IAI_DAEMON_SOCKET_PATH`` env var. On teardown the socket file and its
    directory are removed on a best-effort basis (``OSError`` is ignored).
    """
    sock_dir = Path(f"/tmp/iai-mb-{os.getpid()}-{id(tmp_path)}")
    sock_dir.mkdir(parents=True, exist_ok=True)
    sock_path = sock_dir / "d.sock"
    monkeypatch.setenv("IAI_DAEMON_SOCKET_PATH", str(sock_path))

    def _remove_socket_file():
        if sock_path.exists():
            sock_path.unlink()

    try:
        yield sock_path
    finally:
        # Cleanup must never fail the test, so OSError is ignored per step.
        for cleanup_step in (_remove_socket_file, sock_dir.rmdir):
            try:
                cleanup_step()
            except OSError:
                pass
|
||||
|
||||
|
||||
def test_check_g_no_socket_skips(short_socket_path, monkeypatch):
    """D: with no socket file present, check (g) passes with a skip detail.

    The row must count as passed and its detail must contain
    "no socket file", so a clean machine produces no false positive.
    """
    from iai_mcp.doctor import check_g_no_dup_binders

    # The fixture only exports the env var; nothing was created at the path.
    assert not short_socket_path.exists()

    outcome = check_g_no_dup_binders()

    assert outcome.passed is True
    assert "no socket file" in outcome.detail
|
||||
|
||||
|
||||
# --- Multiprocessing worker for Tests E and F (distinct PIDs) ---------------
|
||||
|
||||
|
||||
def _bind_socket_worker(sock_path_str: str, ready_event: mp.Event, exit_event: mp.Event) -> None:
|
||||
"""Subprocess worker: bind an AF_UNIX socket to sock_path, signal ready,
|
||||
block until exit_event is set.
|
||||
|
||||
Each multiprocessing.Process child has a distinct PID and lsof reports
|
||||
its socket fd. Used by Tests E (1 binder) and F (2 binders) to construct
|
||||
deterministic dup-binder scenarios without a real iai_mcp.daemon (whose
|
||||
boot cost is ~3-10s).
|
||||
"""
|
||||
s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
|
||||
try:
|
||||
# Each worker handles its own bind; for the 2-binder scenario, the
|
||||
# parent unlinks the path between worker spawns so each worker
|
||||
# successfully bind()s a fresh inode at the same name.
|
||||
s.bind(sock_path_str)
|
||||
s.listen(5)
|
||||
ready_event.set()
|
||||
# Block until parent signals shutdown.
|
||||
exit_event.wait(timeout=30)
|
||||
finally:
|
||||
try:
|
||||
s.close()
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
|
||||
def test_check_g_single_binder_passes(short_socket_path):
    """E: exactly one process bound to the socket -> check_g PASSes with "1 binder".

    The binder is a multiprocessing child (so its PID differs from the pytest
    process) running ``_bind_socket_worker``.
    """
    from iai_mcp.doctor import check_g_no_dup_binders

    # NOTE: 'spawn' rather than 'fork', even on Darwin. lancedb is not
    # fork-safe (a UserWarning surfaces with fork on macOS). The worker never
    # touches lancedb, but the parent has it imported transitively.
    ctx = mp.get_context("spawn")
    bound = ctx.Event()
    release = ctx.Event()
    binder = ctx.Process(
        target=_bind_socket_worker,
        args=(str(short_socket_path), bound, release),
    )
    binder.start()
    try:
        assert bound.wait(timeout=10), "binder worker never signaled ready"
        # Short settle before lsof is consulted by the check.
        time.sleep(0.2)

        outcome = check_g_no_dup_binders()

        assert outcome.passed is True, (
            f"single-binder scenario should PASS; got detail={outcome.detail!r}"
        )
        assert "1 binder" in outcome.detail, f"unexpected detail: {outcome.detail!r}"
    finally:
        # Ask the worker to exit; escalate to terminate() if it lingers.
        release.set()
        binder.join(timeout=5)
        if binder.is_alive():
            binder.terminate()
            binder.join(timeout=2)
|
||||
|
||||
|
||||
def test_check_g_two_binders_fails(short_socket_path):
    """F: two processes holding the same socket path -> check_g FAILs.

    REGRESSION-TRAP CENTERPIECE. Two multiprocessing workers bind the same
    path; the path is unlinked between the two spawns so the second bind()
    also succeeds at the OS level. lsof is expected to attribute the path to
    both PIDs, and check_g must flag the singleton-invariant violation and
    name both PIDs in its detail.

    Per the original author's note, Phase 7.1's launchd architecture prevents
    this in production; the test reproduces it by hand-binding sockets, which
    corresponds to a user manually bypassing launchd.
    """
    from iai_mcp.doctor import _extract_binder_pids, check_g_no_dup_binders

    # NOTE: 'spawn' rather than 'fork', even on Darwin. lancedb is not
    # fork-safe (a UserWarning surfaces with fork on macOS). The workers never
    # touch lancedb, but the parent has it imported transitively.
    ctx = mp.get_context("spawn")

    def make_binder(ready_evt, exit_evt):
        return ctx.Process(
            target=_bind_socket_worker,
            args=(str(short_socket_path), ready_evt, exit_evt),
        )

    # First binder.
    first_ready, first_exit = ctx.Event(), ctx.Event()
    first = make_binder(first_ready, first_exit)
    first.start()

    # Second binder is created inside the try block, after the path has been
    # unlinked: the first worker keeps the original (now unlinked) inode alive
    # through its open fd, while the second binds a new inode at the same name.
    second_ready, second_exit = ctx.Event(), ctx.Event()
    second = None
    try:
        assert first_ready.wait(timeout=10), "worker 1 never signaled ready"
        # Remove the name so the second bind() does not hit EADDRINUSE.
        try:
            short_socket_path.unlink()
        except OSError:
            pass
        second = make_binder(second_ready, second_exit)
        second.start()
        assert second_ready.wait(timeout=10), "worker 2 never signaled ready"
        time.sleep(0.3)  # let lsof catch up

        # Belt-and-suspenders: run lsof ourselves and feed the parser directly.
        lsof_run = subprocess.run(
            ["lsof", "-U", "-F", "pn"],
            capture_output=True,
            text=True,
            timeout=5,
            check=False,
        )
        seen_pids = _extract_binder_pids(lsof_run.stdout, short_socket_path)
        assert {first.pid, second.pid}.issubset(seen_pids), (
            f"lsof should report both worker PIDs as binders; got {seen_pids} "
            f"(workers: {first.pid}, {second.pid})"
        )

        # Centerpiece assertion: the check itself detects the duplicate.
        outcome = check_g_no_dup_binders()

        assert outcome.passed is False, (
            f"two-binder scenario should FAIL; got detail={outcome.detail!r}"
        )
        # The failure detail must name both offending PIDs.
        assert str(first.pid) in outcome.detail, f"detail missing PID {first.pid}: {outcome.detail!r}"
        assert str(second.pid) in outcome.detail, f"detail missing PID {second.pid}: {outcome.detail!r}"
    finally:
        # Signal every started worker first, then reap them one by one.
        first_exit.set()
        if second is not None:
            second_exit.set()
        started = [proc for proc in (first, second) if proc is not None]
        for proc in started:
            proc.join(timeout=5)
            if proc.is_alive():
                proc.terminate()
                proc.join(timeout=2)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Section 3 — _kill_dup_binders + e2e doctor --apply (G, H)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture
def isolated_daemon_paths(tmp_path, monkeypatch):
    """Isolate HOME, socket, store, lock and crypto settings for real-daemon tests.

    Creates ``<tmp_path>/.iai-mcp`` with a ``store`` subdirectory, picks a
    short socket path under /tmp, exports the matching environment variables,
    and points ``daemon_state.STATE_PATH``, ``cli.LOCK_PATH`` and
    ``cli.SOCKET_PATH`` at the isolated locations.

    Yields ``(sock_path, state_path, store_dir, lock_path)``.

    Per the original author's note this mirrors
    test_doctor_apply_recovery.py:isolated_daemon_paths (HIGH-4 LOCK
    precedent, Plan 07-04), and exists because _kill_dup_binders only targets
    processes whose cmdline contains 'iai_mcp.daemon', so multiprocessing
    workers cannot stand in for Tests G/H.
    """
    iai_dir = tmp_path / ".iai-mcp"
    iai_dir.mkdir(parents=True, exist_ok=True)
    store_dir = iai_dir / "store"
    store_dir.mkdir(parents=True, exist_ok=True)
    state_path = iai_dir / ".daemon-state.json"
    lock_path = iai_dir / ".lock"

    holder = Path(f"/tmp/iai-mb2-{os.getpid()}-{id(tmp_path)}")
    holder.mkdir(parents=True, exist_ok=True)
    sock_path = holder / "d.sock"

    # Resolved BEFORE HOME is overridden so HF_HOME keeps pointing at the real
    # user's huggingface cache directory.
    real_hf_home = Path.home() / ".cache" / "huggingface"

    env_overrides = {
        "HOME": str(tmp_path),
        "HF_HOME": str(real_hf_home),
        "IAI_DAEMON_SOCKET_PATH": str(sock_path),
        "IAI_MCP_STORE": str(store_dir),
        "IAI_DAEMON_IDLE_SHUTDOWN_SECS": "99999",
        "PYTHON_KEYRING_BACKEND": "keyring.backends.fail.Keyring",
        "IAI_MCP_CRYPTO_PASSPHRASE": "test-mb-passphrase",
    }
    for var_name, var_value in env_overrides.items():
        monkeypatch.setenv(var_name, var_value)

    import keyring.core

    # Drop keyring's cached backend so the env override above is re-read.
    keyring.core._keyring_backend = None

    from iai_mcp import cli, daemon_state

    monkeypatch.setattr(daemon_state, "STATE_PATH", state_path)
    monkeypatch.setattr(cli, "LOCK_PATH", lock_path)
    monkeypatch.setattr(cli, "SOCKET_PATH", sock_path)

    def _drop_socket_file():
        if sock_path.exists():
            sock_path.unlink()

    try:
        yield sock_path, state_path, store_dir, lock_path
    finally:
        # Stop any daemon this test started before touching the filesystem.
        _kill_test_daemons(sock_path)
        for cleanup_step in (_drop_socket_file, holder.rmdir):
            try:
                cleanup_step()
            except OSError:
                pass
        # Do not let the fail-backend cache leak into later tests.
        keyring.core._keyring_backend = None
|
||||
|
||||
|
||||
def _spawn_daemon(sock_path: Path, store_dir: Path, home: Path) -> subprocess.Popen:
    """Launch ``python -m iai_mcp.daemon`` as a child process with test isolation.

    The child inherits the current environment with HOME, socket path, store
    directory, idle-shutdown window, keyring backend and crypto passphrase
    overridden. Its stdout and stderr are discarded.

    Returns the ``subprocess.Popen`` handle; the caller must reap it.
    """
    child_env = {
        **os.environ,
        "HOME": str(home),
        "IAI_DAEMON_SOCKET_PATH": str(sock_path),
        "IAI_MCP_STORE": str(store_dir),
        "IAI_DAEMON_IDLE_SHUTDOWN_SECS": "99999",
        "PYTHON_KEYRING_BACKEND": "keyring.backends.fail.Keyring",
        "IAI_MCP_CRYPTO_PASSPHRASE": "test-mb-passphrase",
    }
    command = [sys.executable, "-m", "iai_mcp.daemon"]
    return subprocess.Popen(
        command,
        env=child_env,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
|
||||
|
||||
|
||||
def _wait_for_socket(sock_path: Path, timeout_sec: float = 30.0) -> bool:
|
||||
deadline = time.monotonic() + timeout_sec
|
||||
while time.monotonic() < deadline:
|
||||
if sock_path.exists():
|
||||
return True
|
||||
time.sleep(0.1)
|
||||
return False
|
||||
|
||||
|
||||
def _kill_test_daemons(sock_path: Path) -> None:
    """Terminate iai_mcp.daemon processes that belong to this test's socket.

    A process is targeted only when its command line contains
    ``iai_mcp.daemon`` AND its environment has ``IAI_DAEMON_SOCKET_PATH`` equal
    to ``sock_path``. Matching on the environment keeps the user's real
    production daemon out of reach.

    Each match gets SIGTERM and up to 3 seconds to exit; if it has vanished or
    is still running after that, SIGKILL is sent. Processes that disappear or
    deny access during inspection are skipped.
    """
    wanted = str(sock_path)
    for proc in psutil.process_iter(["pid", "cmdline"]):
        try:
            argv = proc.info.get("cmdline") or []
            if "iai_mcp.daemon" not in " ".join(argv):
                continue
            try:
                proc_env = proc.environ()
            except (psutil.AccessDenied, psutil.NoSuchProcess):
                continue
            if proc_env.get("IAI_DAEMON_SOCKET_PATH") != wanted:
                continue
            try:
                proc.send_signal(signal.SIGTERM)
                proc.wait(timeout=3)
            except (psutil.NoSuchProcess, psutil.TimeoutExpired):
                # Graceful stop did not complete; fall back to SIGKILL.
                try:
                    proc.send_signal(signal.SIGKILL)
                except psutil.NoSuchProcess:
                    pass
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            continue
|
||||
|
||||
|
||||
def _spawn_dup_daemons(
    sock_path: Path, store_dir: Path, home: Path
) -> tuple[subprocess.Popen, subprocess.Popen]:
    """Start two real iai_mcp.daemon processes intended to hold the same socket path.

    Sequence (race-window simulation per the CONTEXT.md hint cited by the
    original author): start daemon #1 and wait for the socket file, unlink the
    file, start daemon #2 and wait for the socket file again, then pause
    briefly. The intent is that daemon #1 keeps its listening fd on the
    original (unlinked) inode while daemon #2 binds a new inode at the same
    name, so lsof attributes the path to both PIDs.

    Returns ``(p1, p2)`` in spawn order.

    Raises:
        AssertionError: if either daemon fails to produce the socket file
            within 30 seconds. Daemons started so far are killed first.
    """

    def _kill_quietly(*procs):
        for proc in procs:
            try:
                proc.kill()
            except ProcessLookupError:
                pass

    p1 = _spawn_daemon(sock_path, store_dir, home)
    if not _wait_for_socket(sock_path, timeout_sec=30):
        _kill_quietly(p1)
        raise AssertionError("daemon #1 never bound socket within 30s")

    # Open the race window: with the name gone, daemon #2's bind() will not
    # fail with EADDRINUSE.
    try:
        sock_path.unlink()
    except OSError:
        pass

    p2 = _spawn_daemon(sock_path, store_dir, home)
    if not _wait_for_socket(sock_path, timeout_sec=30):
        _kill_quietly(p2, p1)
        raise AssertionError("daemon #2 never bound socket within 30s")

    # Give lsof a moment to reflect both binders.
    time.sleep(0.5)
    return p1, p2
|
||||
|
||||
|
||||
@pytest.mark.skip(
    reason=(
        "Phase 10.6 Plan 10.6-01 Task 1.5: single-machine "
        "LifecycleLock prevents two daemons from both binding the same "
        "IAI_MCP_STORE. Daemon #2 raises LifecycleLockConflict and exits "
        "1 before bind. The dup-binder integration scenario is now "
        "impossible by design. The unit tests in this file "
        "(test_extract_binder_pids_*, test_check_g_*) still cover "
        "check_g's detection logic without spawning two real daemons."
    )
)
def test_kill_dup_binders_keeps_oldest(isolated_daemon_paths):
    """G: with two real daemons, _kill_dup_binders kills the younger one.

    Currently skipped (see the skip reason). When run, it expects check_g to
    FAIL before the repair and PASS after it, the first-spawned daemon to
    survive, and the second-spawned daemon to exit.
    """
    from iai_mcp.doctor import (
        _extract_binder_pids,
        _kill_dup_binders,
        check_g_no_dup_binders,
    )

    sock_path, _, store_dir, _ = isolated_daemon_paths
    home = Path(os.environ["HOME"])

    p1, p2 = _spawn_dup_daemons(sock_path, store_dir, home)
    try:
        # Pre-condition: lsof must attribute our socket path to both daemons.
        lsof_run = subprocess.run(
            ["lsof", "-U", "-F", "pn"],
            capture_output=True,
            text=True,
            timeout=5,
            check=False,
        )
        binders = _extract_binder_pids(lsof_run.stdout, sock_path)
        assert {p1.pid, p2.pid}.issubset(binders), (
            f"expected both daemon PIDs in binders; got {binders} "
            f"(daemons: {p1.pid}, {p2.pid})"
        )
        before = check_g_no_dup_binders()
        assert before.passed is False, (
            f"pre-condition: dup-binder scenario should FAIL check_g; "
            f"got {before.detail!r}"
        )

        # p1 was spawned first, so it has the greater elapsed time and is the
        # one expected to be kept; p2 is expected to be killed.
        ok, msg, ms = _kill_dup_binders()

        assert ok is True, f"_kill_dup_binders returned ok=False: {msg}"
        assert "kept PID" in msg, f"msg missing 'kept PID': {msg!r}"
        assert "killed" in msg, f"msg missing 'killed': {msg!r}"
        assert ms < 10_000, f"_kill_dup_binders took {ms}ms (>10s); too slow"

        # A follow-up check should now pass (one binder, or none if racing).
        after = check_g_no_dup_binders()
        assert after.passed is True, (
            f"post-kill check_g should PASS; got {after.detail!r}"
        )

        # The kept daemon must still be running.
        assert p1.poll() is None, "expected oldest daemon (p1) to survive"
        # Allow up to 5s for the kill signal to be delivered and p2 reaped.
        deadline = time.monotonic() + 5.0
        while time.monotonic() < deadline and p2.poll() is None:
            time.sleep(0.1)
        assert p2.poll() is not None, "expected younger daemon (p2) to be dead"
    finally:
        # Make sure neither daemon outlives the test.
        for leftover in (p1, p2):
            if leftover.poll() is not None:
                continue
            try:
                leftover.send_signal(signal.SIGKILL)
                leftover.wait(timeout=3)
            except (subprocess.TimeoutExpired, ProcessLookupError):
                pass
|
||||
|
||||
|
||||
@pytest.mark.skip(
    reason=(
        "Phase 10.6 Plan 10.6-01 Task 1.5: single-machine "
        "LifecycleLock prevents two daemons from both binding the same "
        "IAI_MCP_STORE. Daemon #2 raises LifecycleLockConflict and exits "
        "1 before bind. End-to-end recovery from dup-binders cannot run "
        "because the dup-binders state is now impossible to construct."
    )
)
def test_doctor_apply_yes_recovers_from_dup_binders(isolated_daemon_paths):
    """H: end-to-end. ``cmd_doctor(apply=True, yes=True)`` clears a dup-binder state.

    Currently skipped (see the skip reason). When run, it spawns two daemons
    on one socket path, confirms check_g FAILs, runs the doctor with
    apply+yes, and then requires check_g to PASS and lsof to show at most one
    binder for the path.

    The return code is asserted loosely (0 or 2). Per the original author's
    note, daemon-state.json ends up naming whichever daemon wrote last; if the
    survivor is not that daemon, check (a) can still FAIL after the dup-binder
    repair succeeded, which yields rc=2 rather than rc=0.
    """
    from iai_mcp.doctor import (
        _extract_binder_pids,
        check_g_no_dup_binders,
        cmd_doctor,
    )

    sock_path, _, store_dir, _ = isolated_daemon_paths
    home = Path(os.environ["HOME"])

    p1, p2 = _spawn_dup_daemons(sock_path, store_dir, home)
    try:
        # Sanity: the duplicate must be detectable before we try to repair it.
        pre = check_g_no_dup_binders()
        assert pre.passed is False, f"pre: dup-binder should FAIL; got {pre.detail!r}"

        doctor_args = argparse.Namespace(apply=True, yes=True)
        rc = cmd_doctor(doctor_args)

        # The critical observable: the duplicate is gone.
        after = check_g_no_dup_binders()
        assert after.passed is True, (
            f"post-recovery: check_g should PASS; got {after.detail!r}"
        )
        # rc may be 0 (everything green) or 2 (check_a still FAILs because the
        # state file names the daemon that was killed). Either outcome shows
        # the dup-binder repair ran; rc=1 would mean --apply never ran it.
        assert rc in (0, 2), (
            f"cmd_doctor rc={rc} unexpected; allowed 0 (full recovery) or 2 "
            f"(dup-binders fixed but state-file desync persists)."
        )

        # Belt-and-suspenders: lsof shows at most one binder for our path.
        lsof_run = subprocess.run(
            ["lsof", "-U", "-F", "pn"],
            capture_output=True,
            text=True,
            timeout=5,
            check=False,
        )
        binders = _extract_binder_pids(lsof_run.stdout, sock_path)
        assert len(binders) <= 1, (
            f"after recovery, expected ≤1 binder for {sock_path}; got {binders}"
        )
    finally:
        # Make sure neither daemon outlives the test.
        for leftover in (p1, p2):
            if leftover.poll() is not None:
                continue
            try:
                leftover.send_signal(signal.SIGKILL)
                leftover.wait(timeout=3)
            except (subprocess.TimeoutExpired, ProcessLookupError):
                pass
|
||||
636
tests/test_drain_deferred_captures.py
Normal file
636
tests/test_drain_deferred_captures.py
Normal file
|
|
@ -0,0 +1,636 @@
|
|||
"""Phase 7.1 Plan 06 / R3 closure — `drain_deferred_captures(store)` daemon-side.
|
||||
|
||||
Plan 07.1-05 shipped the WRITE side (`iai-mcp capture-transcript --no-spawn`
|
||||
writes JSONL files to ``~/.iai-mcp/.deferred-captures/`` when the daemon
|
||||
socket is unreachable). This plan ships the READ side: a drain function that
|
||||
the daemon runs at startup AND on every WAKE-from-SLEEP transition, so
|
||||
deferred events get ingested into the episodic tier within seconds of the
|
||||
daemon coming back up.
|
||||
|
||||
End-to-end story this module verifies:
|
||||
user closes 3 sessions while daemon is sleeping
|
||||
→ 3 Stop hooks fire `iai-mcp capture-transcript --no-spawn`
|
||||
→ 3 JSONL deferral files appear under ~/.iai-mcp/.deferred-captures/
|
||||
→ next MCP call socket-activates the daemon (or wake from idle)
|
||||
→ drain runs → all 3 transcripts land in the brain
|
||||
→ ZERO events lost; ZERO new daemons spawned
|
||||
|
||||
NOTE on idle-shutdown (per CONTEXT.md D7-05 inheritance): if the daemon
|
||||
idle-exits cleanly while many hook deferrals accumulate, the deferred-
|
||||
captures directory keeps growing until the NEXT non-hook MCP call
|
||||
socket-activates the daemon. This is by design — eliminating the spawn
|
||||
vector is the whole point. The drain happens whenever the daemon next runs.
|
||||
|
||||
Test layout:
|
||||
A: round-trip — write 3 events → drain → file deleted, store has records
|
||||
B: malformed event line — file renamed to .failed-<ts>, counts.files_failed=1
|
||||
C: forward-compat — version=99 header → file left in place + log entry
|
||||
D: missing dir — drain returns zero counts, no error
|
||||
E: empty file — drain unlinks it, counts unchanged
|
||||
F: multiple files — all 3 processed in glob-sort order, all deleted
|
||||
G: integration — daemon startup with malformed file pre-staged → daemon
|
||||
starts, malformed file is .failed-<ts>, daemon doesn't crash
|
||||
|
||||
Tests A–F are pure-Python unit tests of the drain function (in-process
|
||||
MemoryStore, monkeypatch HOME/keyring). Test G is the integration check —
|
||||
spawns a real `python -m iai_mcp.daemon` subprocess under env-isolation
|
||||
(mirroring `test_doctor_apply_recovery.py:isolated_daemon_paths`) with a
|
||||
malformed JSONL pre-seeded; asserts the daemon binds the socket without
|
||||
crashing AND the malformed file is renamed to .failed-<ts>.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import platform
|
||||
import signal
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import psutil
|
||||
import pytest
|
||||
|
||||
|
||||
# Repository root: two directory levels above this test file.
REPO = Path(__file__).resolve().parent.parent

# POSIX-only: AF_UNIX socket + subprocess + Path-based glob semantics.
# Module-level mark: every test in this file is skipped when
# platform.system() reports "Windows".
pytestmark = pytest.mark.skipif(
    platform.system() == "Windows",
    reason="POSIX subprocess + AF_UNIX socket; HOME isolation pattern",
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fixture: HOME + keyring isolation for in-process tests (A–F)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture
def iai_home(tmp_path, monkeypatch):
    """Isolate HOME, keyring, crypto passphrase and store location for one test.

    Sets ``HOME`` to ``tmp_path`` so anything resolved through ``Path.home()``
    (per the original note: the drain's ``.deferred-captures/`` and ``logs/``
    directories) lands under the temp dir instead of the real ``~/.iai-mcp/``.

    Forces the keyring fail-backend and supplies a passphrase via
    ``IAI_MCP_CRYPTO_PASSPHRASE`` so key lookup takes the passphrase path; the
    original note says this keeps the macOS interactive keychain prompt from
    firing. ``IAI_MCP_STORE`` points under ``tmp_path`` so each test gets a
    fresh LanceDB with no cross-test row leakage.

    Yields ``tmp_path``.
    """
    env_overrides = {
        "HOME": str(tmp_path),
        "PYTHON_KEYRING_BACKEND": "keyring.backends.fail.Keyring",
        "IAI_MCP_CRYPTO_PASSPHRASE": "test-drain-passphrase",
        "IAI_MCP_STORE": str(tmp_path / ".iai-mcp" / "lancedb"),
    }
    for var_name, var_value in env_overrides.items():
        monkeypatch.setenv(var_name, var_value)

    import keyring.core

    # keyring caches its backend on first access; clear it so the override
    # above is honoured.
    keyring.core._keyring_backend = None
    yield tmp_path
    # Clear it again afterwards so the fail-backend does not leak to other tests.
    keyring.core._keyring_backend = None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers — JSONL fixture builders (D7.1-04 v1 format)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _write_deferred_jsonl(
|
||||
deferred_dir: Path,
|
||||
session_id: str,
|
||||
events: list[dict],
|
||||
*,
|
||||
version: int = 1,
|
||||
ts_suffix: int | None = None,
|
||||
) -> Path:
|
||||
"""Construct a v1 JSONL file under ``deferred_dir`` and return its Path.
|
||||
|
||||
Mirrors the format ``write_deferred_captures`` produces (Plan 07.1-05).
|
||||
Header on line 1; events on lines 2..N.
|
||||
"""
|
||||
deferred_dir.mkdir(parents=True, exist_ok=True)
|
||||
suffix = ts_suffix if ts_suffix is not None else int(time.time())
|
||||
out = deferred_dir / f"{session_id}-{suffix}.jsonl"
|
||||
header = {
|
||||
"version": version,
|
||||
"deferred_at": "2026-04-26T00:00:00Z",
|
||||
"session_id": session_id,
|
||||
"cwd": "/tmp",
|
||||
}
|
||||
lines = [json.dumps(header)] + [json.dumps(e) for e in events]
|
||||
out.write_text("\n".join(lines) + "\n")
|
||||
return out
|
||||
|
||||
|
||||
def _make_event(text: str, role: str = "user") -> dict:
|
||||
return {
|
||||
"text": text,
|
||||
"cue": f"test cue: {text[:24]}",
|
||||
"tier": "episodic",
|
||||
"role": role,
|
||||
"ts": "2026-04-26T00:00:00Z",
|
||||
}
|
||||
|
||||
|
||||
def _open_isolated_store():
    """Return a fresh ``MemoryStore`` built under the iai_home fixture's environment.

    ``iai_mcp.store`` is imported here rather than at module top because, per
    the original note, importing it touches LanceDB + crypto config and the
    env overrides must already be in place.
    """
    from iai_mcp.store import MemoryStore

    store = MemoryStore()
    return store
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test A — round-trip: write JSONL → drain → file deleted, store has records
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_drain_consumes_jsonl_and_deletes_file(iai_home):
    """A: happy path. A v1 JSONL with three events is drained and then removed.

    Expects one file drained, three events inserted, no failures, the file
    unlinked, and at least three rows in the ``records`` table.
    """
    from iai_mcp.capture import drain_deferred_captures

    deferred_dir = iai_home / ".iai-mcp" / ".deferred-captures"
    transcript = (
        ("Alice said: drain test event one — must be at least 12 chars", "user"),
        ("assistant reply with sufficient length to pass MIN_CAPTURE", "assistant"),
        ("third event for the round-trip drain count assertion", "user"),
    )
    events = [_make_event(text, role=role) for text, role in transcript]
    fpath = _write_deferred_jsonl(deferred_dir, "session-A", events)
    assert fpath.exists()

    store = _open_isolated_store()
    counts = drain_deferred_captures(store)

    # W2 / counts schema split four ways per status.
    expected_counts = {
        "files_drained": 1,
        "files_failed": 0,
        "events_inserted": 3,
        "events_skipped_insert_failed": 0,
    }
    for key, want in expected_counts.items():
        assert counts[key] == want, counts
    assert not fpath.exists(), "deferred file must be unlinked after drain"

    # Row count is the cheapest confirmation that the drain really inserted;
    # on a fresh store all three events are expected to be net-new.
    records = store.db.open_table("records")
    n_rows = records.count_rows()
    assert n_rows >= 3, f"expected ≥3 records inserted, got {n_rows}"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test B — malformed event line → file renamed to .failed-<ts>, count tallied
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_drain_handles_malformed_event_line(iai_home):
    """B: a non-JSON event line fails the whole file, which is renamed aside.

    Expects ``files_failed == 1``, ``files_drained == 0``, the original file
    gone, and exactly one ``session-B-12345.failed-*.jsonl`` in its place.
    """
    from iai_mcp.capture import drain_deferred_captures

    deferred_dir = iai_home / ".iai-mcp" / ".deferred-captures"
    deferred_dir.mkdir(parents=True, exist_ok=True)

    # Built by hand so a broken line can sit between two valid events.
    header = {
        "version": 1,
        "deferred_at": "2026-04-26T00:00:00Z",
        "session_id": "session-B",
        "cwd": "/tmp",
    }
    chunks = [
        json.dumps(header) + "\n",
        json.dumps(_make_event("first valid event with adequate length")) + "\n",
        "this line is not valid JSON {{{ broken\n",
        json.dumps(_make_event("never reached because file-level error")) + "\n",
    ]
    fpath = deferred_dir / "session-B-12345.jsonl"
    fpath.write_text("".join(chunks))
    assert fpath.exists()

    store = _open_isolated_store()
    counts = drain_deferred_captures(store)

    assert counts["files_failed"] == 1, counts
    assert counts["files_drained"] == 0, counts
    # The offender must have been moved to a .failed-<ts>.jsonl sibling.
    assert not fpath.exists(), "original must be renamed away on per-file error"
    failed = list(deferred_dir.glob("session-B-12345.failed-*.jsonl"))
    assert len(failed) == 1, f"expected exactly 1 .failed-* file, got {failed}"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test C — forward-compat: version > 1 → file left intact, log entry written
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_drain_skips_future_version(iai_home):
    """C: a header with version=99 is left untouched and the skip is logged.

    Expects all four counts to be zero, the file still present, no
    ``.failed-*`` sibling, and a ``deferred-drain-*.log`` that mentions
    "skip", the session id and "version=99".
    """
    from iai_mcp.capture import drain_deferred_captures

    deferred_dir = iai_home / ".iai-mcp" / ".deferred-captures"
    future_event = _make_event("event from a future format version that we cannot parse")
    fpath = _write_deferred_jsonl(deferred_dir, "session-C", [future_event], version=99)

    store = _open_isolated_store()
    counts = drain_deferred_captures(store)

    # W2 / counts schema split four ways per status.
    for key in (
        "files_drained",
        "files_failed",
        "events_inserted",
        "events_skipped_insert_failed",
    ):
        assert counts[key] == 0, counts
    assert fpath.exists(), "version>1 file must remain for a future daemon to handle"
    # It must not have been treated as a failure either.
    assert not list(deferred_dir.glob("*.failed-*.jsonl"))

    # The log should name the file's session and the offending version.
    log_dir = iai_home / ".iai-mcp" / "logs"
    log_files = list(log_dir.glob("deferred-drain-*.log"))
    assert log_files, "drain must create a log file when it skips a future version"
    log_content = log_files[0].read_text()
    for needle in ("skip", "session-C", "version=99"):
        assert needle in log_content
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test D — no deferred dir → drain returns zero counts, no error
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_drain_no_deferred_dir(iai_home):
    """D: cold boot. With no ``.deferred-captures/`` directory, drain is a no-op.

    Expects all four counts to be zero and the directory to remain absent
    (drain must not create it).
    """
    from iai_mcp.capture import drain_deferred_captures

    deferred_dir = iai_home / ".iai-mcp" / ".deferred-captures"
    assert not deferred_dir.exists()

    store = _open_isolated_store()
    counts = drain_deferred_captures(store)

    # W2 / counts schema split four ways per status.
    for key in (
        "files_drained",
        "files_failed",
        "events_inserted",
        "events_skipped_insert_failed",
    ):
        assert counts[key] == 0, counts
    # Only the writer side is allowed to create the directory.
    assert not deferred_dir.exists(), "drain should not create .deferred-captures/"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test E — empty (0-byte) file → drain unlinks it, counts unchanged
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_drain_empty_jsonl(iai_home):
    """A 0-byte deferral file is removed without touching any counter.

    Such a file can be left behind by a writer that crashed before its first
    line landed. The drain must unlink it and report neither an insert nor a
    failure.
    """
    from iai_mcp.capture import drain_deferred_captures

    capture_dir = iai_home / ".iai-mcp" / ".deferred-captures"
    capture_dir.mkdir(parents=True, exist_ok=True)

    empty_file = capture_dir / "session-E-empty.jsonl"
    empty_file.write_text("")  # 0 bytes
    assert empty_file.exists()

    counts = drain_deferred_captures(_open_isolated_store())

    # W2 / counts schema split four ways per status.
    zero_counters = (
        "files_drained",
        "files_failed",
        "events_inserted",
        "events_skipped_insert_failed",
    )
    for key in zero_counters:
        assert counts[key] == 0, counts
    assert not empty_file.exists(), "0-byte file must be unlinked"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test F — multiple files processed in glob-sort order, all deleted
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_drain_multiple_files_processed_in_order(iai_home):
    """Three deferral files are all drained in a single pass.

    Each file holds one event; after the drain every file is gone and the
    aggregated counts report three drained files and three inserted events.
    """
    from iai_mcp.capture import drain_deferred_captures

    capture_dir = iai_home / ".iai-mcp" / ".deferred-captures"

    # NOTE: 07.11-01 Rule 1 deviation -- before Plan 07.11-01 lexically-near
    # cues all looked unique because the dedup branch in capture_turn was
    # unreachable dead code (Bugs A/B/C). After the dedup fix,
    # bge-small-en-v1.5 places "test cue: event from file 0/1/2" above the
    # 0.95 cosine threshold, so the second and third capture get
    # de-duplicated -> events_inserted=1, events_reinforced=2.
    # Each event therefore gets a SEMANTICALLY divergent topic so cosine
    # genuinely separates them (same divergence pattern as
    # tests/test_capture_dedup_contract.py::test_capture_turn_inserts_on_low_cos).
    topics = [
        "apples are red and grow on trees in orchards across the world",
        "quantum chromodynamics describes the strong nuclear force precisely",
        "hummingbirds beat their wings about eighty times per second in flight",
    ]
    written = [
        _write_deferred_jsonl(
            capture_dir, f"session-F-{idx}", [_make_event(topic)], ts_suffix=stamp,
        )
        for idx, (topic, stamp) in enumerate(zip(topics, [1000, 2000, 3000]))
    ]
    assert all(path.exists() for path in written)

    counts = drain_deferred_captures(_open_isolated_store())

    # W2 / counts schema split four ways per status.
    expected = {
        "files_drained": 3,
        "events_inserted": 3,
        "events_skipped_insert_failed": 0,
        "files_failed": 0,
    }
    for key, want in expected.items():
        assert counts[key] == want, counts
    for path in written:
        assert not path.exists(), f"{path} must be unlinked after drain"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test H — W2 / per-event insert failure preserves the file
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_drain_partial_insert_failure_preserves_file(iai_home, monkeypatch):
    """One failed insert keeps the whole deferral file as evidence.

    W2: when ANY event in a file comes back as status=skipped with reason
    insert-failed:* (capture_turn swallowed a store.insert exception), the
    drain MUST rename the file to ``.failed-<ts>.jsonl`` and NOT unlink it.
    Pre-07.9 the file was deleted and its events were permanently lost.
    """
    from iai_mcp.capture import drain_deferred_captures
    from iai_mcp.store import MemoryStore

    capture_dir = iai_home / ".iai-mcp" / ".deferred-captures"

    # Three events: good, poison-sentinel (its insert will fail), good.
    events = [
        _make_event("first good event with adequate length here"),
        _make_event("INSERT_FAIL_SENTINEL_07_9 — this event triggers a failure"),
        _make_event("third good event after the failing one in the middle"),
    ]
    source_file = _write_deferred_jsonl(
        capture_dir, "session-H", events, ts_suffix=42,
    )
    assert source_file.exists()

    # Wrap MemoryStore.insert so it raises only for the sentinel record.
    # This drives capture_turn into its insert-failed return path
    # (capture.py:169-171).
    original_insert = MemoryStore.insert

    def insert_or_fail(self, rec):
        if "INSERT_FAIL_SENTINEL_07_9" not in rec.literal_surface:
            return original_insert(self, rec)
        raise RuntimeError("simulated lance write failure")

    monkeypatch.setattr(MemoryStore, "insert", insert_or_fail)

    counts = drain_deferred_captures(_open_isolated_store())

    # The file is renamed, not unlinked, so the evidence is preserved.
    assert not source_file.exists(), "original file must be renamed when any insert fails"
    renamed = list(capture_dir.glob("session-H-42.failed-*.jsonl"))
    assert len(renamed) == 1, (
        f"expected 1 .failed-* file; got {renamed} "
        f"(deferred_dir contents: {list(capture_dir.iterdir())})"
    )

    # Counts split four ways: 2 inserted (the good ones), 1 insert-failed
    # (the sentinel), and the file is marked failed rather than drained.
    expected = {
        "events_inserted": 2,
        "events_skipped_insert_failed": 1,
        "events_skipped_intentional": 0,
        "files_drained": 0,
        "files_failed": 1,
    }
    for key, want in expected.items():
        assert counts[key] == want, counts

    # The drain log carries the insert-failed marker and the session name.
    drain_logs = list((iai_home / ".iai-mcp" / "logs").glob("deferred-drain-*.log"))
    assert drain_logs, "log file must record the insert-failed event"
    logged = drain_logs[0].read_text()
    assert "insert-failed" in logged
    assert "session-H" in logged
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test I — W2 / intentional skips do NOT fail the file
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_drain_intentional_skip_does_not_fail_file(iai_home):
    """An intentionally skipped event must not mark its file as failed.

    W2: an event whose text is too short returns status=skipped with
    reason='too short'. That is an INTENTIONAL skip, not an insert failure,
    so the file is unlinked normally, ``files_failed`` stays 0 and
    ``events_skipped_intentional`` is incremented.
    """
    from iai_mcp.capture import drain_deferred_captures

    capture_dir = iai_home / ".iai-mcp" / ".deferred-captures"

    long_enough = _make_event("ok this is a long enough event for the min-length gate")
    # Too short: will return status=skipped reason="too short".
    too_short = {
        "cue": "x",
        "text": "tiny",
        "tier": "episodic",
        "role": "user",
        "ts": "2026-04-26T00:00:00Z",
    }
    source_file = _write_deferred_jsonl(
        capture_dir, "session-I", [long_enough, too_short], ts_suffix=43,
    )
    assert source_file.exists()

    counts = drain_deferred_captures(_open_isolated_store())

    # File unlinked: intentional skips DO NOT mark a file as failed.
    assert not source_file.exists()
    assert list(capture_dir.glob("*.failed-*.jsonl")) == []

    expected = {
        "files_drained": 1,
        "files_failed": 0,
        "events_inserted": 1,
        "events_skipped_intentional": 1,
        "events_skipped_insert_failed": 0,
    }
    for key, want in expected.items():
        assert counts[key] == want, counts
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test G — integration: daemon startup with malformed file → daemon stays up,
|
||||
# file is renamed to .failed-<ts>
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
# Mirror test_doctor_apply_recovery.py:isolated_daemon_paths so the spawned
|
||||
# daemon writes its state + LanceDB + logs under tmp_path. Crucially this
|
||||
# also propagates HF_HOME so the daemon's prewarm step (bge-small load)
|
||||
# reuses the user's already-cached model and prewarm completes in <1s
|
||||
# instead of trying to download from HuggingFace under an empty tmp HOME.
|
||||
|
||||
|
||||
def _spawn_daemon(sock_path: Path, store_dir: Path, home: Path) -> subprocess.Popen:
    """Spawn ``python -m iai_mcp.daemon`` with full env-isolation.

    Args:
        sock_path: Value exported as ``IAI_DAEMON_SOCKET_PATH``.
        store_dir: Value exported as ``IAI_MCP_STORE``.
        home: Value exported as ``HOME`` for the child process.

    Returns:
        The ``Popen`` handle of the daemon; stdout and stderr are discarded.
    """
    env = os.environ.copy()
    env["HOME"] = str(home)
    env["IAI_DAEMON_SOCKET_PATH"] = str(sock_path)
    env["IAI_MCP_STORE"] = str(store_dir)
    env["IAI_DAEMON_IDLE_SHUTDOWN_SECS"] = "99999"

    # Reuse the user's HF cache so bge-small doesn't redownload (pattern from
    # test_doctor_apply_recovery.py:69-89). Path.home() follows $HOME, and the
    # calling test has already redirected $HOME to its tmp dir, so the real
    # home is looked up in the passwd database instead. Fall back to
    # Path.home() when pwd is unavailable or the uid has no passwd entry.
    try:
        import pwd

        real_home = Path(pwd.getpwuid(os.getuid()).pw_dir)
    except (ImportError, KeyError):
        real_home = Path.home()
    env["HF_HOME"] = str(real_home / ".cache" / "huggingface")

    # Force keyring fail-backend → passphrase fallback in the daemon
    # subprocess (otherwise macOS Security framework prompts interactively).
    env["PYTHON_KEYRING_BACKEND"] = "keyring.backends.fail.Keyring"
    env["IAI_MCP_CRYPTO_PASSPHRASE"] = "test-drain-integration-pass"

    return subprocess.Popen(
        [sys.executable, "-m", "iai_mcp.daemon"],
        env=env,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
|
||||
|
||||
|
||||
def _wait_for_socket(sock_path: Path, timeout_sec: float = 30.0) -> bool:
    """Poll until ``sock_path`` exists or ``timeout_sec`` has elapsed.

    Args:
        sock_path: Filesystem path the daemon is expected to create.
        timeout_sec: Maximum time to keep polling, in seconds.

    Returns:
        True if the path exists by the time polling stops, else False.
    """
    deadline = time.monotonic() + timeout_sec
    while time.monotonic() < deadline:
        if sock_path.exists():
            return True
        time.sleep(0.1)
    # Final probe: the path may have appeared during the last sleep, and a
    # zero or negative timeout never enters the loop at all.
    return sock_path.exists()
|
||||
|
||||
|
||||
def _kill_daemon_by_socket(sock_path: Path) -> None:
    """Match-by-env cleanup so we never touch the user's real daemon.

    Walks every process whose command line mentions ``iai_mcp.daemon`` and
    terminates only those whose ``IAI_DAEMON_SOCKET_PATH`` environment value
    equals ``sock_path``. SIGTERM is tried first; SIGKILL follows if the
    process has not exited within 3 seconds.
    """
    wanted_socket = str(sock_path)
    for candidate in psutil.process_iter(["pid", "cmdline"]):
        try:
            argv = candidate.info.get("cmdline") or []
            if "iai_mcp.daemon" not in " ".join(argv):
                continue

            # Processes whose environment cannot be read are left alone.
            try:
                proc_env = candidate.environ()
            except (psutil.AccessDenied, psutil.NoSuchProcess):
                continue
            if proc_env.get("IAI_DAEMON_SOCKET_PATH") != wanted_socket:
                continue

            try:
                candidate.send_signal(signal.SIGTERM)
                candidate.wait(timeout=3)
            except (psutil.NoSuchProcess, psutil.TimeoutExpired):
                # Either already gone or ignoring SIGTERM: escalate.
                try:
                    candidate.send_signal(signal.SIGKILL)
                except psutil.NoSuchProcess:
                    pass
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            continue
|
||||
|
||||
|
||||
def test_daemon_main_drain_does_not_crash_on_bad_file(tmp_path, monkeypatch):
    """Startup drain must survive a malformed deferral file.

    A JSONL file whose second line is not JSON is pre-seeded under
    ``.deferred-captures/`` before the daemon is spawned. The daemon must
    (a) bind its socket and stay alive and (b) rename the bad file to
    ``.failed-<ts>.jsonl``. This confirms that startup-drain's per-file
    try/except shields daemon main from malformed input.
    """
    # Path.home() follows $HOME, so the real HuggingFace cache location has
    # to be captured BEFORE HOME is redirected to tmp_path below.
    real_hf_home = Path.home() / ".cache" / "huggingface"

    # Same env scaffolding as _spawn_daemon, applied in-process too so any
    # pre-seed Path.home() lookups resolve to tmp_path.
    monkeypatch.setenv("HOME", str(tmp_path))
    monkeypatch.setenv("HF_HOME", str(real_hf_home))
    monkeypatch.setenv("PYTHON_KEYRING_BACKEND", "keyring.backends.fail.Keyring")
    monkeypatch.setenv("IAI_MCP_CRYPTO_PASSPHRASE", "test-drain-integration-pass")

    iai_dir = tmp_path / ".iai-mcp"
    iai_dir.mkdir(parents=True, exist_ok=True)
    store_dir = iai_dir / "lancedb"
    store_dir.mkdir(parents=True, exist_ok=True)
    deferred_dir = iai_dir / ".deferred-captures"
    deferred_dir.mkdir(parents=True, exist_ok=True)

    # Pre-seed a malformed file BEFORE the daemon spawns: a valid header
    # line followed by a line that is not JSON.
    bad = deferred_dir / "session-G-99999.jsonl"
    bad.write_text(
        json.dumps({"version": 1, "session_id": "session-G",
                    "deferred_at": "2026-04-26T00:00:00Z", "cwd": "/tmp"}) + "\n"
        + "totally not JSON ===invalid===\n"
    )
    assert bad.exists()

    # Short socket path (macOS AF_UNIX 104-byte cap).
    sock_dir = Path(f"/tmp/iai-drn-{os.getpid()}-{id(tmp_path)}")
    sock_dir.mkdir(parents=True, exist_ok=True)
    sock_path = sock_dir / "d.sock"

    proc = None
    try:
        proc = _spawn_daemon(
            sock_path, store_dir, home=Path(os.environ["HOME"])
        )
        assert _wait_for_socket(sock_path, timeout_sec=30), (
            f"daemon never bound socket within 30s; pid={proc.pid} "
            f"poll_status={proc.poll()}"
        )

        # Settle for startup-drain to run (asyncio.to_thread immediately
        # after daemon_started write_event). Always wait the original 2s so
        # a crash right after the drain is still observed, then keep polling
        # up to 15s for the rename so a slow machine does not fail the test.
        settle_until = time.monotonic() + 2.0
        give_up_at = time.monotonic() + 15.0
        while time.monotonic() < give_up_at:
            if proc.poll() is not None:
                break
            if time.monotonic() >= settle_until and not bad.exists():
                break
            time.sleep(0.1)

        # Daemon process MUST still be alive (drain didn't crash it).
        assert proc.poll() is None, (
            f"daemon exited unexpectedly with code {proc.returncode} — "
            "startup-drain probably propagated an exception"
        )

        # Bad file MUST be renamed to .failed-<ts>.jsonl.
        assert not bad.exists(), (
            "malformed file should have been renamed away by drain"
        )
        failed = list(deferred_dir.glob("session-G-99999.failed-*.jsonl"))
        assert len(failed) == 1, (
            f"expected exactly 1 .failed-* file, got {failed}"
        )
    finally:
        if proc is not None and proc.poll() is None:
            proc.send_signal(signal.SIGTERM)
            try:
                proc.wait(timeout=10)
            except subprocess.TimeoutExpired:
                proc.send_signal(signal.SIGKILL)
                proc.wait(timeout=3)
        _kill_daemon_by_socket(sock_path)
        try:
            if sock_path.exists():
                sock_path.unlink()
        except OSError:
            pass
        try:
            sock_dir.rmdir()
        except OSError:
            pass
        # Reset keyring cache, including on the failure path. The import is
        # guarded so the test does not fail if keyring is not installed
        # (NOTE(review): confirm whether keyring is still a test dependency).
        try:
            import keyring.core
        except ImportError:
            pass
        else:
            keyring.core._keyring_backend = None
|
||||
373
tests/test_dream.py
Normal file
373
tests/test_dream.py
Normal file
|
|
@ -0,0 +1,373 @@
|
|||
"""Tests for iai_mcp.dream -- Task 1.
|
||||
|
||||
Covers 9 behaviours from the plan:
|
||||
1. run_rem_cycle calls sleep.run_heavy_consolidation with SleepConfig(llm_enabled=False)
|
||||
and has_api_key=False.
|
||||
2. run_rem_cycle calls schema.induce_schemas_tier1 with llm_enabled=False (Tier-0).
|
||||
3. Non-last cycle does NOT invoke insight.generate_overnight_insight even if
|
||||
claude_enabled=True.
|
||||
4. Last cycle WITH claude_enabled=True invokes insight.generate_overnight_insight
|
||||
and surfaces text into result.
|
||||
5. Last cycle with claude_enabled=False does NOT invoke insight.
|
||||
6. rem_cycle_started + rem_cycle_completed events emitted.
|
||||
7. 15min cap enforced via asyncio.timeout; emits rem_cycle_timeout and returns
|
||||
timed_out=True.
|
||||
8. Exception inside run_heavy_consolidation is caught; rem_cycle_error event
|
||||
emitted; function returns a partial result dict (daemon never dies).
|
||||
9. literal preservation -- no daemon-side code path mutates
|
||||
MemoryRecord.literal_surface during a cycle (static assertion on dream.py).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import re
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# helpers: lightweight store stub + event capture
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class _EventLog:
    """Recorder that stands in for ``write_event`` and remembers each call."""

    def __init__(self) -> None:
        # One (kind, payload copy, severity) tuple per captured call.
        self.events: list[tuple[str, dict, str | None]] = []

    def capture(self, store, kind, data, *, severity=None, **kwargs):
        """Record one event; ``store`` and extra keyword args are ignored.

        The payload is shallow-copied so later mutation by the caller does
        not change what was recorded. Always returns None.
        """
        entry = (kind, dict(data), severity)
        self.events.append(entry)
        return None

    def kinds(self) -> list[str]:
        """Return the recorded event kinds in capture order."""
        return [kind for kind, _payload, _severity in self.events]
|
||||
|
||||
|
||||
def _fresh_store(tmp_path, monkeypatch):
    """Build a MemoryStore configured through env vars rooted at ``tmp_path``.

    Sets ``IAI_MCP_STORE`` to ``tmp_path / "iai"`` and ``IAI_MCP_EMBED_DIM``
    to ``"384"`` before importing and instantiating the store.
    """
    env_overrides = {
        "IAI_MCP_STORE": str(tmp_path / "iai"),
        "IAI_MCP_EMBED_DIM": "384",
    }
    for name, value in env_overrides.items():
        monkeypatch.setenv(name, value)

    # Imported only after the environment is in place, as in the original.
    from iai_mcp.store import MemoryStore

    store = MemoryStore()
    return store
|
||||
|
||||
|
||||
def _install_stubs(
    monkeypatch,
    *,
    heavy_return=None,
    heavy_raises=None,
    heavy_sleep_sec: float | None = None,
    candidates_return=None,
    insight_return=None,
    event_log: _EventLog | None = None,
):
    """Replace the external callables that dream.run_rem_cycle invokes.

    Patches ``run_heavy_consolidation``, ``induce_schemas_tier1`` and the
    two ledger classes on ``iai_mcp.dream``, plus
    ``iai_mcp.insight.generate_overnight_insight``. When ``event_log`` is
    given, ``iai_mcp.dream.write_event`` is redirected to its ``capture``.

    Args:
        monkeypatch: Object providing ``setattr(target, value)``.
        heavy_return: Value the heavy stub returns (a default dict if None).
        heavy_raises: Exception the heavy stub raises after recording.
        heavy_sleep_sec: Seconds the heavy stub blocks before continuing.
        candidates_return: Value the induction stub returns ([] if None).
        insight_return: Value the insight stub returns (default dict if None).
        event_log: Optional recorder for ``write_event`` calls.

    Returns:
        The (heavy_calls, schema_calls, insight_calls) recorder lists.
    """
    heavy_calls: list[tuple] = []
    schema_calls: list[tuple] = []
    insight_calls: list[tuple] = []

    def fake_heavy(store, session_id, cfg, budget, rate, has_api_key):
        # Record first, so the call is visible even when it sleeps or raises.
        heavy_calls.append((session_id, cfg, has_api_key))
        if heavy_sleep_sec is not None:
            time.sleep(heavy_sleep_sec)
        if heavy_raises is not None:
            raise heavy_raises
        if heavy_return is not None:
            return heavy_return
        return {
            "mode": "heavy", "tier": "tier0",
            "summaries_created": 3, "schemas_induced": 1,
            "decay_result": {"decayed": 0, "pruned": 0},
            "schema_candidates": [],
        }

    def fake_induce(store, budget, rate, llm_enabled):
        schema_calls.append((llm_enabled,))
        if candidates_return is not None:
            return candidates_return
        return []

    async def fake_insight(store, session_id):
        insight_calls.append((session_id,))
        if insight_return is not None:
            return insight_return
        return {"ok": True, "text": "test insight"}

    # Stand-in for the BudgetLedger / RateLimitLedger ctors so a bare store
    # object works.
    class _NoOp:
        def __init__(self, *a, **kw):
            pass

    replacements = [
        ("iai_mcp.dream.run_heavy_consolidation", fake_heavy),
        ("iai_mcp.dream.induce_schemas_tier1", fake_induce),
        ("iai_mcp.insight.generate_overnight_insight", fake_insight),
    ]
    if event_log is not None:
        replacements.append(("iai_mcp.dream.write_event", event_log.capture))
    replacements.append(("iai_mcp.dream.BudgetLedger", _NoOp))
    replacements.append(("iai_mcp.dream.RateLimitLedger", _NoOp))

    for target, replacement in replacements:
        monkeypatch.setattr(target, replacement)

    return heavy_calls, schema_calls, insight_calls
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 1: heavy consolidation called with llm_enabled=False + has_api_key=False
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_rem_cycle_invokes_heavy(tmp_path, monkeypatch):
    """run_rem_cycle calls heavy consolidation once, with the LLM disabled.

    The stubbed ``run_heavy_consolidation`` must receive the session id,
    a config whose ``llm_enabled`` is False, and ``has_api_key=False``.
    """
    from iai_mcp import dream

    recorder = _EventLog()
    heavy_calls, _, _ = _install_stubs(monkeypatch, event_log=recorder)

    # dream.py never touches store directly; the stubs handle it.
    placeholder_store = object()
    outcome = asyncio.run(
        dream.run_rem_cycle(
            placeholder_store, 1, 4, "sess-X",
            is_last=False, claude_enabled=False,
        )
    )

    assert len(heavy_calls) == 1, "run_heavy_consolidation not called"
    seen_session, seen_cfg, seen_api_key = heavy_calls[0]
    assert seen_session == "sess-X"
    assert seen_api_key is False, "daemon must pass has_api_key=False"
    assert getattr(seen_cfg, "llm_enabled", None) is False, "llm_enabled must be False"

    # The heavy result stub returns summaries_created=3.
    assert outcome["summaries_created"] == 3
    assert outcome["timed_out"] is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 2: Tier-0 schema induction (llm_enabled=False)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_rem_cycle_invokes_tier0_induction(tmp_path, monkeypatch):
    """Schema induction runs once on the Tier-0 path (``llm_enabled=False``).

    The induction stub returns two candidates and the cycle result must
    report that count under ``schema_candidates``.
    """
    from iai_mcp import dream

    recorder = _EventLog()
    two_candidates = [{"pattern": "foo"}, {"pattern": "bar"}]
    _, schema_calls, _ = _install_stubs(
        monkeypatch, event_log=recorder, candidates_return=two_candidates,
    )

    outcome = asyncio.run(
        dream.run_rem_cycle(
            object(), 2, 4, "sess-Y",
            is_last=False, claude_enabled=False,
        )
    )

    assert len(schema_calls) == 1, "induce_schemas_tier1 not called"
    (seen_llm_enabled,) = schema_calls[0]
    assert seen_llm_enabled is False, "Tier-0 path requires llm_enabled=False"
    assert outcome["schema_candidates"] == 2
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 3: non-last cycle with claude_enabled=True does NOT invoke insight
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_non_last_cycle_does_not_invoke_insight(tmp_path, monkeypatch):
    """A non-last cycle never calls the insight generator.

    Even with ``claude_enabled=True`` the insight stub must stay untouched
    and the result must report ``claude_call_used`` as False.
    """
    from iai_mcp import dream

    recorder = _EventLog()
    _, _, insight_calls = _install_stubs(monkeypatch, event_log=recorder)

    outcome = asyncio.run(
        dream.run_rem_cycle(
            object(), 2, 4, "sess-Y",
            is_last=False, claude_enabled=True,
        )
    )

    assert insight_calls == [], "insight called on non-last cycle (D-08 violation)"
    assert outcome["claude_call_used"] is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 4: last cycle with claude_enabled=True invokes insight and surfaces text
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_last_cycle_triggers_insight(tmp_path, monkeypatch):
    """The last cycle with ``claude_enabled=True`` generates an insight.

    The insight stub must be called exactly once with the session id, and
    its text must surface in the result as ``main_insight_text``.
    """
    from iai_mcp import dream

    recorder = _EventLog()
    canned_insight = {"ok": True, "text": "unified insight about patterns"}
    _, _, insight_calls = _install_stubs(
        monkeypatch, event_log=recorder, insight_return=canned_insight,
    )

    outcome = asyncio.run(
        dream.run_rem_cycle(
            object(), 4, 4, "sess-Z",
            is_last=True, claude_enabled=True,
        )
    )

    assert len(insight_calls) == 1, "last cycle must invoke insight"
    assert insight_calls[0] == ("sess-Z",)
    assert outcome["claude_call_used"] is True
    assert outcome["main_insight_text"] == "unified insight about patterns"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 5: last cycle with claude_enabled=False does NOT invoke insight
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_last_cycle_respects_host_disabled(tmp_path, monkeypatch):
    """``claude_enabled=False`` gates the insight call even on the last cycle.

    The insight stub must not be called, ``claude_call_used`` must be False
    and ``main_insight_text`` must be None.
    """
    from iai_mcp import dream

    recorder = _EventLog()
    _, _, insight_calls = _install_stubs(monkeypatch, event_log=recorder)

    outcome = asyncio.run(
        dream.run_rem_cycle(
            object(), 4, 4, "sess-W",
            is_last=True, claude_enabled=False,
        )
    )

    assert insight_calls == [], "claude_enabled=False must gate insight call"
    assert outcome["claude_call_used"] is False
    assert outcome["main_insight_text"] is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 6: rem_cycle_started + rem_cycle_completed events emitted
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_cycle_start_and_completed_events(tmp_path, monkeypatch):
    """A cycle emits ``rem_cycle_started`` before ``rem_cycle_completed``.

    Also pins the payload of the started event to ``{"n": 1, "of": 4}``.
    """
    from iai_mcp import dream

    recorder = _EventLog()
    _install_stubs(monkeypatch, event_log=recorder)

    asyncio.run(
        dream.run_rem_cycle(
            object(), 1, 4, "sess-E",
            is_last=False, claude_enabled=False,
        )
    )

    emitted = recorder.kinds()
    assert "rem_cycle_started" in emitted
    assert "rem_cycle_completed" in emitted
    started_at = emitted.index("rem_cycle_started")
    completed_at = emitted.index("rem_cycle_completed")
    assert started_at < completed_at

    # rem_cycle_started payload shape
    started_event = next(
        event for event in recorder.events if event[0] == "rem_cycle_started"
    )
    assert started_event[1] == {"n": 1, "of": 4}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 7: 15min cap enforced; timeout emits rem_cycle_timeout, timed_out=True
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_rem_cycle_respects_15min_cap(tmp_path, monkeypatch):
    """Exceeding the cycle time cap yields a timeout result, not a crash.

    The cap is shrunk to 0.1s and the heavy stub sleeps 0.3s. The result
    must carry ``timed_out=True`` and both ``rem_cycle_timeout`` and
    ``rem_cycle_completed`` must be emitted.
    """
    from iai_mcp import dream

    # Shrink the cap so the test is fast; the heavy stub's 0.3s sleep is
    # slow enough to trigger the timeout.
    monkeypatch.setattr(dream, "REM_CYCLE_MAX_SEC", 0.1)

    recorder = _EventLog()
    _install_stubs(monkeypatch, event_log=recorder, heavy_sleep_sec=0.3)

    outcome = asyncio.run(
        dream.run_rem_cycle(
            object(), 3, 4, "sess-T",
            is_last=False, claude_enabled=False,
        )
    )

    assert outcome["timed_out"] is True
    kinds = recorder.kinds()
    assert "rem_cycle_timeout" in kinds, f"missing rem_cycle_timeout; kinds={kinds}"
    # Timeout still completes with rem_cycle_completed (non-crashing).
    assert "rem_cycle_completed" in kinds
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 8: exception inside heavy-consolidation is caught, error event emitted
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_rem_cycle_exception_does_not_crash_daemon(tmp_path, monkeypatch):
    """An exception in heavy consolidation is contained by run_rem_cycle.

    The heavy stub raises ``RuntimeError``. The call must still return a
    partial result, and a ``rem_cycle_error`` event carrying the error text
    must be emitted.
    """
    from iai_mcp import dream

    recorder = _EventLog()
    _install_stubs(
        monkeypatch,
        event_log=recorder,
        heavy_raises=RuntimeError("boom from heavy"),
    )

    # Must NOT raise -- daemon's outer loop relies on this invariant.
    outcome = asyncio.run(
        dream.run_rem_cycle(
            object(), 1, 4, "sess-X",
            is_last=False, claude_enabled=False,
        )
    )

    kinds = recorder.kinds()
    assert "rem_cycle_error" in kinds, (
        f"rem_cycle_error must be emitted on exception; got {kinds}"
    )
    error_event = next(
        event for event in recorder.events if event[0] == "rem_cycle_error"
    )
    assert "boom from heavy" in error_event[1]["error"]
    # Partial result still returned (no exception propagates).
    assert "cycle" in outcome
    assert outcome["cycle"] == 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 9: literal preservation -- dream.py does not mutate literal_surface
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_dream_does_not_mutate_literal_surface():
    """C5 static guard: dream.py must contain zero writes to literal_surface.

    Reads ``src/iai_mcp/dream.py`` relative to this test file and scans it
    for plain or augmented assignment to a ``.literal_surface`` attribute.
    Read access, including ``==`` comparisons, is fine; only assignment is
    forbidden.
    """
    dream_src = (
        Path(__file__).resolve().parent.parent
        / "src" / "iai_mcp" / "dream.py"
    ).read_text(encoding="utf-8")

    # An optional augmented-assignment operator followed by a single "=".
    # The negative lookahead keeps "==" from counting as a write.
    pattern = re.compile(
        r"\.literal_surface\s*(?:\*\*|//|<<|>>|[-+*/%@&|^])?=(?!=)"
    )
    offenders = [
        f"{lineno}: {line.strip()}"
        for lineno, line in enumerate(dream_src.splitlines(), start=1)
        if pattern.search(line)
    ]
    assert not offenders, (
        "C5 violation: dream.py assigns to literal_surface"
        f" ({'; '.join(offenders)})"
    )
|
||||
59
tests/test_embed.py
Normal file
59
tests/test_embed.py
Normal file
|
|
@ -0,0 +1,59 @@
|
|||
"""Tests for iai_mcp.embed -- bge-small-en-v1.5 path (legacy model).
|
||||
|
||||
Plan 02-01 made bge-m3 the default. The 3-model registry still exposes
|
||||
bge-small-en-v1.5 (384d, English-only) for English-only deployments. These
|
||||
tests exercise the Phase-1 model explicitly via `Embedder(model_key=...)` so
|
||||
they remain valid regression gates.
|
||||
|
||||
Multilingual behaviour is covered by tests/test_embed_multilingual.py.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from iai_mcp.embed import Embedder
|
||||
|
||||
|
||||
def test_embed_returns_384_dim_vector() -> None:
    """A single bge-small-en-v1.5 embedding is a sequence of 384 floats."""
    vector = Embedder(model_key="bge-small-en-v1.5").embed("hello world")
    assert len(vector) == 384
    assert all(isinstance(component, float) for component in vector)
|
||||
|
||||
|
||||
def test_embed_is_deterministic() -> None:
    """Embedding the same text twice gives exactly equal vectors."""
    embedder = Embedder(model_key="bge-small-en-v1.5")
    text = "exact same text"
    first = embedder.embed(text)
    second = embedder.embed(text)
    assert first == second
|
||||
|
||||
|
||||
def test_embed_batch_preserves_order_and_dim() -> None:
    """``embed_batch`` returns one 384-d vector per input text.

    The first batch vector must also equal the vector produced by a single
    ``embed`` call on the first text.
    """
    embedder = Embedder(model_key="bge-small-en-v1.5")
    batch = embedder.embed_batch(["one", "two", "three"])

    assert len(batch) == 3
    assert all(len(vector) == 384 for vector in batch)
    # Batch must equal sequential calls (determinism across batching path too).
    single = embedder.embed("one")
    assert batch[0] == single
|
||||
|
||||
|
||||
def test_embed_empty_string_still_returns_384d() -> None:
    """The empty string still embeds to a 384-element vector."""
    vector = Embedder(model_key="bge-small-en-v1.5").embed("")
    assert len(vector) == 384
|
||||
|
||||
|
||||
def test_embedder_dim_matches_output() -> None:
    """``Embedder.DIM`` is 384 and equals the length of an actual embedding."""
    embedder = Embedder(model_key="bge-small-en-v1.5")
    assert embedder.DIM == 384
    produced = embedder.embed("anything")
    assert len(produced) == embedder.DIM
|
||||
|
||||
|
||||
def test_bge_small_en_still_registered_for_legacy() -> None:
    """D-02a keeps the model in the registry for English-only deployments.

    The registry entry for ``bge-small-en-v1.5`` must exist and declare a
    dimension of 384.
    """
    from iai_mcp.embed import MODEL_REGISTRY

    model_key = "bge-small-en-v1.5"
    assert model_key in MODEL_REGISTRY
    entry = MODEL_REGISTRY[model_key]
    assert entry["dim"] == 384
|
||||
151
tests/test_embed_multilingual.py
Normal file
151
tests/test_embed_multilingual.py
Normal file
|
|
@ -0,0 +1,151 @@
|
|||
"""Tests for the multilingual embedder path in the 3-model registry.
|
||||
|
||||
Plan 05-08 (2026-04-20) flipped the DEFAULT to bge-small-en-v1.5 (384d
|
||||
English-only). bge-m3 remains selectable via env var or explicit
|
||||
``Embedder(model_key="bge-m3")`` — these tests pin the key explicitly
|
||||
so the multilingual coverage keeps running under the new default.
|
||||
|
||||
These tests import SentenceTransformer and pull the bge-m3 weights once on
|
||||
first run (HuggingFace cache is re-used thereafter). If bge-m3 is already
|
||||
cached by any previous dev session the test runs in seconds.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
|
||||
# ------------------------------------------------------------- bge-m3 opt-in
|
||||
|
||||
|
||||
def test_bge_m3_opt_in_produces_1024d() -> None:
    """Requesting ``model_key="bge-m3"`` explicitly selects the 1024d
    multilingual model, independent of the Plan 05-08 default."""
    from iai_mcp.embed import Embedder

    embedder = Embedder(model_key="bge-m3")
    observed = (embedder.model_key, embedder.model_name, embedder.DIM)
    assert observed[0] == "bge-m3"
    assert observed[1] == "BAAI/bge-m3"
    assert observed[2] == 1024
|
||||
|
||||
|
||||
def test_bge_m3_embeds_english() -> None:
    """English text embeds to a unit-length 1024d vector under bge-m3."""
    from iai_mcp.embed import Embedder

    vector = Embedder(model_key="bge-m3").embed("Hello, how are you?")
    assert len(vector) == 1024
    # The embedding is expected to be L2-normalised (|v| == 1).
    magnitude = float(np.linalg.norm(np.asarray(vector)))
    assert abs(magnitude - 1.0) < 1e-4
|
||||
|
||||
|
||||
def test_bge_m3_embeds_russian() -> None:
    """Russian text embeds to a unit-length 1024d vector under bge-m3."""
    from iai_mcp.embed import Embedder

    vector = Embedder(model_key="bge-m3").embed("Привет, как дела?")
    assert len(vector) == 1024
    magnitude = float(np.linalg.norm(np.asarray(vector)))
    assert abs(magnitude - 1.0) < 1e-4
|
||||
|
||||
|
||||
def test_bge_m3_embeds_japanese() -> None:
    """Japanese text embeds to a unit-length 1024d vector under bge-m3."""
    from iai_mcp.embed import Embedder

    vector = Embedder(model_key="bge-m3").embed("こんにちは、今日は元気ですか?")
    assert len(vector) == 1024
    magnitude = float(np.linalg.norm(np.asarray(vector)))
    assert abs(magnitude - 1.0) < 1e-4
|
||||
|
||||
|
||||
def test_bge_m3_cross_language_similarity() -> None:
    """The English and Russian greetings land close together under bge-m3.

    The model key is pinned because the Plan 05-08 default is the
    English-only bge-small model.
    """
    from iai_mcp.embed import Embedder

    embedder = Embedder(model_key="bge-m3")
    en, ru = (np.asarray(embedder.embed(word)) for word in ("hello", "привет"))
    scale = np.linalg.norm(en) * np.linalg.norm(ru)
    cos = float(en @ ru / scale)
    assert cos > 0.5, f"cross-language cosine too low: {cos}"
|
||||
|
||||
|
||||
# ----------------------------------------------------------- env-var selection
|
||||
|
||||
|
||||
def test_embed_model_selectable_via_env(monkeypatch) -> None:
    """IAI_MCP_EMBED_MODEL selects from the 3-model registry.

    The embed module is reloaded after setting the variable so that any
    module-level state is re-evaluated under the override. The restoring
    reload runs in a ``finally`` clause so that a failing assertion cannot
    leave the reloaded-under-override module in place for later tests.
    """
    import importlib

    import iai_mcp.embed as embed_mod

    monkeypatch.setenv("IAI_MCP_EMBED_MODEL", "bge-small-en-v1.5")
    try:
        importlib.reload(embed_mod)
        e = embed_mod.Embedder()
        assert e.model_key == "bge-small-en-v1.5"
        assert e.DIM == 384
    finally:
        # Restore default for remaining tests, even when an assert above failed.
        monkeypatch.delenv("IAI_MCP_EMBED_MODEL", raising=False)
        importlib.reload(embed_mod)
|
||||
|
||||
|
||||
def test_embed_model_explicit_key_overrides_env(monkeypatch) -> None:
    """An explicit ``model_key`` argument takes precedence over the env var."""
    from iai_mcp.embed import Embedder

    monkeypatch.setenv("IAI_MCP_EMBED_MODEL", "bge-m3")
    embedder = Embedder(model_key="bge-small-en-v1.5")
    # The env var asked for bge-m3; the explicit argument must win.
    assert embedder.model_key == "bge-small-en-v1.5"
    assert embedder.DIM == 384
|
||||
|
||||
|
||||
def test_embed_model_dimension_registered() -> None:
    """Each of the three checked registry entries reports its expected ``dim``."""
    from iai_mcp.embed import MODEL_REGISTRY

    expected_dims = {
        "bge-m3": 1024,
        "multilingual-e5-small": 384,
        "bge-small-en-v1.5": 384,
    }
    for key, dim in expected_dims.items():
        assert MODEL_REGISTRY[key]["dim"] == dim
|
||||
|
||||
|
||||
def test_embed_model_rejects_unknown_key() -> None:
    """Constructing an Embedder with an unregistered key raises ValueError."""
    from iai_mcp.embed import Embedder

    bogus_key = "this-model-does-not-exist"
    with pytest.raises(ValueError):
        Embedder(model_key=bogus_key)
|
||||
|
||||
|
||||
def test_embed_model_rejects_unknown_env(monkeypatch) -> None:
    """An unregistered IAI_MCP_EMBED_MODEL value makes ``Embedder()`` raise ValueError."""
    from iai_mcp.embed import Embedder

    monkeypatch.setenv("IAI_MCP_EMBED_MODEL", "garbage")
    with pytest.raises(ValueError):
        Embedder()
|
||||
|
||||
|
||||
# ------------------------------------------------------- batch + determinism
|
||||
|
||||
|
||||
def test_embed_batch_preserves_order_and_dim() -> None:
    """``embed_batch`` returns one 1024d vector per input, in input order.

    Order is checked by comparing every batch row with the embedding of the
    same text computed on its own. The comparison is a cosine threshold
    rather than exact equality so that small floating-point differences
    between the batched and single-text paths do not cause a spurious failure.
    """
    from iai_mcp.embed import Embedder

    e = Embedder(model_key="bge-m3")
    texts = ["one", "два", "三"]
    vecs = e.embed_batch(texts)
    assert len(vecs) == 3
    assert all(len(v) == 1024 for v in vecs)

    # Row i of the batch must correspond to texts[i].
    for text, batched in zip(texts, vecs):
        solo = np.asarray(e.embed(text))
        row = np.asarray(batched)
        cos = float(row @ solo / (np.linalg.norm(row) * np.linalg.norm(solo)))
        assert cos > 0.99, f"batch row for {text!r} does not match its own embedding: {cos}"
|
||||
|
||||
|
||||
def test_embed_deterministic_same_input() -> None:
    """Embedding the same text twice with one Embedder yields equal vectors."""
    from iai_mcp.embed import Embedder

    embedder = Embedder()
    text = "deterministic test"
    first = embedder.embed(text)
    second = embedder.embed(text)
    assert first == second
|
||||
73
tests/test_embed_registry_minilm.py
Normal file
73
tests/test_embed_registry_minilm.py
Normal file
|
|
@ -0,0 +1,73 @@
|
|||
"""Phase 9.1 — Registry invariant tests for the all-MiniLM-L6-v2 additive entry.
|
||||
|
||||
Locks (additive-only registry expansion) and (source-freeze-modulo-registry)
|
||||
from the internal architecture spec. Verifies that:
|
||||
- the new MODEL_REGISTRY entry exists with the correct HF id and dimension,
|
||||
- DEFAULT_MODEL_KEY remains bge-small-en-v1.5 (the English-Only Brain lock
|
||||
still holds),
|
||||
- the 3 pre-existing entries are byte-identical to v3,
|
||||
- the new entry is functionally usable (loads, produces normalized 384d vectors),
|
||||
- production zero-arg Embedder() still resolves to the default.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from iai_mcp.embed import DEFAULT_MODEL_KEY, MODEL_REGISTRY, Embedder
|
||||
|
||||
|
||||
def test_registry_has_minilm_entry() -> None:
    """The registry carries an all-MiniLM-L6-v2 entry with the expected HF id and dim."""
    key = "all-MiniLM-L6-v2"
    assert key in MODEL_REGISTRY
    entry = MODEL_REGISTRY[key]
    assert entry["hf"] == "sentence-transformers/all-MiniLM-L6-v2"
    assert entry["dim"] == 384
|
||||
|
||||
|
||||
def test_default_model_key_unchanged() -> None:
    """DEFAULT_MODEL_KEY is still bge-small-en-v1.5 (D-02 + English-Only Brain lock)."""
    expected_default = "bge-small-en-v1.5"
    assert DEFAULT_MODEL_KEY == expected_default
|
||||
|
||||
|
||||
def test_registry_has_exactly_four_entries() -> None:
    """The registry key set is exactly the three original models plus all-MiniLM-L6-v2."""
    original_keys = {"bge-m3", "multilingual-e5-small", "bge-small-en-v1.5"}
    additive_keys = {"all-MiniLM-L6-v2"}
    assert set(MODEL_REGISTRY.keys()) == original_keys | additive_keys
|
||||
|
||||
|
||||
def test_existing_entries_byte_identical_to_v3() -> None:
    """The three pre-existing registry entries equal their expected ``hf``/``dim`` specs."""
    frozen_specs = {
        "bge-m3": {"hf": "BAAI/bge-m3", "dim": 1024},
        "multilingual-e5-small": {
            "hf": "intfloat/multilingual-e5-small",
            "dim": 384,
        },
        "bge-small-en-v1.5": {
            "hf": "BAAI/bge-small-en-v1.5",
            "dim": 384,
        },
    }
    for key, spec in frozen_specs.items():
        assert MODEL_REGISTRY[key] == spec
|
||||
|
||||
|
||||
def test_minilm_embedder_loads_and_produces_normalized_384d() -> None:
    """An Embedder built for all-MiniLM-L6-v2 reports the right metadata and
    returns a 384-element list whose L2 norm is approximately 1."""
    key = "all-MiniLM-L6-v2"
    emb = Embedder(model_key=key)
    assert emb.model_key == key
    assert emb.DIM == 384
    assert emb.model_name == "sentence-transformers/all-MiniLM-L6-v2"

    vec = emb.embed("hello world")
    assert isinstance(vec, list)
    assert len(vec) == 384

    # Normalisation check: L2 norm within 1e-3 of 1.0.
    squared = [component * component for component in vec]
    l2 = sum(squared) ** 0.5
    assert abs(l2 - 1.0) < 1e-3, f"vector not normalized: L2={l2}"
|
||||
|
||||
|
||||
def test_default_embedder_still_resolves_to_bge_small() -> None:
    """A zero-argument ``Embedder()`` resolves to the bge-small-en-v1.5 model."""
    default_embedder = Embedder()
    assert default_embedder.model_key == "bge-small-en-v1.5"
    assert default_embedder.DIM == 384
    assert default_embedder.model_name == "BAAI/bge-small-en-v1.5"
|
||||
176
tests/test_enforce_language_tagged.py
Normal file
176
tests/test_enforce_language_tagged.py
Normal file
|
|
@ -0,0 +1,176 @@
|
|||
"""Tests for enforce_language_tagged (Plan 02-01, constitutional).
|
||||
|
||||
Phase 1's enforce_english_raw gated storage to English-only. Plan 02-01 amends this to
|
||||
native-language storage: every record carries a language tag; the guard
|
||||
function only raises if the tag is missing or auto-detection is low confidence.
|
||||
|
||||
enforce_english_raw is retained as a backward-compat shim for callers.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from uuid import uuid4
|
||||
|
||||
import pytest
|
||||
|
||||
from iai_mcp.types import EMBED_DIM, MemoryRecord
|
||||
|
||||
|
||||
def _rec(text: str, language: str = "", tags: list[str] | None = None) -> MemoryRecord:
    """Create an episodic MemoryRecord for the guard tests.

    ``text`` becomes ``literal_surface``; ``tags`` is copied into the record.
    An empty ``language`` means "untagged": the record is first constructed
    with the placeholder ``"XX"`` and its ``language`` attribute is then reset
    to ``""`` so the guards see a record without a language tag.
    """
    untagged = not language
    fields = dict(
        id=uuid4(),
        tier="episodic",
        literal_surface=text,
        aaak_index="",
        embedding=[0.1] * EMBED_DIM,
        community_id=None,
        centrality=0.0,
        detail_level=2,
        pinned=False,
        stability=0.0,
        difficulty=0.0,
        last_reviewed=None,
        never_decay=False,
        never_merge=False,
        provenance=[],
        created_at=datetime.now(timezone.utc),
        updated_at=datetime.now(timezone.utc),
        tags=[] if not tags else list(tags),
        # Placeholder keeps construction on a non-empty language value.
        language="XX" if untagged else language,
    )
    record = MemoryRecord(**fields)
    if untagged:
        # Blank the tag after construction to simulate a missing language.
        record.language = ""
    return record
|
||||
|
||||
|
||||
# ---------------------------------------------------- enforce_language_tagged
|
||||
|
||||
|
||||
def test_enforce_language_tagged_accepts_english_with_tag():
    """An English record tagged ``en`` passes the guard without raising."""
    from iai_mcp.aaak import enforce_language_tagged

    record = _rec("hello world", language="en")
    enforce_language_tagged(record)  # must complete without an exception
|
||||
|
||||
|
||||
def test_enforce_language_tagged_accepts_russian_with_tag():
    """A Russian record tagged ``ru`` passes the guard without raising."""
    from iai_mcp.aaak import enforce_language_tagged

    record = _rec("привет мир", language="ru")
    enforce_language_tagged(record)
|
||||
|
||||
|
||||
def test_enforce_language_tagged_accepts_japanese_with_tag():
    """A Japanese record tagged ``ja`` passes the guard without raising."""
    from iai_mcp.aaak import enforce_language_tagged

    record = _rec("こんにちは", language="ja")
    enforce_language_tagged(record)
|
||||
|
||||
|
||||
def test_enforce_language_tagged_accepts_arabic_with_tag():
    """An Arabic record tagged ``ar`` passes the guard without raising."""
    from iai_mcp.aaak import enforce_language_tagged

    record = _rec("مرحبا بالعالم", language="ar")
    enforce_language_tagged(record)
|
||||
|
||||
|
||||
def test_enforce_language_tagged_rejects_missing_language_no_detect():
    """With an empty language and no ``detect=True``, the guard raises a
    ValueError whose message mentions "constitutional"."""
    from iai_mcp.aaak import enforce_language_tagged

    untagged = _rec("some text", language="")
    with pytest.raises(ValueError) as excinfo:
        enforce_language_tagged(untagged)
    message = str(excinfo.value).lower()
    assert "constitutional" in message
|
||||
|
||||
|
||||
def test_enforce_language_tagged_auto_detect_sets_language():
    """With ``detect=True`` an untagged English sentence ends up tagged ``en``."""
    from iai_mcp.aaak import enforce_language_tagged

    sentence = "This is a reasonable English sentence with enough words for detection."
    record = _rec(sentence, language="")
    enforce_language_tagged(record, detect=True)
    assert record.language == "en"
|
||||
|
||||
|
||||
def test_enforce_language_tagged_auto_detect_russian():
    """With ``detect=True`` an untagged Russian sentence ends up tagged ``ru``."""
    from iai_mcp.aaak import enforce_language_tagged

    sentence = "Это осмысленное предложение на русском языке с достаточным количеством слов."
    record = _rec(sentence, language="")
    enforce_language_tagged(record, detect=True)
    assert record.language == "ru"
|
||||
|
||||
|
||||
def test_enforce_language_tagged_empty_text_gets_default_en():
    """An empty ``literal_surface`` with ``detect=True`` is tagged ``en``."""
    from iai_mcp.aaak import enforce_language_tagged

    blank = _rec("", language="")
    enforce_language_tagged(blank, detect=True)
    assert blank.language == "en"
|
||||
|
||||
|
||||
# ------------------------------------------------ enforce_english_raw shim
|
||||
|
||||
|
||||
def test_enforce_english_raw_still_importable():
    """The Phase-1 guard can still be imported from ``iai_mcp.aaak`` and is callable."""
    from iai_mcp.aaak import enforce_english_raw as shim

    assert callable(shim)
|
||||
|
||||
|
||||
def test_enforce_english_raw_with_language_tag_still_phase1_semantics():
    """The shim keeps Phase-1 behaviour even when ``language='ru'`` is set.

    Cyrillic ``literal_surface`` without a ``raw:<lang>`` entry in ``tags``
    still raises ValueError. Callers that want native-language storage should
    use ``enforce_language_tagged`` rather than this shim.
    """
    from iai_mcp.aaak import enforce_english_raw

    record = _rec("привет мир", language="ru")
    with pytest.raises(ValueError):
        enforce_english_raw(record)
|
||||
|
||||
|
||||
def test_enforce_english_raw_still_blocks_untagged_cyrillic():
    """Untagged Cyrillic text (``language=""``) is rejected with a ValueError
    whose message mentions "constitutional"."""
    from iai_mcp.aaak import enforce_english_raw

    record = _rec("привет мир", language="")
    with pytest.raises(ValueError) as excinfo:
        enforce_english_raw(record)
    message = str(excinfo.value).lower()
    assert "constitutional" in message
|
||||
|
||||
|
||||
def test_enforce_english_raw_accepts_cyrillic_with_raw_tag():
    """Cyrillic text carrying a ``raw:ru`` tag passes the shim without raising."""
    from iai_mcp.aaak import enforce_english_raw

    record = _rec("привет мир", language="", tags=["raw:ru"])
    enforce_english_raw(record)
|
||||
|
||||
|
||||
def test_enforce_english_raw_accepts_pure_english():
    """Plain English text with no language tag passes the shim without raising."""
    from iai_mcp.aaak import enforce_english_raw

    record = _rec("hello world", language="")
    enforce_english_raw(record)
|
||||
161
tests/test_english_only_default.py
Normal file
161
tests/test_english_only_default.py
Normal file
|
|
@ -0,0 +1,161 @@
|
|||
"""Plan 05-08 — revert the Phase-2 deviation and restore the
|
||||
PROJECT.md original embedder default: ``bge-small-en-v1.5`` (384d
|
||||
English-only). bge-m3 (1024d multilingual) remains opt-in via the
|
||||
``IAI_MCP_EMBED_MODEL`` env var or the ``model_key`` kwarg on Embedder.
|
||||
|
||||
Phase 9.1 (2026-04-29): MODEL_REGISTRY grew by ONE additive entry
|
||||
for ``all-MiniLM-L6-v2`` (legacy alternative embedder; bench-only ablation).
|
||||
DEFAULT_MODEL_KEY remains ``bge-small-en-v1.5``; production callers
|
||||
unaffected. The "registry retains all original entries" contract here is
|
||||
relaxed to "registry retains all original entries + at most 1 additive
|
||||
entry per the source-freeze-modulo-registry invariant".
|
||||
|
||||
Covered contracts (9 tests):
|
||||
|
||||
1. DEFAULT_MODEL_KEY is "bge-small-en-v1.5"
|
||||
2. Embedder() with no args builds the 384d bge-small embedder
|
||||
3. DEFAULT_EMBED_DIM (and legacy EMBED_DIM alias) is 384
|
||||
4. MODEL_REGISTRY retains the original 3 entries; D-02
|
||||
allows the additive all-MiniLM-L6-v2 entry without breaking the
|
||||
English-Only Brain lock
|
||||
5. IAI_MCP_EMBED_MODEL=bge-m3 env var still selects bge-m3
|
||||
6. embedder_for_store on a 1024d store returns bge-m3 (back-compat)
|
||||
7. embedder_for_store on a 384d store returns bge-small-en-v1.5
|
||||
8. PROJECT.md line 125 still mentions bge-small-en-v1.5 (constraint)
|
||||
9. importing the package does NOT auto-download bge-m3 weights
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
from unittest import mock
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _clear_env(monkeypatch: pytest.MonkeyPatch) -> None:
    """Remove any IAI_MCP_EMBED_MODEL override before each test in this module.

    ``monkeypatch`` undoes the deletion at teardown, so no explicit cleanup
    step is needed here.
    """
    monkeypatch.delenv("IAI_MCP_EMBED_MODEL", raising=False)
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- tests
|
||||
|
||||
|
||||
def test_default_model_key_is_bge_small():
    """The module-level DEFAULT_MODEL_KEY is ``bge-small-en-v1.5``."""
    import iai_mcp.embed as embed_mod

    assert embed_mod.DEFAULT_MODEL_KEY == "bge-small-en-v1.5"
|
||||
|
||||
|
||||
def test_embedder_defaults_to_384d_small():
    """The Embedder class attributes advertise bge-small-en-v1.5 at 384 dimensions."""
    from iai_mcp.embed import Embedder

    assert Embedder.DEFAULT_MODEL_KEY == "bge-small-en-v1.5"
    for attr in ("DEFAULT_DIM", "DIM"):
        assert getattr(Embedder, attr) == 384
|
||||
|
||||
|
||||
def test_types_embed_dim_defaults_to_384():
    """``iai_mcp.types`` exposes both DEFAULT_EMBED_DIM and EMBED_DIM as 384."""
    import iai_mcp.types as types_mod

    assert types_mod.DEFAULT_EMBED_DIM == 384
    assert types_mod.EMBED_DIM == 384
|
||||
|
||||
|
||||
def test_model_registry_retains_original_three_entries():
    """The three original registry entries are present and unmodified.

    D-02 permits additive entries (currently all-MiniLM-L6-v2), so this test
    deliberately checks containment rather than an exact key set: bge-m3,
    multilingual-e5-small and bge-small-en-v1.5 must each map to their
    canonical ``hf`` id and ``dim``.
    """
    from iai_mcp.embed import MODEL_REGISTRY

    originals = {
        "bge-m3": {"hf": "BAAI/bge-m3", "dim": 1024},
        "bge-small-en-v1.5": {
            "hf": "BAAI/bge-small-en-v1.5",
            "dim": 384,
        },
        "multilingual-e5-small": {
            "hf": "intfloat/multilingual-e5-small",
            "dim": 384,
        },
    }
    for key, spec in originals.items():
        assert key in MODEL_REGISTRY
        assert MODEL_REGISTRY[key] == spec

    # Explicit guard against pruning: extra keys are fine, missing ones are not.
    registered = set(MODEL_REGISTRY)
    assert {"bge-m3", "multilingual-e5-small", "bge-small-en-v1.5"}.issubset(registered)
|
||||
|
||||
|
||||
def test_env_var_still_selects_bge_m3(monkeypatch):
    """With IAI_MCP_EMBED_MODEL=bge-m3 set, ``_resolve_model_key()`` returns ``bge-m3``."""
    monkeypatch.setenv("IAI_MCP_EMBED_MODEL", "bge-m3")
    from iai_mcp.embed import _resolve_model_key

    resolved = _resolve_model_key()
    assert resolved == "bge-m3"
|
||||
|
||||
|
||||
def test_embedder_for_store_picks_bge_m3_for_1024d_store():
    """A store reporting ``embed_dim=1024`` gets a bge-m3 embedder.

    ``_get_model`` is patched so that no real model is loaded.
    """
    from iai_mcp.embed import embedder_for_store

    legacy_store = SimpleNamespace(embed_dim=1024)
    with mock.patch("iai_mcp.embed._get_model", return_value=mock.MagicMock()):
        embedder = embedder_for_store(legacy_store)
    assert embedder.model_key == "bge-m3"
    assert embedder.DIM == 1024
|
||||
|
||||
|
||||
def test_embedder_for_store_picks_bge_small_for_384d_store():
    """A store reporting ``embed_dim=384`` gets a bge-small-en-v1.5 embedder.

    ``_get_model`` is patched so that no real model is loaded.
    """
    from iai_mcp.embed import embedder_for_store

    small_store = SimpleNamespace(embed_dim=384)
    with mock.patch("iai_mcp.embed._get_model", return_value=mock.MagicMock()):
        embedder = embedder_for_store(small_store)
    assert embedder.model_key == "bge-small-en-v1.5"
    assert embedder.DIM == 384
|
||||
|
||||
|
||||
def test_project_md_still_pins_bge_small_constraint():
    """``.planning/PROJECT.md`` still names bge-small-en-v1.5 and 384d.

    Asserting on the file content guards against the spec being flipped
    silently. The test is skipped when the file is absent from the checkout.
    """
    p = Path(__file__).resolve().parents[1] / ".planning" / "PROJECT.md"
    if not p.exists():
        pytest.skip(".planning is gitignored; PROJECT.md not present in this checkout")
    # Decode explicitly so the result does not depend on the platform locale.
    content = p.read_text(encoding="utf-8")
    assert "bge-small-en-v1.5" in content
    # "384d embeddings" contains "384d", so the single substring check is
    # equivalent to the former two-way ``or``.
    assert "384d" in content
|
||||
|
||||
|
||||
def test_package_import_does_not_auto_download_models():
    """Importing ``iai_mcp.embed`` and ``iai_mcp.types`` must not construct a
    SentenceTransformer.

    Model weights are meant to be pulled lazily on first Embedder use, not
    as a side effect of importing the package.
    """
    import sys

    # patch.dict restores sys.modules on exit, so evicting entries is safe.
    with mock.patch.dict(sys.modules):
        # Evict cached iai_mcp modules so the imports below really re-execute.
        cached = [name for name in sys.modules if name.startswith("iai_mcp")]
        for name in cached:
            sys.modules.pop(name, None)

        from sentence_transformers import SentenceTransformer

        # Any constructor call during import trips this AssertionError.
        tripwire = AssertionError("model instantiated at import time")
        with mock.patch.object(SentenceTransformer, "__init__", side_effect=tripwire):
            import iai_mcp.embed  # noqa: F401
            import iai_mcp.types  # noqa: F401
|
||||
187
tests/test_events.py
Normal file
187
tests/test_events.py
Normal file
|
|
@ -0,0 +1,187 @@
|
|||
"""Tests for the events LanceDB table + events.py module (Plan 02-01, D-STORAGE).
|
||||
|
||||
Covers:
|
||||
- events table created on MemoryStore instantiation
|
||||
- write_event / query_events round-trip
|
||||
- kind/severity/since filters
|
||||
- ordering (newest first)
|
||||
- limit default + explicit
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from uuid import UUID, uuid4
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# ----------------------------------------------------------- table creation
|
||||
|
||||
|
||||
def test_events_table_created_on_store_init(tmp_path):
    """Constructing a MemoryStore makes the events table appear in its table names."""
    from iai_mcp.store import EVENTS_TABLE, MemoryStore

    table_names = MemoryStore(path=tmp_path)._table_names()
    assert EVENTS_TABLE in table_names
|
||||
|
||||
|
||||
def test_budget_ledger_table_created(tmp_path):
    """Constructing a MemoryStore makes the budget table appear in its table names."""
    from iai_mcp.store import BUDGET_TABLE, MemoryStore

    table_names = MemoryStore(path=tmp_path)._table_names()
    assert BUDGET_TABLE in table_names
|
||||
|
||||
|
||||
def test_ratelimit_ledger_table_created(tmp_path):
    """Constructing a MemoryStore makes the rate-limit table appear in its table names."""
    from iai_mcp.store import MemoryStore, RATELIMIT_TABLE

    table_names = MemoryStore(path=tmp_path)._table_names()
    assert RATELIMIT_TABLE in table_names
|
||||
|
||||
|
||||
# ------------------------------------------------------ write_event / query
|
||||
|
||||
|
||||
def test_events_write_and_query_roundtrip(tmp_path):
    """An event written with ``write_event`` comes back from ``query_events``
    with its kind, data payload and session id intact."""
    from iai_mcp.events import query_events, write_event
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    written_id = write_event(store, kind="test", data={"x": 1}, session_id="s1")
    assert isinstance(written_id, UUID)

    rows = query_events(store, kind="test")
    assert len(rows) == 1
    row = rows[0]
    assert row["kind"] == "test"
    assert row["data"]["x"] == 1
    assert row["session_id"] == "s1"
|
||||
|
||||
|
||||
def test_events_write_returns_uuid(tmp_path):
    """``write_event`` returns a ``UUID`` instance."""
    from iai_mcp.events import write_event
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    returned = write_event(store, kind="k", data={})
    assert isinstance(returned, UUID)
|
||||
|
||||
|
||||
def test_events_query_filter_kind(tmp_path):
    """Filtering by ``kind`` returns only matching events; no filter returns all three."""
    from iai_mcp.events import query_events, write_event
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    for kind in ("a", "b", "c"):
        write_event(store, kind=kind, data={})

    assert len(query_events(store, kind="a")) == 1
    assert len(query_events(store, kind="b")) == 1
    everything = query_events(store)
    assert len(everything) == 3
|
||||
|
||||
|
||||
def test_events_query_filter_since(tmp_path, monkeypatch):
    """``since`` bounds results: a future cutoff returns nothing, a past cutoff returns all.

    Time is not frozen here, so both events are written at nearly the same
    instant and the test brackets them instead of splitting them: a cutoff one
    hour ahead must exclude both, a cutoff one hour back must include both.
    ``monkeypatch`` is accepted but unused; it is kept so the signature is
    unchanged.
    """
    from iai_mcp.events import query_events, write_event
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    write_event(store, kind="t", data={"old": True})
    write_event(store, kind="t", data={"new": True})

    # Cutoff in the future -> no results.
    future = datetime.now(timezone.utc) + timedelta(hours=1)
    assert query_events(store, kind="t", since=future) == []

    # Cutoff well in the past -> both events.
    past = datetime.now(timezone.utc) - timedelta(hours=1)
    assert len(query_events(store, kind="t", since=past)) == 2
|
||||
|
||||
|
||||
def test_events_query_filter_severity(tmp_path):
    """One event per severity is written; each severity filter returns exactly one."""
    from iai_mcp.events import query_events, write_event
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    for level in ("info", "warning", "critical"):
        write_event(store, kind="k", data={}, severity=level)

    for level in ("critical", "warning", "info"):
        assert len(query_events(store, severity=level)) == 1
|
||||
|
||||
|
||||
def test_events_query_limit_default_100(tmp_path):
    """With 150 events stored, the default query returns 100 and ``limit=50`` returns 50."""
    from iai_mcp.events import query_events, write_event
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    for index in range(150):
        write_event(store, kind="bulk", data={"i": index})

    # No limit argument: capped at 100.
    capped = query_events(store, kind="bulk")
    assert len(capped) == 100

    # Caller-supplied limit.
    halved = query_events(store, kind="bulk", limit=50)
    assert len(halved) == 50
|
||||
|
||||
|
||||
def test_events_query_ordering_newest_first(tmp_path):
    """Query results are ordered newest first (descending timestamp)."""
    import time

    from iai_mcp.events import query_events, write_event
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    for index in range(3):
        if index:
            # Short pause between writes so the timestamps differ.
            time.sleep(0.01)
        write_event(store, kind="ord", data={"i": index})

    rows = query_events(store, kind="ord")
    # The last-written event (i=2) must lead.
    observed_order = [row["data"]["i"] for row in rows]
    assert observed_order == [2, 1, 0]
|
||||
|
||||
|
||||
def test_events_source_ids_roundtrip(tmp_path):
    """UUIDs passed as ``source_ids`` come back from a query as their string forms."""
    from iai_mcp.events import query_events, write_event
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    source_uuids = [uuid4(), uuid4()]
    write_event(store, kind="s", data={}, source_ids=source_uuids)

    rows = query_events(store, kind="s")
    assert len(rows) == 1
    returned = rows[0]["source_ids"]
    assert set(returned) == {str(u) for u in source_uuids}
|
||||
|
||||
|
||||
def test_events_domain_roundtrip(tmp_path):
    """The ``domain`` given to ``write_event`` is returned unchanged by ``query_events``."""
    from iai_mcp.events import query_events, write_event
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    write_event(store, kind="k", data={}, domain="coding")

    rows = query_events(store, kind="k")
    assert len(rows) == 1
    assert rows[0]["domain"] == "coding"
|
||||
|
||||
|
||||
def test_events_empty_store_returns_empty(tmp_path):
    """A fresh store yields an empty list, with or without a ``kind`` filter."""
    from iai_mcp.events import query_events
    from iai_mcp.store import MemoryStore

    fresh_store = MemoryStore(path=tmp_path)
    unfiltered = query_events(fresh_store)
    assert unfiltered == []
    filtered = query_events(fresh_store, kind="nothing")
    assert filtered == []
|
||||
116
tests/test_first_turn_pending_drain.py
Normal file
116
tests/test_first_turn_pending_drain.py
Normal file
|
|
@ -0,0 +1,116 @@
|
|||
"""Phase 07.2-02 R3 unit tests for prune_first_turn_pending pure helper.
|
||||
|
||||
Distinct from tests/test_daemon_state.py::test_prune_* which covers the
|
||||
24h-default `prune_stale_first_turn`. This file covers the new 1h-default
|
||||
`prune_first_turn_pending` (tuple return + dropped session_ids list).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timedelta, timezone
|
||||
|
||||
from iai_mcp.daemon_state import (
|
||||
FIRST_TURN_PENDING_TTL_SEC_DEFAULT,
|
||||
prune_first_turn_pending,
|
||||
)
|
||||
|
||||
NOW = datetime(2026, 4, 27, 12, 0, tzinfo=timezone.utc)
|
||||
|
||||
|
||||
def test_default_ttl_is_3600_seconds() -> None:
    """D7.2-08: the default first-turn-pending TTL constant equals 3600.0 seconds."""
    one_hour_sec = 3600.0
    assert FIRST_TURN_PENDING_TTL_SEC_DEFAULT == one_hour_sec
|
||||
|
||||
|
||||
def test_keeps_fresh_evicts_stale_returns_dropped_ids() -> None:
    """With a 1h TTL, a 30-minute-old entry is kept and a 2-hour-old entry is
    removed and reported in the dropped list."""
    fresh_ts = (NOW - timedelta(minutes=30)).isoformat()  # inside the TTL
    stale_ts = (NOW - timedelta(hours=2)).isoformat()  # beyond the TTL
    pending = {"sess-fresh": fresh_ts, "sess-stale": stale_ts}

    pruned, dropped = prune_first_turn_pending(
        {"first_turn_pending": pending}, now=NOW, ttl_sec=3600.0
    )

    assert pruned["first_turn_pending"] == {"sess-fresh": fresh_ts}
    assert dropped == ["sess-stale"]
|
||||
|
||||
|
||||
def test_legacy_bool_entries_evict_with_no_timestamp() -> None:
    """D7.2-07 contract: values that are not strings (True, False, None) are
    all evicted and reported as dropped."""
    legacy_pending = {"sess-1": True, "sess-2": False, "sess-3": None}

    pruned, dropped = prune_first_turn_pending(
        {"first_turn_pending": legacy_pending}, now=NOW
    )

    assert pruned["first_turn_pending"] == {}
    assert sorted(dropped) == ["sess-1", "sess-2", "sess-3"]
|
||||
|
||||
|
||||
def test_malformed_iso_string_evicts() -> None:
    """An unparseable timestamp string is dropped instead of raising, while a
    valid recent entry in the same dict survives."""
    recent_ts = (NOW - timedelta(seconds=60)).isoformat()
    pending = {
        "sess-bad": "not-an-iso-string-2026-99-99",
        "sess-good": recent_ts,
    }

    pruned, dropped = prune_first_turn_pending({"first_turn_pending": pending}, now=NOW)

    assert "sess-bad" in dropped
    assert "sess-good" in pruned["first_turn_pending"]
|
||||
|
||||
|
||||
def test_naive_timestamps_treated_as_utc() -> None:
    """A timezone-less ISO string two hours before NOW is evicted at a 1h TTL."""
    two_hours_ago = NOW - timedelta(seconds=7200)
    # Strip tzinfo so the serialized string carries no UTC offset.
    naive_stale = two_hours_ago.replace(tzinfo=None).isoformat()

    pruned, dropped = prune_first_turn_pending(
        {"first_turn_pending": {"sess-naive": naive_stale}}, now=NOW, ttl_sec=3600.0
    )

    assert dropped == ["sess-naive"]
    assert pruned["first_turn_pending"] == {}
|
||||
|
||||
|
||||
def test_empty_or_missing_pending_returns_no_drops() -> None:
    """Pruning an absent, empty, or None first_turn_pending drops nothing."""
    # Case 1: the key is absent. The helper may return the state as-is or
    # add an empty "first_turn_pending" dict; both shapes are accepted here.
    # The property that matters is the empty drop list.
    state_missing, dropped_missing = prune_first_turn_pending({}, now=NOW)
    assert state_missing in ({"first_turn_pending": {}}, {})
    assert dropped_missing == []

    # Case 2: the key is present and holds an empty dict.
    state_empty, dropped_empty = prune_first_turn_pending(
        {"first_turn_pending": {}}, now=NOW,
    )
    assert dropped_empty == []
    assert state_empty["first_turn_pending"] == {}

    # Case 3: the key is present but holds None; only the drop list is checked.
    _, dropped_none = prune_first_turn_pending(
        {"first_turn_pending": None}, now=NOW,
    )
    assert dropped_none == []
|
||||
|
||||
|
||||
def test_does_not_mutate_state_outside_first_turn_pending() -> None:
    """Keys other than first_turn_pending come back with unchanged values."""
    bystanders = {"unrelated_key": "unrelated_value", "fsm_state": "WAKE"}
    state = {
        **bystanders,
        "first_turn_pending": {
            "sess-stale": (NOW - timedelta(hours=2)).isoformat(),
        },
    }

    pruned_state, _ = prune_first_turn_pending(state, now=NOW)

    for key, expected in bystanders.items():
        assert pruned_state.get(key) == expected
|
||||
146
tests/test_first_turn_pending_drain_wireup.py
Normal file
146
tests/test_first_turn_pending_drain_wireup.py
Normal file
|
|
@ -0,0 +1,146 @@
|
|||
"""Phase 07.2-04 R3 / A3 integration test — startup + per-tick TTL drain wired into daemon.
|
||||
|
||||
Strategy: Plan 04 Task 1 threads an explicit `now=datetime.now(timezone.utc)`
|
||||
kwarg from BOTH wire-in call sites into `prune_first_turn_pending`. This
|
||||
means the helper is fully testable by passing a fixed `NOW` directly —
|
||||
no datetime monkeypatching dance.
|
||||
|
||||
Three checks:
|
||||
1. Direct helper invocation with mixed stale/fresh state proves the
|
||||
eviction contract (5 stale evict, 5 fresh keep, dropped IDs returned).
|
||||
2. Smoke import confirms the names daemon.py imports are reachable.
|
||||
3. Source-grep on daemon.py confirms both wire-in sites pass the explicit
|
||||
`now=` kwarg (Task 1's structural contract).
|
||||
|
||||
Project async-test idiom (mandatory): sync `def test_*`. No
|
||||
`@pytest.mark.asyncio`. The helper itself is sync, so all tests here
|
||||
are plain sync `def test_*` with no `asyncio.run` needed.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from pathlib import Path
|
||||
|
||||
# Fixed, timezone-aware reference instant. Tests in this module pass it
# explicitly as `now=` so results do not depend on the wall clock.
NOW = datetime(2026, 4, 27, 12, 0, tzinfo=timezone.utc)
|
||||
|
||||
|
||||
def _make_mixed_state() -> dict:
    """Build a state dict with 5 stale and 5 fresh first_turn_pending entries.

    Stale entries are stamped 2 h before `NOW`, fresh entries 30 s before
    `NOW`. Both timestamps are derived from the module-level `NOW`, so the
    result is the same whenever the test runs.
    """
    stale_ts = (NOW - timedelta(hours=2)).isoformat()
    fresh_ts = (NOW - timedelta(seconds=30)).isoformat()

    # Stale keys go in first, then fresh keys (same insertion order as a
    # merge of the two groups).
    pending = {f"sess-stale-{i}": stale_ts for i in range(5)}
    pending.update({f"sess-fresh-{i}": fresh_ts for i in range(5)})
    return {"fsm_state": "WAKE", "first_turn_pending": pending}
|
||||
|
||||
|
||||
def test_prune_helper_drops_5_stale_keeps_5_fresh_with_fixed_now():
    """A3 acceptance (helper contract): 5 stale entries out, 5 fresh entries kept.

    With `NOW` fixed and a state of 5 stale + 5 fresh entries, the helper
    must return the 5 stale IDs as dropped and a state that holds only the
    fresh entries.
    """
    from iai_mcp.daemon_state import (
        FIRST_TURN_PENDING_TTL_SEC_DEFAULT,
        prune_first_turn_pending,
    )

    # Called as `(state, now=...)`, the call shape this module's docstring
    # attributes to the daemon wire-in sites, so a signature change shows up
    # here as well.
    new_state, dropped = prune_first_turn_pending(_make_mixed_state(), now=NOW)

    # All five stale IDs are reported as dropped.
    expected_dropped = [f"sess-stale-{i}" for i in range(5)]
    assert sorted(dropped) == sorted(expected_dropped), (
        f"Expected exactly 5 stale session_ids dropped; got {dropped}"
    )

    # Exactly the five fresh IDs survive.
    survivors = new_state["first_turn_pending"]
    assert len(survivors) == 5
    for session_id in survivors:
        assert session_id.startswith("sess-fresh-"), f"unexpected key kept: {session_id}"

    # Sanity-check the exported default TTL (1 h).
    assert FIRST_TURN_PENDING_TTL_SEC_DEFAULT == 3600.0
|
||||
|
||||
|
||||
def test_prune_helper_no_drop_when_only_fresh_entries():
    """Control: with NOW fixed and only fresh entries, nothing is dropped
    and all 5 entries remain in first_turn_pending."""
    from iai_mcp.daemon_state import prune_first_turn_pending

    fresh_ts = (NOW - timedelta(seconds=30)).isoformat()
    only_fresh = {f"sess-fresh-{i}": fresh_ts for i in range(5)}
    state = {"fsm_state": "WAKE", "first_turn_pending": only_fresh}

    new_state, dropped = prune_first_turn_pending(state, now=NOW)

    assert dropped == [], f"Expected zero drops on all-fresh state; got {dropped}"
    assert len(new_state["first_turn_pending"]) == 5
|
||||
|
||||
|
||||
def test_first_turn_pending_drain_helper_imported_in_daemon_main():
    """Smoke: the helper and its TTL constant import cleanly from daemon_state.

    A wrong module path or a misspelled name makes this fail immediately.
    """
    from iai_mcp.daemon_state import (
        FIRST_TURN_PENDING_TTL_SEC_DEFAULT,
        prune_first_turn_pending,
    )

    assert FIRST_TURN_PENDING_TTL_SEC_DEFAULT == 3600.0
    assert callable(prune_first_turn_pending)
|
||||
|
||||
|
||||
def test_daemon_wire_in_passes_explicit_now_kwarg_at_both_sites():
    """Structural check: daemon.py calls the helper with an explicit `now=`.

    Reads the daemon.py source text and requires at least two call sites of
    the form `prune_first_turn_pending(state, now=datetime.now(timezone.utc)`.
    It also requires both a "startup" and a "tick" `phase` marker in the
    source. This is the wire-up half of A3: the other tests only prove the
    helper works, not that the daemon calls it.
    """
    daemon_src = Path(__file__).resolve().parent.parent / "src" / "iai_mcp" / "daemon.py"
    # Decode explicitly as UTF-8 so the check does not depend on the
    # platform's locale default encoding.
    text = daemon_src.read_text(encoding="utf-8")

    # Match `prune_first_turn_pending(\n    state, now=datetime.now(timezone.utc)`
    # tolerantly across whitespace and line breaks (`\s` also matches newlines).
    pat = re.compile(
        r"prune_first_turn_pending\s*\(\s*state\s*,\s*now\s*=\s*datetime\.now\(\s*timezone\.utc\s*\)",
        re.MULTILINE,
    )
    matches = pat.findall(text)
    assert len(matches) >= 2, (
        "Expected >= 2 wire-in sites with explicit `now=datetime.now(timezone.utc)` "
        f"kwarg in daemon.py; found {len(matches)}. Plan 04 Task 1 contract:"
        " both startup-prune (in main()) and tick-prune (in _tick_body Step 0.5)"
        " must thread `now=` explicitly."
    )

    # Both event-emit phases ("startup" and "tick") must be present, in
    # either double- or single-quoted dict-literal form.
    assert '"phase": "startup"' in text or "'phase': 'startup'" in text, (
        "Startup-side event emit missing `phase: startup` in payload."
    )
    assert '"phase": "tick"' in text or "'phase': 'tick'" in text, (
        "Tick-side event emit missing `phase: tick` in payload."
    )
|
||||
192
tests/test_first_turn_recall.py
Normal file
192
tests/test_first_turn_recall.py
Normal file
|
|
@ -0,0 +1,192 @@
|
|||
"""Phase 5 RED-state test scaffold. Tasks 2-5 turn these GREEN.
|
||||
|
||||
Covers TOK-12 / D5-03: first-turn auto-recall hook in core.dispatch that fires
|
||||
exactly once per session and injects a scoped recall into the response.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from uuid import uuid4
|
||||
|
||||
import pytest
|
||||
|
||||
from iai_mcp import core
|
||||
from iai_mcp.store import MemoryStore
|
||||
from iai_mcp.types import EMBED_DIM, MemoryRecord
|
||||
|
||||
|
||||
def _seed_one_record(store: MemoryStore, text: str = "reference content") -> None:
    """Insert one semantic-tier record with `text` as its literal surface.

    The embedding is a constant vector of 0.1 with EMBED_DIM entries, and
    created_at / updated_at are both set to the current UTC time.
    """
    stamp = datetime.now(timezone.utc)
    record = MemoryRecord(
        id=uuid4(),
        tier="semantic",
        literal_surface=text,
        aaak_index="",
        embedding=[0.1] * EMBED_DIM,
        community_id=None,
        centrality=0.5,
        detail_level=3,
        pinned=False,
        stability=0.0,
        difficulty=0.0,
        last_reviewed=None,
        never_decay=False,
        never_merge=False,
        provenance=[],
        created_at=stamp,
        updated_at=stamp,
        tags=[],
        language="en",
    )
    store.insert(record)
|
||||
|
||||
|
||||
def test_first_turn_fires_exactly_once(tmp_path, monkeypatch):
    """D5-03: first dispatch injects first_turn_recall; second dispatch does not."""
    # In-memory stand-in for daemon_state: session s1 starts out pending.
    pending = {"s1": True}

    def _fake_load():
        # Hand out a copy so callers cannot mutate `pending` directly.
        return {"first_turn_pending": dict(pending)}

    def _fake_save(state):
        # Mirror whatever pending map was saved back into the shared dict.
        saved = state.get("first_turn_pending", {})
        pending.clear()
        pending.update(saved)

    monkeypatch.setattr("iai_mcp.daemon_state.load_state", _fake_load)
    monkeypatch.setattr("iai_mcp.daemon_state.save_state", _fake_save)

    store = MemoryStore(path=tmp_path)
    _seed_one_record(store, "session one reference content")

    params = {
        "cue": "reference content",
        "session_id": "s1",
        "cue_embedding": [0.1] * EMBED_DIM,
    }
    # The same request is dispatched twice; only the first may carry the hook.
    resp1 = core.dispatch(store, "memory_recall", params)
    resp2 = core.dispatch(store, "memory_recall", params)

    assert "first_turn_recall" in resp1, f"first dispatch missing hook: {resp1.keys()}"
    assert "first_turn_recall" not in resp2, (
        f"second dispatch should NOT have hook: {resp2.keys()}"
    )
|
||||
|
||||
|
||||
def test_first_turn_budget_capped_at_400(tmp_path, monkeypatch):
    """D5-03: first_turn_recall budget_tokens ≤ 400."""
    pending = {"s2": True}

    def _fake_load():
        return {"first_turn_pending": dict(pending)}

    def _fake_save(s):
        # Any save clears the pending map, whatever state was passed in.
        pending.clear()

    monkeypatch.setattr("iai_mcp.daemon_state.load_state", _fake_load)
    monkeypatch.setattr("iai_mcp.daemon_state.save_state", _fake_save)

    store = MemoryStore(path=tmp_path)
    _seed_one_record(store)

    request = {
        "cue": "X",
        "session_id": "s2",
        "cue_embedding": [0.1] * EMBED_DIM,
    }
    resp = core.dispatch(store, "memory_recall", request)

    ftr = resp.get("first_turn_recall")
    assert ftr is not None, f"first_turn_recall missing: {resp.keys()}"
    assert ftr.get("budget_tokens", 0) <= 400, f"budget too high: {ftr}"
|
||||
|
||||
|
||||
def test_daemon_unreachable_falls_back_silently(tmp_path, monkeypatch):
    """D5-03 silent-fail: daemon_state read error must not break dispatch."""
    def _raise_on_load():
        raise RuntimeError("synthetic daemon_state failure")

    monkeypatch.setattr("iai_mcp.daemon_state.load_state", _raise_on_load)

    store = MemoryStore(path=tmp_path)
    _seed_one_record(store)

    request = {
        "cue": "X",
        "session_id": "s3",
        "cue_embedding": [0.1] * EMBED_DIM,
    }
    # The dispatch itself must not raise even though load_state does.
    resp = core.dispatch(store, "memory_recall", request)

    # The normal response shape is kept and the first-turn hook is absent.
    assert "hits" in resp
    assert "first_turn_recall" not in resp
|
||||
|
||||
|
||||
def test_first_turn_emits_event(tmp_path, monkeypatch):
    """D5-03: first_turn hook writes kind=first_turn_recall event."""
    from iai_mcp.events import query_events

    pending = {"s4": True}

    def _fake_load():
        return {"first_turn_pending": dict(pending)}

    def _fake_save(s):
        pending.clear()

    monkeypatch.setattr("iai_mcp.daemon_state.load_state", _fake_load)
    monkeypatch.setattr("iai_mcp.daemon_state.save_state", _fake_save)

    store = MemoryStore(path=tmp_path)
    _seed_one_record(store)

    request = {
        "cue": "something",
        "session_id": "s4",
        "cue_embedding": [0.1] * EMBED_DIM,
    }
    core.dispatch(store, "memory_recall", request)

    # At least one first_turn_recall event must now be queryable.
    events = query_events(store, kind="first_turn_recall", limit=10)
    assert len(events) >= 1, "first_turn_recall event should have been emitted"
|
||||
|
||||
|
||||
def test_input_length_clamp_2000(tmp_path, monkeypatch):
    """V5 security: first-turn cue clamped to 2000 chars before recall."""
    pending = {"s5": True}

    def _fake_load():
        return {"first_turn_pending": dict(pending)}

    def _fake_save(s):
        pending.clear()

    monkeypatch.setattr("iai_mcp.daemon_state.load_state", _fake_load)
    monkeypatch.setattr("iai_mcp.daemon_state.save_state", _fake_save)

    store = MemoryStore(path=tmp_path)
    _seed_one_record(store)

    # Oversized cue (5000 chars) that the hook is expected to clamp.
    huge_cue = "X" * 5000

    # Wrap retrieve.recall so the `cue_text` keyword of each call is recorded.
    seen_cues: list[str] = []
    from iai_mcp import retrieve as _retrieve
    real_recall = _retrieve.recall

    def _spy(*args, **kwargs):
        cue = kwargs.get("cue_text", "")
        # Cues with "first-turn" in their first 20 characters are not
        # recorded (original note: avoid capturing the outer dispatch).
        if "first-turn" not in cue[:20]:
            seen_cues.append(cue)
        return real_recall(*args, **kwargs)

    monkeypatch.setattr("iai_mcp.retrieve.recall", _spy)

    request = {
        "cue": huge_cue,
        "session_id": "s5",
        "cue_embedding": [0.1] * EMBED_DIM,
    }
    core.dispatch(store, "memory_recall", request)

    # At least one recorded recall call must carry a cue of <= 2000 chars.
    # Longer cues from other calls are tolerated; the test only fails when
    # no clamped cue was seen at all.
    assert any(len(c) <= 2000 for c in seen_cues), (
        f"no clamped cue observed; len spread: {[len(c) for c in seen_cues]}"
    )
|
||||
105
tests/test_formality_scorer.py
Normal file
105
tests/test_formality_scorer.py
Normal file
|
|
@ -0,0 +1,105 @@
|
|||
"""Plan 03-03 Task 1 RED + Task 2 GREEN — surface-feature formality scorer.
|
||||
|
||||
Validates the formality scorer against a RU+EN fixture of ~50 formal/informal pairs.
|
||||
Constitutional guard: the scorer observes ONLY the user's surface text. There is no
|
||||
user-internal-state signal anywhere in this test or in the module it tests.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import warnings
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# JSON fixture of formal/informal text pairs, resolved relative to this
# test module's directory.
FIXTURE_PATH = Path(__file__).parent / "fixtures" / "formality_ru_en_50pairs.json"
|
||||
|
||||
|
||||
def _load_fixture():
    """Load and return the parsed JSON content of FIXTURE_PATH.

    The file is opened explicitly as UTF-8. The fixture contains Russian
    text, so relying on the platform's locale default encoding could fail
    or mis-decode on systems where that default is not UTF-8.
    """
    with FIXTURE_PATH.open(encoding="utf-8") as f:
        return json.load(f)
|
||||
|
||||
|
||||
# ------------------------------------------------------------- fixture integrity
|
||||
def test_fixture_loads_and_has_enough_pairs():
    """The fixture has at least 45 pairs and covers both "en" and "ru"."""
    pairs = _load_fixture()
    assert len(pairs) >= 45, f"expected ~50 pairs, got {len(pairs)}"

    languages = {pair["lang"] for pair in pairs}
    assert "en" in languages and "ru" in languages
|
||||
|
||||
|
||||
def test_fixture_shape():
    """Each pair has the required keys and non-blank formal/informal text."""
    required_keys = {"id", "lang", "formal", "informal"}
    for pair in _load_fixture():
        assert set(pair.keys()) >= required_keys
        assert isinstance(pair["formal"], str) and pair["formal"].strip()
        assert isinstance(pair["informal"], str) and pair["informal"].strip()
|
||||
|
||||
|
||||
# ------------------------------------------------------------- scorer contract
|
||||
def test_formality_score_fixture_accuracy_at_least_85_percent():
    """Formal text must score > informal text on >= 85% of pairs."""
    from iai_mcp.formality import formality_score

    pairs = _load_fixture()

    # Count the pairs where the formal variant scores strictly higher.
    wins = 0
    for pair in pairs:
        formal_score = formality_score(pair["formal"], pair["lang"])
        informal_score = formality_score(pair["informal"], pair["lang"])
        if formal_score > informal_score:
            wins += 1

    accuracy = wins / len(pairs)
    assert accuracy >= 0.85, f"accuracy {accuracy:.2%} ({wins}/{len(pairs)}) below 85% floor"
|
||||
|
||||
|
||||
def test_formality_score_en_formal_anchor():
    """A clearly formal English sentence scores at least 0.6."""
    from iai_mcp.formality import formality_score

    sentence = "The proposal is, therefore, accepted."
    score = formality_score(sentence, "en")
    assert score >= 0.6, f"expected highly formal sentence >= 0.6, got {score:.3f}"
|
||||
|
||||
|
||||
def test_formality_score_en_informal_anchor():
    """A clearly informal English phrase scores at most 0.3."""
    from iai_mcp.formality import formality_score

    phrase = "yo, works for me lol"
    score = formality_score(phrase, "en")
    assert score <= 0.3, f"expected clearly informal <= 0.3, got {score:.3f}"
|
||||
|
||||
|
||||
def test_formality_score_unknown_lang_returns_neutral_with_warning():
    """MEMORY.md global-product mandate: unknown lang degrades gracefully."""
    from iai_mcp.formality import formality_score

    with warnings.catch_warnings(record=True) as caught:
        # Record every warning, including ones a default filter would hide.
        warnings.simplefilter("always")
        score = formality_score("some test text", "zz")

    assert score == 0.5
    # At least one warning must mention the function name or the bad code.
    messages = [str(w.message) for w in caught]
    assert any("formality_score" in m.lower() or "zz" in m for m in messages)
|
||||
|
||||
|
||||
def test_formality_score_unknown_lang_never_raises():
    """Scoring must not raise for any of the listed language strings."""
    from iai_mcp.formality import formality_score

    candidate_langs = ("", "zz", "xx", "de", "fr")
    for lang in candidate_langs:
        # Warnings are silenced; only the absence of an exception matters.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            formality_score("test", lang)
|
||||
|
||||
|
||||
def test_formality_score_empty_text_returns_zero():
    """Empty and whitespace-only text both score exactly 0.0."""
    from iai_mcp.formality import formality_score

    empty_score = formality_score("", "en")
    assert empty_score == 0.0
    blank_score = formality_score("   ", "en")
    assert blank_score == 0.0
|
||||
|
||||
|
||||
def test_formality_score_range_bounded_in_0_1():
    """Every fixture text, formal or informal, scores within [0, 1]."""
    from iai_mcp.formality import formality_score

    for pair in _load_fixture():
        lang = pair["lang"]
        for txt in (pair["formal"], pair["informal"]):
            s = formality_score(txt, lang)
            assert 0.0 <= s <= 1.0, f"score {s} out of [0, 1] for {txt!r}"
|
||||
189
tests/test_fsrs_decay.py
Normal file
189
tests/test_fsrs_decay.py
Normal file
|
|
@ -0,0 +1,189 @@
|
|||
"""Tests for FSRS-style edge decay sweep inside sleep._decay_edges.
|
||||
|
||||
Behaviour:
|
||||
- hebbian edges with last updated > 90d ago and weight < ε after decay are pruned.
|
||||
- hebbian edges above ε are updated with the decayed weight.
|
||||
- NON-hebbian edges (contradicts, invariant_anchor, consolidated_from, etc.)
|
||||
are NEVER pruned by the sweep. This is load-bearing for S5 identity
|
||||
protection: invariant anchors must survive decay.
|
||||
- never_decay records are unaffected on the records side (Plan 02-01 __post_init__
|
||||
already enforces this on detail_level>=3; decay loop here targets edges only).
|
||||
- DECAY_EPSILON defaults to 0.01.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from uuid import UUID, uuid4
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
def _insert_stale_edge(
    store, edge_type: str, weight: float, days_old: int,
) -> tuple[str, str]:
    """Insert one aged edge row straight into the "edges" table.

    The row is written with `updated_at` set `days_old` days before the
    current UTC time. Per the original author's note, this bypasses
    boost_edges, which always stamps now() as updated_at.

    Args:
        store: object exposing `db.open_table("edges")`, whose table
            accepts a list of row dicts through `add`.
        edge_type: value stored in the row's "edge_type" field.
        weight: edge weight; coerced to float before insertion.
        days_old: age of the edge in days.

    Returns:
        The `(src, dst)` pair of freshly generated UUID strings, so callers
        can look the row up again.
    """
    edges_table = store.db.open_table("edges")
    backdated = datetime.now(timezone.utc) - timedelta(days=days_old)
    src_id, dst_id = str(uuid4()), str(uuid4())
    edges_table.add([
        {
            "src": src_id,
            "dst": dst_id,
            "edge_type": edge_type,
            "weight": float(weight),
            "updated_at": backdated,
        }
    ])
    return src_id, dst_id
|
||||
|
||||
|
||||
# ---- constants
|
||||
|
||||
|
||||
def test_decay_epsilon_default():
    """The sleep module's DECAY_EPSILON constant equals 0.01."""
    from iai_mcp import sleep as sleep_mod

    epsilon = sleep_mod.DECAY_EPSILON
    assert epsilon == 0.01
|
||||
|
||||
|
||||
# ---- sweep behaviour
|
||||
|
||||
|
||||
def test_decay_edges_preserves_fresh_hebbian_edges(tmp_path):
    """Edges <= 90d old are untouched by the sweep."""
    from iai_mcp.sleep import _decay_edges
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    src, dst = _insert_stale_edge(store, "hebbian", weight=0.5, days_old=30)

    summary = _decay_edges(store)
    assert summary["decayed"] == 0
    assert summary["pruned"] == 0

    # The edge is still there and keeps its original weight.
    edges = store.db.open_table("edges").to_pandas()
    same_endpoints = (edges["src"] == src) & (edges["dst"] == dst)
    matching = edges[same_endpoints]
    assert not matching.empty
    assert float(matching.iloc[0]["weight"]) == 0.5
|
||||
|
||||
|
||||
def test_decay_edges_decays_stale_hebbian_edges(tmp_path):
    """Edge >90d old and weight above ε is decayed, not pruned."""
    from iai_mcp.sleep import _decay_edges
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    src, dst = _insert_stale_edge(store, "hebbian", weight=0.8, days_old=100)

    summary = _decay_edges(store)
    assert summary["decayed"] >= 1

    # The row survives, but its weight dropped below the starting 0.8.
    edges = store.db.open_table("edges").to_pandas()
    same_endpoints = (edges["src"] == src) & (edges["dst"] == dst)
    matching = edges[same_endpoints]
    assert not matching.empty
    assert float(matching.iloc[0]["weight"]) < 0.8
|
||||
|
||||
|
||||
def test_decay_edges_prunes_below_epsilon(tmp_path):
    """Edge decayed to weight < ε is removed."""
    from iai_mcp.sleep import _decay_edges
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    # 200 days old with a weight of only 0.02: expected to decay below 0.01.
    src, dst = _insert_stale_edge(store, "hebbian", weight=0.02, days_old=200)

    summary = _decay_edges(store)
    assert summary["pruned"] >= 1

    # No hebbian row between the two endpoints is left in the table.
    edges = store.db.open_table("edges").to_pandas()
    mask = (
        (edges["src"] == src)
        & (edges["dst"] == dst)
        & (edges["edge_type"] == "hebbian")
    )
    assert edges[mask].empty
|
||||
|
||||
|
||||
def test_decay_edges_spares_contradicts(tmp_path):
    """Decay sweep only touches hebbian edges; contradicts edges survive forever."""
    from iai_mcp.sleep import _decay_edges
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    src, dst = _insert_stale_edge(store, "contradicts", weight=0.5, days_old=1000)

    _decay_edges(store)

    # The 1000-day-old contradicts edge is present with its weight unchanged.
    edges = store.db.open_table("edges").to_pandas()
    mask = (
        (edges["src"] == src)
        & (edges["dst"] == dst)
        & (edges["edge_type"] == "contradicts")
    )
    matching = edges[mask]
    assert not matching.empty
    assert float(matching.iloc[0]["weight"]) == 0.5
|
||||
|
||||
|
||||
def test_decay_edges_spares_invariant_anchor(tmp_path):
    """S5 invariant_anchor edges MUST NOT be pruned."""
    from iai_mcp.sleep import _decay_edges
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    # Weight 0.001 and 5000 days old: only the edge type protects this row.
    src, dst = _insert_stale_edge(store, "invariant_anchor", weight=0.001, days_old=5000)

    _decay_edges(store)

    edges = store.db.open_table("edges").to_pandas()
    mask = (
        (edges["src"] == src)
        & (edges["dst"] == dst)
        & (edges["edge_type"] == "invariant_anchor")
    )
    assert not edges[mask].empty  # survived
|
||||
|
||||
|
||||
|
||||
def test_decay_edges_spares_consolidated_from(tmp_path):
    """consolidated_from (semantic<-episode) edges must survive decay."""
    from iai_mcp.sleep import _decay_edges
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    src, dst = _insert_stale_edge(store, "consolidated_from", weight=0.01, days_old=2000)

    _decay_edges(store)

    edges = store.db.open_table("edges").to_pandas()
    mask = (
        (edges["src"] == src)
        & (edges["dst"] == dst)
        & (edges["edge_type"] == "consolidated_from")
    )
    assert not edges[mask].empty
|
||||
|
||||
|
||||
def test_decay_edges_custom_epsilon(tmp_path):
    """Epsilon can be overridden per-call.

    Phase 1 runs one sweep with the default-sized epsilon (0.01) on a
    95-day-old edge. Its outcome is not asserted; the edge is removed
    afterwards if it is still present. Phase 2 inserts a second 95-day-old
    edge with weight 0.3 and sweeps with epsilon=0.5; that edge must be
    pruned.
    """
    from iai_mcp.sleep import _decay_edges
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)

    # Phase 1: sweep with epsilon=0.01. The return value is deliberately
    # not bound because nothing about this sweep is asserted.
    src, dst = _insert_stale_edge(store, "hebbian", weight=0.05, days_old=95)
    _decay_edges(store, epsilon=0.01)

    # Clean up the phase-1 edge so phase 2 starts from a known state.
    df = store.db.open_table("edges").to_pandas()
    remaining = df[(df["src"] == src) & (df["dst"] == dst) & (df["edge_type"] == "hebbian")]
    if not remaining.empty:
        store.db.open_table("edges").delete(
            f"src = '{src}' AND dst = '{dst}' AND edge_type = 'hebbian'"
        )

    # Phase 2: with epsilon=0.5 and a starting weight of 0.3, the edge is
    # below the threshold and has to be pruned by this single sweep.
    src2, dst2 = _insert_stale_edge(store, "hebbian", weight=0.3, days_old=95)
    result_custom = _decay_edges(store, epsilon=0.5)

    df2 = store.db.open_table("edges").to_pandas()
    row = df2[(df2["src"] == src2) & (df2["dst"] == dst2) & (df2["edge_type"] == "hebbian")]
    assert row.empty
    assert result_custom["pruned"] >= 1
|
||||
200
tests/test_fsrs_persistence.py
Normal file
200
tests/test_fsrs_persistence.py
Normal file
|
|
@ -0,0 +1,200 @@
|
|||
"""Tests for 02-REVIEW.md H-01 (FSRS tick not persisted across restart).
|
||||
|
||||
Bug: `run_light_consolidation` calls `_apply_fsrs(r, now)` which mutates
|
||||
record.stability and record.last_reviewed in-place on the in-memory
|
||||
MemoryRecord object. The updated record was never written back to the store.
|
||||
Every process restart reset all FSRS fields to their previous checkpoint.
|
||||
|
||||
Fix:
|
||||
- Add MemoryStore.update_record(record) that rewrites ONLY the FSRS
|
||||
columns (stability, difficulty, last_reviewed, updated_at) via
|
||||
_uuid_literal-safe WHERE predicate. No embedding / provenance /
|
||||
tags / community_id changes -- avoids clobbering concurrent
|
||||
boost_edges / append_provenance writers.
|
||||
- Call store.update_record(r) inside run_light_consolidation after
|
||||
_apply_fsrs mutates r.
|
||||
|
||||
Constitutional contract (MEM-07 FSRS biological fidelity + D-STORAGE):
|
||||
FSRS stability is the biological decay curve state. Losing it on every
|
||||
restart is equivalent to wiping short-term memory at every session
|
||||
switch -- unacceptable for a system whose promise is "Claude remembers
|
||||
every word".
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from uuid import uuid4
|
||||
|
||||
import pytest
|
||||
|
||||
from iai_mcp.types import EMBED_DIM, MemoryRecord
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- helpers
|
||||
|
||||
|
||||
def _record(
    *,
    text: str = "fsrs-target",
    stability: float = 0.1,
    prov_seconds_ago: int = 30,
) -> MemoryRecord:
    """Build an episodic record with one provenance entry.

    The entry is timestamped `prov_seconds_ago` seconds before the current
    UTC time. Per the original author's note, run_light_consolidation only
    ticks records whose last provenance entry is less than 1 h old, so the
    default of 30 s makes the record eligible.
    """
    created = datetime.now(timezone.utc)
    provenance_entry = {
        "ts": (created - timedelta(seconds=prov_seconds_ago)).isoformat(),
        "cue": "recall",
        "session_id": "s1",
    }
    # Embedding: 1.0 in the first slot, 0.0 everywhere else.
    unit_embedding = [1.0] + [0.0] * (EMBED_DIM - 1)
    return MemoryRecord(
        id=uuid4(),
        tier="episodic",
        literal_surface=text,
        aaak_index="",
        embedding=unit_embedding,
        community_id=None,
        centrality=0.0,
        detail_level=2,
        pinned=False,
        stability=stability,
        difficulty=0.3,
        last_reviewed=None,
        never_decay=False,
        never_merge=False,
        provenance=[provenance_entry],
        created_at=created,
        updated_at=created,
        tags=[],
        language="en",
    )
|
||||
|
||||
|
||||
# ============================================== update_record API unit tests
|
||||
|
||||
|
||||
def test_update_record_writes_back_fsrs_columns(tmp_path):
    """MemoryStore.update_record persists stability/difficulty/last_reviewed."""
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    rec = _record(stability=0.1)
    store.insert(rec)

    # Change the FSRS fields on the in-memory object, then write it back.
    rec.stability = 0.55
    rec.difficulty = 0.42
    rec.last_reviewed = datetime.now(timezone.utc)
    store.update_record(rec)

    reloaded = store.get(rec.id)
    assert reloaded is not None
    assert reloaded.stability == pytest.approx(0.55, abs=1e-3)
    assert reloaded.difficulty == pytest.approx(0.42, abs=1e-3)
    assert reloaded.last_reviewed is not None
|
||||
|
||||
|
||||
def test_update_record_rejects_unknown_id(tmp_path):
    """update_record on an id that is not in the table must be a no-op:
    no exception and no table growth."""
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    # Built but never inserted, so its id does not exist in the table.
    phantom = _record(stability=0.9)

    rows_before = store.db.open_table("records").count_rows()

    # Must not raise.
    store.update_record(phantom)

    # The row count is unchanged, i.e. nothing was inserted.
    rows_after = store.db.open_table("records").count_rows()
    assert rows_after == rows_before
|
||||
|
||||
|
||||
def test_update_record_does_not_touch_untouched_columns(tmp_path):
    """update_record must only rewrite FSRS-relevant columns; tags and
    provenance are checked here to survive unchanged."""
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    rec = _record(stability=0.1)
    rec.tags = ["important", "keep-me"]
    rec.provenance = [
        {"ts": "2026-04-16T00:00:00Z", "cue": "seed", "session_id": "s0"},
    ]
    store.insert(rec)

    # Touch only FSRS fields; rec.tags and rec.provenance stay as inserted.
    rec.stability = 0.6
    rec.last_reviewed = datetime.now(timezone.utc)
    store.update_record(rec)

    reloaded = store.get(rec.id)
    assert reloaded is not None
    # The FSRS column was updated ...
    assert reloaded.stability == pytest.approx(0.6, abs=1e-3)
    # ... while the unrelated columns kept their values.
    assert reloaded.tags == ["important", "keep-me"]
    assert len(reloaded.provenance) == 1
    assert reloaded.provenance[0]["cue"] == "seed"
|
||||
|
||||
|
||||
# ============================================== H-01 end-to-end persistence
|
||||
|
||||
|
||||
def test_fsrs_state_persists_across_store_reopen(tmp_path):
    """H-01 end-to-end: FSRS state written by the light cycle survives a reopen.

    After run_light_consolidation, a NEW MemoryStore instance on the same
    tmp_path must see the boosted stability and a populated last_reviewed.
    Per the bug report in this module's docstring, before the fix the
    update lived only on the in-memory object and stability stayed at 0.1.
    """
    from iai_mcp.sleep import FSRS_STABILITY_BOOST, run_light_consolidation
    from iai_mcp.store import MemoryStore

    # Phase A: insert a record with fresh provenance and run the light cycle.
    store = MemoryStore(path=tmp_path)
    rec = _record(stability=0.1, prov_seconds_ago=30)
    rec_id = rec.id
    store.insert(rec)

    outcome = run_light_consolidation(store, session_id="persist-test")
    assert outcome["fsrs_ticked"] >= 1

    # Phase B: drop the first store and re-read through a second instance.
    del store
    reopened = MemoryStore(path=tmp_path)
    fresh = reopened.get(rec_id)
    assert fresh is not None

    # Stability must carry the boost (with a 1e-3 tolerance).
    expected_min = 0.1 + FSRS_STABILITY_BOOST - 1e-3
    assert fresh.stability >= expected_min, (
        f"FSRS stability not persisted: expected >= {expected_min}, "
        f"got {fresh.stability}"
    )
    # last_reviewed must have been written as well.
    assert fresh.last_reviewed is not None
|
||||
|
||||
|
||||
def test_fsrs_persistence_only_fresh_provenance(tmp_path):
    """A record with two-hour-old provenance must keep its stability.

    Guards the light-phase gating: the seeded record is built with
    ``prov_seconds_ago=7200`` and its stored stability is expected to still
    be ~0.1 after ``run_light_consolidation`` has run.
    """
    from iai_mcp.sleep import run_light_consolidation
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)

    # 7200 s = 2 h, i.e. older than the one-hour tick window.
    stale = _record(stability=0.1, prov_seconds_ago=7200)
    store.insert(stale)

    run_light_consolidation(store, session_id="no-tick")

    reloaded = store.get(stale.id)
    assert reloaded is not None
    # No tick applied, so the stored stability is unchanged.
    assert reloaded.stability == pytest.approx(0.1, abs=1e-3)
|
||||
112
tests/test_graph.py
Normal file
112
tests/test_graph.py
Normal file
|
|
@ -0,0 +1,112 @@
|
|||
"""Tests for iai_mcp.graph (D-04 dual-library wrapper, CONN-03 2-hop spread)."""
|
||||
from __future__ import annotations
|
||||
|
||||
from uuid import uuid4
|
||||
|
||||
import pytest
|
||||
|
||||
from iai_mcp.graph import IGRAPH_THRESHOLD, MemoryGraph, _HAS_IGRAPH
|
||||
|
||||
|
||||
def test_small_graph_uses_networkx() -> None:
    """A ten-node graph must report the ``networkx`` backend."""
    graph = MemoryGraph()
    node_count = 10
    for _ in range(node_count):
        graph.add_node(uuid4(), community_id=None, embedding=[0.0] * 384)
    assert graph.backend == "networkx"
|
||||
|
||||
|
||||
@pytest.mark.skipif(not _HAS_IGRAPH, reason="igraph optional on some boxes")
def test_large_graph_switches_to_igraph() -> None:
    """Adding ``IGRAPH_THRESHOLD + 1`` nodes must switch the backend to igraph."""
    graph = MemoryGraph()
    node_count = IGRAPH_THRESHOLD + 1
    for _ in range(node_count):
        graph.add_node(uuid4(), community_id=None, embedding=[0.0] * 384)
    assert graph.backend == "igraph"
|
||||
|
||||
|
||||
def test_backend_stays_networkx_just_below_threshold() -> None:
    """With ``IGRAPH_THRESHOLD - 1`` nodes the backend must still be networkx."""
    graph = MemoryGraph()
    node_count = IGRAPH_THRESHOLD - 1
    for _ in range(node_count):
        graph.add_node(uuid4(), community_id=None, embedding=[0.0] * 384)
    assert graph.backend == "networkx"
|
||||
|
||||
|
||||
def test_two_hop_reaches_exactly_two_hops() -> None:
    """CONN-03: on the chain A-B-C-D seeded at A, only B and C are reached.

    D sits three hops from the seed and must be excluded, as must the seed
    itself.
    """
    graph = MemoryGraph()
    chain = [uuid4() for _ in range(4)]
    for node_id in chain:
        graph.add_node(node_id, community_id=None, embedding=[0.0] * 384)
    # Link consecutive nodes: A-B, B-C, C-D.
    for left, right in zip(chain, chain[1:]):
        graph.add_edge(left, right)

    a, b, c, d = chain
    reached = set(graph.two_hop_neighborhood([a], top_k=5))
    assert b in reached
    assert c in reached
    assert d not in reached  # three hops from the seed
    assert a not in reached  # the seed is never part of its own result
|
||||
|
||||
|
||||
def test_two_hop_multiple_seeds_deduped() -> None:
    """Seeding both ends of the chain A-B-C must yield exactly ``{B}``.

    Each end reaches the middle node and the opposite end within two hops;
    once the seeds are removed from the union only the middle node is left.
    """
    graph = MemoryGraph()
    first, middle, last = uuid4(), uuid4(), uuid4()
    for node_id in (first, middle, last):
        graph.add_node(node_id, community_id=None, embedding=[0.0] * 384)
    graph.add_edge(first, middle)
    graph.add_edge(middle, last)

    neighbours = set(graph.two_hop_neighborhood([first, last], top_k=5))
    assert neighbours == {middle}
|
||||
|
||||
|
||||
def test_two_hop_empty_seeds_returns_empty_list() -> None:
    """An empty seed list on an empty graph must produce an empty list."""
    empty_graph = MemoryGraph()
    result = empty_graph.two_hop_neighborhood([], top_k=5)
    assert result == []
|
||||
|
||||
|
||||
def test_centrality_hub_beats_leaves() -> None:
    """In a five-node star the hub's centrality must exceed every leaf's."""
    graph = MemoryGraph()
    hub = uuid4()
    leaves = [uuid4() for _ in range(4)]

    graph.add_node(hub, community_id=None, embedding=[0.0] * 384)
    for leaf in leaves:
        graph.add_node(leaf, community_id=None, embedding=[0.0] * 384)
        graph.add_edge(hub, leaf)

    scores = graph.centrality()
    hub_score = scores[hub]
    for leaf in leaves:
        assert hub_score > scores[leaf]
|
||||
|
||||
|
||||
def test_centrality_no_edges_all_zero() -> None:
    """Five isolated nodes must each get a centrality of exactly 0.0."""
    graph = MemoryGraph()
    node_count = 5
    for _ in range(node_count):
        graph.add_node(uuid4(), community_id=None, embedding=[0.0] * 384)

    scores = graph.centrality()
    assert all(score == 0.0 for score in scores.values())
    assert len(scores) == 5
|
||||
|
||||
|
||||
def test_get_embedding_returns_stored_vector() -> None:
    """``get_embedding`` returns the stored vector, or ``None`` for unknown ids."""
    graph = MemoryGraph()
    node_id = uuid4()

    # Unit vector along the first axis, 384 entries in total.
    vector = [0.0] * 384
    vector[0] = 1.0

    graph.add_node(node_id, community_id=None, embedding=vector)
    assert graph.get_embedding(node_id) == vector
    assert graph.get_embedding(uuid4()) is None
|
||||
|
||||
|
||||
def test_rich_club_coefficient_on_star_graph() -> None:
    """On a four-leaf star the rich-club coefficient is a non-negative float."""
    graph = MemoryGraph()
    hub = uuid4()
    leaves = [uuid4() for _ in range(4)]

    graph.add_node(hub, community_id=None, embedding=[0.0] * 384)
    for leaf in leaves:
        graph.add_node(leaf, community_id=None, embedding=[0.0] * 384)
        graph.add_edge(hub, leaf)

    # The call itself must not raise; only type and sign are checked.
    coefficient = graph.rich_club_coefficient()
    assert isinstance(coefficient, float)
    assert coefficient >= 0.0
|
||||
340
tests/test_graph_native_recall.py
Normal file
340
tests/test_graph_native_recall.py
Normal file
|
|
@ -0,0 +1,340 @@
|
|||
"""Plan 05-12 — graph-native recall tests (RED scaffold).
|
||||
|
||||
Close the latency gap by switching recall_for_response's seed + spread
|
||||
stages from per-id ``store.get(rid)`` LanceDB round-trips to in-RAM
|
||||
``G.nodes[rid]`` attribute lookups. ``build_runtime_graph`` attaches the
|
||||
record payload (embedding, surface, centrality, tier) to every graph
|
||||
node so the recall hot path never touches disk for a graph-resident id.
|
||||
|
||||
Covered contracts:
|
||||
|
||||
A1 — every node in G carries embedding + surface + centrality + tier
|
||||
after ``build_runtime_graph``.
|
||||
A2 — seed stage does NOT call ``store.get`` (patch raises if invoked).
|
||||
A3 — spread stage (rank/reachable walk) does NOT call ``store.get``.
|
||||
A4 — verbatim L0 fast path (cue_text exact-match / gate skip) still
|
||||
hits ``store.get`` — invariant path is untouched.
|
||||
A5 — partial sync / missing attribute on a node falls back to
|
||||
``store.get`` without crashing; recall still returns hits.
|
||||
A6 — correctness fence: recall returns the seeded records with
|
||||
high cosine similarity (no correctness regression).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from unittest import mock
|
||||
from uuid import uuid4
|
||||
|
||||
import pytest
|
||||
|
||||
from iai_mcp import retrieve
|
||||
from iai_mcp.pipeline import recall_for_response
|
||||
from iai_mcp.store import MemoryStore
|
||||
from iai_mcp.types import MemoryRecord
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- fixtures
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _isolated_keyring(monkeypatch: pytest.MonkeyPatch):
    """Back the ``keyring`` module with an in-memory dict for each test.

    ``get_password`` / ``set_password`` / ``delete_password`` are patched to
    read and write a dict keyed by ``(service, username)``, so tests never
    touch the real OS keychain. The dict is yielded to the test.
    """
    import keyring as _keyring

    fake: dict[tuple[str, str], str] = {}

    def _get(s, u):
        return fake.get((s, u))

    def _set(s, u, p):
        fake[(s, u)] = p

    def _delete(s, u):
        return fake.pop((s, u), None)

    monkeypatch.setattr(_keyring, "get_password", _get)
    monkeypatch.setattr(_keyring, "set_password", _set)
    monkeypatch.setattr(_keyring, "delete_password", _delete)
    yield fake
|
||||
|
||||
|
||||
class _DetEmbedder:
|
||||
"""Deterministic embedder — seeds record vectors by text hash."""
|
||||
|
||||
def __init__(self, dim: int = 384) -> None:
|
||||
self.DIM = dim
|
||||
self.DEFAULT_DIM = dim
|
||||
self.DEFAULT_MODEL_KEY = "test"
|
||||
|
||||
def embed(self, text: str) -> list[float]:
|
||||
import hashlib
|
||||
import random
|
||||
|
||||
digest = hashlib.sha256(text.encode("utf-8")).hexdigest()
|
||||
rng = random.Random(int(digest[:16], 16))
|
||||
v = [rng.random() * 2 - 1 for _ in range(self.DIM)]
|
||||
n = sum(x * x for x in v) ** 0.5
|
||||
return [x / n for x in v] if n > 0 else v
|
||||
|
||||
|
||||
def _make_record(vec: list[float], text: str) -> MemoryRecord:
    """Build an unpinned episodic ``MemoryRecord`` with a random id.

    ``vec`` becomes the embedding and ``text`` the literal surface; both
    timestamps are set to the same current UTC instant and every other
    field gets a fixed neutral value.
    """
    stamp = datetime.now(timezone.utc)
    fields = dict(
        id=uuid4(),
        tier="episodic",
        literal_surface=text,
        aaak_index="",
        embedding=vec,
        community_id=None,
        centrality=0.0,
        detail_level=2,
        pinned=False,
        stability=0.0,
        difficulty=0.0,
        last_reviewed=None,
        never_decay=False,
        never_merge=False,
        provenance=[],
        created_at=stamp,
        updated_at=stamp,
        tags=["t"],
        language="en",
    )
    return MemoryRecord(**fields)
|
||||
|
||||
|
||||
@pytest.fixture
def seeded_store(tmp_path: Path) -> tuple[MemoryStore, _DetEmbedder, list[MemoryRecord]]:
    """Provide a fresh store holding twelve synthetic records.

    Returns ``(store, embedder, records)``. Record ``i`` has the surface
    ``"synthetic fact {i}"`` and the embedding of the text ``"fact-{i}"``.
    """
    store = MemoryStore(path=tmp_path / "lancedb")
    store.root = tmp_path
    embedder = _DetEmbedder(dim=store.embed_dim)

    records = []
    for index in range(12):
        record = _make_record(
            embedder.embed(f"fact-{index}"), f"synthetic fact {index}"
        )
        store.insert(record)
        records.append(record)
    return store, embedder, records
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- A1: node payload
|
||||
|
||||
|
||||
def test_A1_build_runtime_graph_attaches_node_payload(seeded_store):
    """A1: every graph node carries embedding, surface, centrality and tier.

    Inspects the NetworkX graph behind the runtime graph (``graph._nx``):
    there must be one node per seeded record, keyed by ``str(rec.id)``, and
    the embedding / surface / tier attributes must equal the record's values.
    """
    store, _emb, recs = seeded_store
    graph, _assignment, _rc = retrieve.build_runtime_graph(store)

    nx_graph = graph._nx
    assert nx_graph.number_of_nodes() == len(recs)

    required_attrs = ("embedding", "surface", "centrality", "tier")
    for rec in recs:
        attrs = nx_graph.nodes[str(rec.id)]
        for name in required_attrs:
            assert name in attrs, f"node {rec.id} missing {name} attr"
        # The payload values mirror the stored record.
        assert list(attrs["embedding"]) == list(rec.embedding)
        assert attrs["surface"] == rec.literal_surface
        assert attrs["tier"] == rec.tier
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- A2: seed stage
|
||||
|
||||
|
||||
def test_A2_seed_stage_reads_from_graph_not_store(seeded_store):
    """A2: the seed stage (top-K by cosine) must NOT call ``store.get``.

    ``MemoryStore.get`` is patched so that any lookup other than the fixed
    L0 id raises ``_Boom``. If ``recall_for_response`` still returns at
    least one hit, the seed stage was served from the graph.
    """
    from uuid import UUID

    store, emb, _recs = seeded_store
    graph, assignment, rich_club = retrieve.build_runtime_graph(store)

    # The verbatim L0 fast path (gate skip) also calls store.get, so use a
    # cue that the gate is not expected to classify as trivial.
    cue = "explain the authentication migration for long-running deployments"

    # Built once here rather than on every mocked call.
    l0_id = UUID("00000000-0000-0000-0000-000000000001")

    class _Boom(RuntimeError):
        """Raised when the hot path falls back to a store lookup."""

    def _explode(rid):
        # Let the L0 UUID fetch through as a clean miss (no L0 record is
        # seeded); any OTHER store.get call blows up.
        if rid == l0_id:
            return None
        raise _Boom(f"store.get({rid}) — seed stage should not call this")

    with mock.patch.object(MemoryStore, "get", side_effect=_explode):
        resp = recall_for_response(
            store=store,
            graph=graph,
            assignment=assignment,
            rich_club=rich_club,
            embedder=emb,
            cue=cue,
            session_id="s",
            budget_tokens=1500,
        )
    assert len(resp.hits) >= 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- A3: spread stage
|
||||
|
||||
|
||||
def test_A3_spread_stage_reads_from_graph_not_store(seeded_store):
    """A3: the rank and spread stages must not call ``store.get`` either.

    Same patching scheme as A2: every ``MemoryStore.get`` except the fixed
    L0 id raises. The test passes when recall completes without raising and
    ``resp.hits`` is a list.
    """
    from uuid import UUID

    store, emb, _recs = seeded_store
    graph, assignment, rich_club = retrieve.build_runtime_graph(store)

    class _Boom(RuntimeError):
        pass

    def _explode(rid):
        if rid != UUID("00000000-0000-0000-0000-000000000001"):
            raise _Boom(f"store.get({rid}) during spread/rank")
        return None

    recall_kwargs = dict(
        store=store,
        graph=graph,
        assignment=assignment,
        rich_club=rich_club,
        embedder=emb,
        cue="network stack changes for the web cache",
        session_id="s",
        budget_tokens=1500,
    )
    with mock.patch.object(MemoryStore, "get", side_effect=_explode):
        resp = recall_for_response(**recall_kwargs)

    # Reaching this line means no forbidden store.get call was made.
    assert isinstance(resp.hits, list)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- A4: L0 fast path
|
||||
|
||||
|
||||
def test_A4_verbatim_l0_fast_path_still_calls_store_get(seeded_store):
    """A4: the L0 (gate-skip) fast path still goes through ``store.get``.

    A pinned record with the fixed L0 id is inserted, the graph is rebuilt,
    and recall is run with the short cue ``"hi"`` while ``MemoryStore.get``
    is wrapped by a spy. At least one ``get`` call must be recorded.
    """
    from uuid import UUID

    store, emb, _recs = seeded_store

    # Seed the record stored under the deterministic L0 id.
    stamp = datetime.now(timezone.utc)
    identity_record = MemoryRecord(
        id=UUID("00000000-0000-0000-0000-000000000001"),
        tier="semantic",
        literal_surface="L0 identity kernel",
        aaak_index="",
        embedding=emb.embed("l0-identity"),
        community_id=None,
        centrality=0.0,
        detail_level=5,
        pinned=True,
        stability=0.0,
        difficulty=0.0,
        last_reviewed=None,
        never_decay=True,
        never_merge=True,
        provenance=[],
        created_at=stamp,
        updated_at=stamp,
        tags=["identity"],
        language="en",
    )
    store.insert(identity_record)
    graph, assignment, rich_club = retrieve.build_runtime_graph(store)

    # A short cue, intended to be treated as trivial by the gate.
    trivial_cue = "hi"

    with mock.patch.object(MemoryStore, "get", wraps=store.get) as get_spy:
        recall_for_response(
            store=store,
            graph=graph,
            assignment=assignment,
            rich_club=rich_club,
            embedder=emb,
            cue=trivial_cue,
            session_id="s",
            budget_tokens=1500,
        )

    # Verbatim invariant: the fast path made at least one store.get call.
    assert get_spy.call_count >= 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- A5: fallback
|
||||
|
||||
|
||||
def test_A5_missing_node_attr_falls_back_to_store_get(seeded_store):
    """A5: recall still returns hits when some nodes lack an embedding.

    The ``embedding`` attribute is stripped from the first six seeded nodes
    to simulate a race / partial sync; recall must not crash and must still
    return at least one hit.
    """
    store, emb, recs = seeded_store
    graph, assignment, rich_club = retrieve.build_runtime_graph(store)

    # Remove the embedding attr from half of the nodes.
    nx_graph = graph._nx
    for victim in recs[:6]:
        nx_graph.nodes[str(victim.id)].pop("embedding", None)

    resp = recall_for_response(
        store=store,
        graph=graph,
        assignment=assignment,
        rich_club=rich_club,
        embedder=emb,
        cue="summary of cli subcommand changes for the auth token rotation",
        session_id="s",
        budget_tokens=1500,
    )
    assert len(resp.hits) >= 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- A6: correctness
|
||||
|
||||
|
||||
def test_A6_m04_correctness_no_regression(seeded_store):
    """A6: recall on a seeded store returns only seeded records.

    Minimal happy-path fence for the unit suite: querying with the surface
    text of record 7 must return at least one hit, and every hit's
    ``record_id`` must belong to the seeded set. Which record ranks first
    is not asserted here.
    """
    store, emb, recs = seeded_store
    graph, assignment, rich_club = retrieve.build_runtime_graph(store)
    known_ids = {rec.id for rec in recs}

    # The cue reuses record 7's literal surface.
    resp = recall_for_response(
        store=store,
        graph=graph,
        assignment=assignment,
        rich_club=rich_club,
        embedder=emb,
        cue="synthetic fact 7",
        session_id="s",
        budget_tokens=1500,
    )

    # Something comes back ...
    assert len(resp.hits) >= 1
    # ... and nothing comes from outside the seeded set.
    assert all(hit.record_id in known_ids for hit in resp.hits)
|
||||
247
tests/test_graph_node_payload_sync.py
Normal file
247
tests/test_graph_node_payload_sync.py
Normal file
|
|
@ -0,0 +1,247 @@
|
|||
"""Plan 05-12 — store <-> graph write-sync hook tests (RED scaffold).
|
||||
|
||||
``build_runtime_graph`` registers a ``_graph_sync_hook`` on the store so
|
||||
every ``insert`` / ``update`` / ``delete`` mutates the in-RAM graph's
|
||||
node payload. Hook exceptions are logged to stderr as structured events
|
||||
but NEVER break the underlying store write — the store is authoritative.
|
||||
|
||||
Covered contracts:
|
||||
|
||||
B1 — ``store.insert`` with registered hook adds the graph node + payload.
|
||||
B2 — ``store.update`` mutates the node's embedding / surface payload.
|
||||
B3 — ``store.delete`` removes the node from the graph.
|
||||
B4 — hook that raises does not break ``store.insert`` — write
|
||||
completes, stderr carries a structured ``graph_sync_failed`` event.
|
||||
B5 — cold start: after save/try_load round-trip the node payload blob
|
||||
restores every node attribute from cache.
|
||||
B6 — CACHE_VERSION bump from "05-09-v1" -> "05-12-v1" invalidates the
|
||||
old cache cleanly (forward-compat fence).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from uuid import uuid4
|
||||
|
||||
import pytest
|
||||
|
||||
from iai_mcp import retrieve, runtime_graph_cache
|
||||
from iai_mcp.store import MemoryStore
|
||||
from iai_mcp.types import MemoryRecord
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- fixtures
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _isolated_keyring(monkeypatch: pytest.MonkeyPatch):
    """Back the ``keyring`` module with an in-memory dict for each test.

    ``get_password`` / ``set_password`` / ``delete_password`` are patched to
    read and write a dict keyed by ``(service, username)``; the dict is
    yielded to the test.
    """
    import keyring as _keyring

    fake: dict[tuple[str, str], str] = {}

    def _get(s, u):
        return fake.get((s, u))

    def _set(s, u, p):
        fake[(s, u)] = p

    def _delete(s, u):
        return fake.pop((s, u), None)

    monkeypatch.setattr(_keyring, "get_password", _get)
    monkeypatch.setattr(_keyring, "set_password", _set)
    monkeypatch.setattr(_keyring, "delete_password", _delete)
    yield fake
|
||||
|
||||
|
||||
@pytest.fixture
def store(tmp_path: Path) -> MemoryStore:
    """Provide a ``MemoryStore`` under ``tmp_path/lancedb`` with ``root`` set to ``tmp_path``."""
    memory_store = MemoryStore(path=tmp_path / "lancedb")
    memory_store.root = tmp_path
    return memory_store
|
||||
|
||||
|
||||
def _make_record(
    store: MemoryStore,
    text: str = "hello",
    vec_seed: float = 0.1,
) -> MemoryRecord:
    """Build an unpinned episodic ``MemoryRecord`` sized for ``store``.

    The embedding is ``vec_seed`` repeated ``store.embed_dim`` times and
    ``text`` becomes the literal surface. Both timestamps share one current
    UTC instant; every other field gets a fixed neutral value.
    """
    stamp = datetime.now(timezone.utc)
    fields = dict(
        id=uuid4(),
        tier="episodic",
        literal_surface=text,
        aaak_index="",
        embedding=[vec_seed] * store.embed_dim,
        community_id=None,
        centrality=0.0,
        detail_level=2,
        pinned=False,
        stability=0.0,
        difficulty=0.0,
        last_reviewed=None,
        never_decay=False,
        never_merge=False,
        provenance=[],
        created_at=stamp,
        updated_at=stamp,
        tags=["t"],
        language="en",
    )
    return MemoryRecord(**fields)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- B1: insert
|
||||
|
||||
|
||||
def test_B1_insert_updates_graph_node(store):
    """B1: an insert made after the graph is built shows up as a graph node.

    One record is seeded before ``build_runtime_graph``; a second insert
    afterwards must appear in ``graph._nx`` with its surface and an
    embedding attribute.
    """
    # Seed a record so the graph build has something to work with.
    seed = _make_record(store, "seed", 0.5)
    store.insert(seed)

    graph, _a, _rc = retrieve.build_runtime_graph(store)
    nx_graph = graph._nx
    assert str(seed.id) in nx_graph.nodes

    # The second insert should be mirrored into the already-built graph.
    late = _make_record(store, "freshly-inserted", 0.3)
    store.insert(late)

    late_key = str(late.id)
    assert late_key in graph._nx.nodes
    attrs = graph._nx.nodes[late_key]
    assert attrs.get("surface") == "freshly-inserted"
    assert "embedding" in attrs
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- B2: update
|
||||
|
||||
|
||||
def test_B2_update_mutates_node_payload(store):
    """B2: ``store.update`` rewrites the node's surface and embedding."""
    record = _make_record(store, "before-update", 0.2)
    store.insert(record)
    graph, _a, _rc = retrieve.build_runtime_graph(store)
    node_key = str(record.id)

    assert graph._nx.nodes[node_key]["surface"] == "before-update"

    # Change both payload fields, then persist.
    record.literal_surface = "after-update"
    record.embedding = [0.9] * store.embed_dim
    store.update(record)

    updated = graph._nx.nodes[node_key]
    assert updated["surface"] == "after-update"
    # The first embedding component now reflects the replaced vector.
    first_component = list(updated["embedding"])[0]
    assert first_component == pytest.approx(0.9)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- B3: delete
|
||||
|
||||
|
||||
def test_B3_delete_removes_node(store):
    """B3: ``store.delete`` removes the matching node from the graph."""
    doomed = _make_record(store, "to-be-deleted", 0.4)
    store.insert(doomed)
    graph, _a, _rc = retrieve.build_runtime_graph(store)
    node_key = str(doomed.id)
    assert node_key in graph._nx.nodes

    store.delete(doomed.id)
    assert node_key not in graph._nx.nodes
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- B4: hook robustness
|
||||
|
||||
|
||||
def test_B4_hook_exception_does_not_break_store_insert(store, capsys):
    """B4: a raising sync hook must never break ``store.insert``.

    With a hook that always raises registered, the insert must still
    succeed and round-trip through ``store.get``. stderr must contain a
    JSON line whose ``event`` is ``graph_sync_failed`` and whose ``op`` is
    ``insert``.
    """
    def _bad_hook(op, record):
        raise RuntimeError("hook is sad")

    store.register_graph_sync_hook(_bad_hook)

    rec = _make_record(store, "store-write-is-authoritative", 0.15)
    store.insert(rec)  # must not raise

    # The record actually landed in the store.
    roundtrip = store.get(rec.id)
    assert roundtrip is not None
    assert roundtrip.literal_surface == "store-write-is-authoritative"

    # A structured event was logged to stderr.
    captured = capsys.readouterr()
    assert "graph_sync_failed" in captured.err

    # At least one stderr line must parse as the expected JSON event.
    found = False
    for line in captured.err.splitlines():
        # Keep the try body to the parse alone so later errors surface.
        try:
            payload = json.loads(line)
        except (ValueError, TypeError):
            continue
        # A line such as "123" or "[]" is valid JSON but has no .get();
        # skip it instead of failing with AttributeError.
        if not isinstance(payload, dict):
            continue
        if payload.get("event") == "graph_sync_failed":
            assert payload.get("op") == "insert"
            found = True
            break
    assert found, "expected a JSON graph_sync_failed event on stderr"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- B5: cold start
|
||||
|
||||
|
||||
def test_B5_cold_start_restores_node_payload_from_cache(store):
    """B5: a second graph build yields the same node payload as the first.

    After the first ``build_runtime_graph``, ``runtime_graph_cache.try_load``
    must return a 4-tuple whose node-payload entry contains the record's id.
    A rebuild must then give a node with the same surface and embedding.
    """
    record = _make_record(store, "cached-payload", 0.25)
    store.insert(record)
    node_key = str(record.id)

    # First build: expected to populate the on-disk cache.
    first_graph, _a, _rc = retrieve.build_runtime_graph(store)
    first_node = first_graph._nx.nodes[node_key]
    want_surface = first_node["surface"]
    want_embedding = list(first_node["embedding"])

    # Go through try_load instead of reading the file: per the original
    # note the cache file is encrypted, so json.load on it would fail.
    loaded = runtime_graph_cache.try_load(store)
    assert loaded is not None, "cache must be loadable"
    _assignment, _rich_club, node_payload, _max_degree = loaded
    assert node_payload is not None, "cache is missing node_payload blob"
    assert node_key in node_payload

    # Second build: the payload must match the first one.
    second_graph, _a, _rc = retrieve.build_runtime_graph(store)
    second_node = second_graph._nx.nodes[node_key]
    assert second_node["surface"] == want_surface
    assert list(second_node["embedding"]) == want_embedding
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- B6: version bump
|
||||
|
||||
|
||||
def test_B6_cache_version_bump_invalidates_old_cache(store):
    """B6: a cache file tagged with a legacy version is rejected.

    A plain-JSON cache tagged ``"05-09-v1"`` is planted at the cache path.
    The current ``CACHE_VERSION`` must be ``"07-09-v3"`` and ``try_load``
    must return ``None`` for the planted file.
    """
    legacy_cache = {
        "cache_version": "05-09-v1",  # legacy tag
        "key": [0, 0, 4, store.embed_dim, "05-09-v1"],
        "assignment": {
            "node_to_community": {},
            "community_centroids": {},
            "modularity": 0.0,
            "backend": "flat",
            "top_communities": [],
            "mid_regions": {},
        },
        "rich_club": [],
        "saved_at": "2026-01-01T00:00:00+00:00",
    }

    # Plant the old-format file by hand.
    cache_path = runtime_graph_cache._cache_path(store)
    cache_path.parent.mkdir(parents=True, exist_ok=True)
    cache_path.write_text(json.dumps(legacy_cache))

    # The module constant is the current version, not the legacy one.
    assert runtime_graph_cache.CACHE_VERSION == "07-09-v3"

    # Version mismatch: the planted cache must not load.
    assert runtime_graph_cache.try_load(store) is None
|
||||
255
tests/test_guard.py
Normal file
255
tests/test_guard.py
Normal file
|
|
@ -0,0 +1,255 @@
|
|||
"""Tests for D-GUARD (BudgetLedger + RateLimitLedger + should_call_llm).
|
||||
|
||||
Covers:
|
||||
- BudgetLedger daily/monthly caps + rollover
|
||||
- RateLimitLedger cooldown window
|
||||
- should_call_llm 7-step ladder ordering per CONTEXT.md D-GUARD
|
||||
- Persistence across store reopen
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from uuid import uuid4
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# ------------------------------------------------------------- BudgetLedger
|
||||
|
||||
|
||||
def test_budget_ledger_daily_cap_enforced(tmp_path):
    """A spend that would push the day's total over the daily cap is refused.

    With a 0.10 daily cap, 0.05 is allowed on an empty ledger. After 0.08
    has been recorded, a further 0.03 (0.11 in total) must be refused with
    a reason that mentions "daily".
    """
    from iai_mcp.guard import BudgetLedger
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    bl = BudgetLedger(store, daily_usd_cap=0.10, monthly_usd_cap=3.00)

    ok, _ = bl.can_spend(0.05)
    assert ok is True

    bl.record_spend(0.08)
    # 0.08 + 0.03 = 0.11 > 0.10 -> NOT ok.
    # Queried once; the earlier duplicate call discarded its result.
    ok2, reason = bl.can_spend(0.03)
    assert ok2 is False
    assert "daily" in reason.lower()
|
||||
|
||||
|
||||
def test_budget_ledger_daily_allows_under_cap(tmp_path):
    """With 0.05 recorded against a 0.10 daily cap, a further 0.04 is allowed."""
    from iai_mcp.guard import BudgetLedger
    from iai_mcp.store import MemoryStore

    ledger = BudgetLedger(MemoryStore(path=tmp_path), daily_usd_cap=0.10)
    ledger.record_spend(0.05)

    allowed, _reason = ledger.can_spend(0.04)
    assert allowed is True
|
||||
|
||||
|
||||
def test_budget_ledger_monthly_cap_enforced(tmp_path):
    """The monthly cap refuses a spend even when the daily cap has room.

    The daily cap is 10.0 and the monthly cap 0.20. After 0.15 is recorded,
    another 0.10 (0.25 in total) must be refused with a "monthly" reason.
    """
    from iai_mcp.guard import BudgetLedger
    from iai_mcp.store import MemoryStore

    ledger = BudgetLedger(
        MemoryStore(path=tmp_path), daily_usd_cap=10.0, monthly_usd_cap=0.20
    )
    ledger.record_spend(0.15)

    allowed, why = ledger.can_spend(0.10)
    # 0.15 + 0.10 exceeds 0.20; the 10.0 daily cap is nowhere near hit.
    assert allowed is False
    assert "monthly" in why.lower()
|
||||
|
||||
|
||||
def test_budget_ledger_daily_used(tmp_path):
    """``daily_used`` starts at 0.0 and sums the recorded spends."""
    from iai_mcp.guard import BudgetLedger
    from iai_mcp.store import MemoryStore

    ledger = BudgetLedger(MemoryStore(path=tmp_path))
    assert ledger.daily_used() == 0.0

    for amount in (0.01, 0.02):
        ledger.record_spend(amount)

    drift = abs(ledger.daily_used() - 0.03)
    assert drift < 1e-5
|
||||
|
||||
|
||||
def test_budget_ledger_monthly_used(tmp_path):
    """``monthly_used`` sums the recorded spends (0.05 + 0.03 -> 0.08)."""
    from iai_mcp.guard import BudgetLedger
    from iai_mcp.store import MemoryStore

    ledger = BudgetLedger(MemoryStore(path=tmp_path))
    for amount in (0.05, 0.03):
        ledger.record_spend(amount)

    drift = abs(ledger.monthly_used() - 0.08)
    assert drift < 1e-5
|
||||
|
||||
|
||||
def test_budget_ledger_persists_across_reopen(tmp_path):
    """A spend recorded through one store is visible through a new one.

    0.05 is recorded via a first ``MemoryStore``; a second store opened on
    the same path must report the same ``daily_used`` (D-GUARD repudiation).
    """
    from iai_mcp.guard import BudgetLedger
    from iai_mcp.store import MemoryStore

    first_store = MemoryStore(path=tmp_path)
    BudgetLedger(first_store).record_spend(0.05)
    del first_store

    reopened = MemoryStore(path=tmp_path)
    ledger = BudgetLedger(reopened)
    drift = abs(ledger.daily_used() - 0.05)
    assert drift < 1e-5
|
||||
|
||||
|
||||
# ----------------------------------------------------------- RateLimitLedger
|
||||
|
||||
|
||||
def test_ratelimit_ledger_no_history_not_in_cooldown(tmp_path):
    """A ledger with no recorded 429s is not in cooldown."""
    from iai_mcp.guard import RateLimitLedger
    from iai_mcp.store import MemoryStore

    ledger = RateLimitLedger(MemoryStore(path=tmp_path))
    cooling = ledger.in_cooldown()
    assert cooling is False
|
||||
|
||||
|
||||
def test_ratelimit_ledger_record_429_enters_cooldown(tmp_path):
    """Recording a 429 puts the ledger into cooldown straight away."""
    from iai_mcp.guard import RateLimitLedger
    from iai_mcp.store import MemoryStore

    ledger = RateLimitLedger(MemoryStore(path=tmp_path))
    ledger.record_429()
    cooling = ledger.in_cooldown()
    assert cooling is True
|
||||
|
||||
|
||||
def test_ratelimit_ledger_persists_across_reopen(tmp_path):
    """A 429 recorded through one store still means cooldown after reopen."""
    from iai_mcp.guard import RateLimitLedger
    from iai_mcp.store import MemoryStore

    first_store = MemoryStore(path=tmp_path)
    RateLimitLedger(first_store).record_429()
    # Drop the only reference to the first store before reopening.
    del first_store

    reopened_store = MemoryStore(path=tmp_path)
    reopened = RateLimitLedger(reopened_store)
    assert reopened.in_cooldown() is True
|
||||
|
||||
|
||||
# -------------------------------------------------- should_call_llm ladder
|
||||
|
||||
|
||||
def test_should_call_llm_tier_0_fallback_llm_disabled(tmp_path):
    """Step 1: ``llm_enabled=False`` is refused with an ``llm_enabled`` reason."""
    from iai_mcp.guard import BudgetLedger, RateLimitLedger, should_call_llm
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    budget = BudgetLedger(store)
    limiter = RateLimitLedger(store)

    allowed, why = should_call_llm(
        budget, limiter, llm_enabled=False, has_api_key=True
    )
    assert allowed is False
    assert "llm_enabled" in why
|
||||
|
||||
|
||||
def test_should_call_llm_no_api_key(tmp_path):
    """Step 2: a missing API key is refused with an "api key" reason."""
    from iai_mcp.guard import BudgetLedger, RateLimitLedger, should_call_llm
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    budget = BudgetLedger(store)
    limiter = RateLimitLedger(store)

    allowed, why = should_call_llm(
        budget, limiter, llm_enabled=True, has_api_key=False
    )
    assert allowed is False
    assert "api key" in why.lower()
|
||||
|
||||
|
||||
def test_should_call_llm_daily_cap_hit(tmp_path):
    """Step 3: an estimate that would exceed the daily cap is refused.

    0.009 already spent plus a 0.005 estimate overshoots the 0.01 daily cap
    while staying far below the 3.0 monthly cap.
    """
    from iai_mcp.guard import BudgetLedger, RateLimitLedger, should_call_llm
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    budget = BudgetLedger(store, daily_usd_cap=0.01, monthly_usd_cap=3.0)
    budget.record_spend(0.009)
    limiter = RateLimitLedger(store)

    call_kwargs = dict(llm_enabled=True, has_api_key=True, estimated_usd=0.005)
    allowed, why = should_call_llm(budget, limiter, **call_kwargs)
    assert allowed is False
    assert "daily" in why.lower()
|
||||
|
||||
|
||||
def test_should_call_llm_monthly_cap_hit(tmp_path):
    """Step 4: daily budget is fine but the monthly cap would be exceeded.

    0.015 already spent plus a 0.01 estimate overshoots the 0.02 monthly cap
    while staying far below the 10.0 daily cap.
    """
    from iai_mcp.guard import BudgetLedger, RateLimitLedger, should_call_llm
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    budget = BudgetLedger(store, daily_usd_cap=10.0, monthly_usd_cap=0.02)
    budget.record_spend(0.015)
    limiter = RateLimitLedger(store)

    call_kwargs = dict(llm_enabled=True, has_api_key=True, estimated_usd=0.01)
    allowed, why = should_call_llm(budget, limiter, **call_kwargs)
    assert allowed is False
    assert "monthly" in why.lower()
|
||||
|
||||
|
||||
def test_should_call_llm_in_cooldown(tmp_path):
    """Step 5: budget is fine but a recorded 429 keeps the limiter in cooldown."""
    from iai_mcp.guard import BudgetLedger, RateLimitLedger, should_call_llm
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    budget = BudgetLedger(store)
    limiter = RateLimitLedger(store)
    limiter.record_429()

    allowed, why = should_call_llm(
        budget, limiter, llm_enabled=True, has_api_key=True
    )
    assert allowed is False
    assert "cooldown" in why.lower()
|
||||
|
||||
|
||||
def test_should_call_llm_all_green(tmp_path):
    """Every ladder check passes, so the verdict is ``(True, "ok")``."""
    from iai_mcp.guard import BudgetLedger, RateLimitLedger, should_call_llm
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)
    budget = BudgetLedger(store)
    limiter = RateLimitLedger(store)

    allowed, why = should_call_llm(
        budget, limiter, llm_enabled=True, has_api_key=True
    )
    assert allowed is True
    assert why == "ok"
|
||||
|
||||
|
||||
def test_should_call_llm_ordering_llm_enabled_first(tmp_path):
    """Ladder ordering: ``llm_enabled`` wins over budget, cooldown and API key.

    Every later check is set up to fail as well, so the reported reason
    shows which check fired first.
    """
    from iai_mcp.guard import BudgetLedger, RateLimitLedger, should_call_llm
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)

    budget = BudgetLedger(store, daily_usd_cap=0.01)
    budget.record_spend(0.02)  # over cap

    limiter = RateLimitLedger(store)
    limiter.record_429()  # in cooldown

    # llm_enabled=False must short-circuit before the cap and cooldown checks.
    allowed, why = should_call_llm(
        budget, limiter, llm_enabled=False, has_api_key=False
    )
    assert allowed is False
    assert "llm_enabled" in why
|
||||
|
||||
|
||||
def test_should_call_llm_ordering_cap_before_cooldown(tmp_path):
    """With LLM enabled and an API key, the budget cap is checked before cooldown."""
    from iai_mcp.guard import BudgetLedger, RateLimitLedger, should_call_llm
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)

    budget = BudgetLedger(store, daily_usd_cap=0.01)
    budget.record_spend(0.02)  # over cap

    limiter = RateLimitLedger(store)
    limiter.record_429()  # also in cooldown

    call_kwargs = dict(llm_enabled=True, has_api_key=True, estimated_usd=0.001)
    allowed, why = should_call_llm(budget, limiter, **call_kwargs)
    assert allowed is False
    # A "daily" reason proves the cap check ran before the cooldown check.
    assert "daily" in why.lower()
|
||||
287
tests/test_heartbeat_scanner.py
Normal file
287
tests/test_heartbeat_scanner.py
Normal file
|
|
@ -0,0 +1,287 @@
|
|||
"""Phase 10.4 — comprehensive tests for ``HeartbeatScanner``.
|
||||
|
||||
Covers the 9-test matrix from CONTEXT 10.4:
|
||||
- Empty dir scan returns [].
|
||||
- Single fresh heartbeat is FRESH (PID = current process, just-now refresh).
|
||||
- Stale heartbeat (last_refresh older than M) is STALE even if PID alive.
|
||||
- Orphan heartbeat (PID dead, fresh refresh) is ORPHAN.
|
||||
- Five simultaneous fresh heartbeats: ``fresh_count`` == 5; ``is_active`` True.
|
||||
- ``cleanup_stale_orphans`` deletes 3 of 4, leaves the fresh one.
|
||||
- ``heartbeat_idle_30min`` False when at least one fresh exists.
|
||||
- ``heartbeat_idle_30min`` True when only stale + orphan remain.
|
||||
- Concurrent scan tolerates a writer adding a heartbeat mid-scan.
|
||||
|
||||
Tests use ``os.getpid()`` for live-PID fixtures (deterministic) and a
|
||||
known-dead PID 99999 for orphan fixtures (verified dead at session start
|
||||
by the implementation's ``_is_pid_alive``).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import threading
|
||||
import time
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from iai_mcp.heartbeat_scanner import (
|
||||
DEFAULT_STALE_THRESHOLD_SEC,
|
||||
HeartbeatScanner,
|
||||
HeartbeatStatus,
|
||||
_is_pid_alive,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- fixtures
|
||||
|
||||
|
||||
@pytest.fixture
def wrappers_dir(tmp_path: Path) -> Path:
    """Create and return an empty ``wrappers`` directory inside ``tmp_path``."""
    target = tmp_path / "wrappers"
    target.mkdir()
    return target
|
||||
|
||||
|
||||
def _write_heartbeat(
|
||||
wrappers_dir: Path,
|
||||
pid: int,
|
||||
uuid: str,
|
||||
last_refresh: datetime,
|
||||
) -> Path:
|
||||
"""Write a heartbeat file with the given pid/uuid/last_refresh.
|
||||
|
||||
Returns the file path so tests can assert presence/absence after
|
||||
``cleanup_stale_orphans``.
|
||||
"""
|
||||
path = wrappers_dir / f"heartbeat-{pid}-{uuid}.json"
|
||||
payload = {
|
||||
"pid": pid,
|
||||
"uuid": uuid,
|
||||
"started_at": last_refresh.isoformat().replace("+00:00", "Z"),
|
||||
"last_refresh": last_refresh.isoformat().replace("+00:00", "Z"),
|
||||
"wrapper_version": "1.0.0",
|
||||
"schema_version": 1,
|
||||
}
|
||||
path.write_text(json.dumps(payload))
|
||||
return path
|
||||
|
||||
|
||||
# Known-dead PID — verified by ``_is_pid_alive`` in the test below.
# 99999 is above macOS's PID ceiling (PIDs there stay below 99999), so it
# is a stable choice for orphan fixtures on macOS. Other hosts may allow
# larger PIDs (NOTE(review): e.g. Linux with a raised pid_max — confirm),
# so the verification test runs first to fail loudly if this assumption
# is wrong on the current host.
|
||||
_DEAD_PID = 99999
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- sanity
|
||||
|
||||
|
||||
def test_dead_pid_fixture_is_actually_dead() -> None:
    """Sanity check that ``_DEAD_PID`` is not a live process on this host.

    Were the PID ever allocated, the orphan fixtures would silently be
    classified FRESH instead of ORPHAN. Failing here makes that collision
    obvious.
    """
    alive = _is_pid_alive(_DEAD_PID)
    assert alive is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- scan / classify
|
||||
|
||||
|
||||
def test_scan_empty_dir_returns_empty(wrappers_dir: Path) -> None:
    """Scanning an empty wrappers dir yields no entries and no activity."""
    scanner = HeartbeatScanner(wrappers_dir)

    assert scanner.scan() == []
    assert scanner.fresh_count() == 0
    assert scanner.is_active() is False
|
||||
|
||||
|
||||
def test_scan_single_fresh_heartbeat(wrappers_dir: Path) -> None:
    """A heartbeat for this process, refreshed just now, classifies FRESH."""
    this_pid = os.getpid()
    _write_heartbeat(
        wrappers_dir, this_pid, "uuid-aaa", datetime.now(timezone.utc)
    )

    scanner = HeartbeatScanner(wrappers_dir)
    found = scanner.scan()
    assert len(found) == 1

    only = found[0]
    assert only.pid == this_pid
    assert only.uuid == "uuid-aaa"
    assert only.status is HeartbeatStatus.FRESH
    assert scanner.is_active() is True
|
||||
|
||||
|
||||
def test_scan_stale_heartbeat(wrappers_dir: Path) -> None:
    """A refresh older than the stale threshold is STALE even with a live PID."""
    age = timedelta(seconds=DEFAULT_STALE_THRESHOLD_SEC + 10)
    too_old = datetime.now(timezone.utc) - age
    _write_heartbeat(wrappers_dir, os.getpid(), "uuid-bbb", too_old)

    scanner = HeartbeatScanner(wrappers_dir)
    found = scanner.scan()

    assert len(found) == 1
    assert found[0].status is HeartbeatStatus.STALE
    assert scanner.fresh_count() == 0
    assert scanner.is_active() is False
|
||||
|
||||
|
||||
def test_scan_orphan_heartbeat(wrappers_dir: Path) -> None:
    """A just-refreshed heartbeat whose PID is dead classifies ORPHAN."""
    _write_heartbeat(
        wrappers_dir, _DEAD_PID, "uuid-ccc", datetime.now(timezone.utc)
    )

    scanner = HeartbeatScanner(wrappers_dir)
    found = scanner.scan()

    assert len(found) == 1
    assert found[0].status is HeartbeatStatus.ORPHAN
    assert scanner.fresh_count() == 0
|
||||
|
||||
|
||||
def test_scan_5_simultaneous_wrappers(wrappers_dir: Path) -> None:
    """Five fresh heartbeats give ``fresh_count() == 5`` and an active scanner."""
    this_pid = os.getpid()
    stamp = datetime.now(timezone.utc)
    for index in range(5):
        _write_heartbeat(wrappers_dir, this_pid, f"uuid-{index}", stamp)

    scanner = HeartbeatScanner(wrappers_dir)
    assert scanner.fresh_count() == 5
    assert scanner.is_active() is True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- cleanup
|
||||
|
||||
|
||||
def test_cleanup_stale_orphans_deletes_files(wrappers_dir: Path) -> None:
    """With 2 stale, 1 orphan and 1 fresh file, cleanup removes 3 and keeps the fresh one."""
    this_pid = os.getpid()
    now = datetime.now(timezone.utc)
    too_old = now - timedelta(seconds=DEFAULT_STALE_THRESHOLD_SEC + 10)

    keep = _write_heartbeat(wrappers_dir, this_pid, "uuid-fresh", now)
    doomed = [
        _write_heartbeat(wrappers_dir, this_pid, "uuid-s1", too_old),
        _write_heartbeat(wrappers_dir, this_pid, "uuid-s2", too_old),
        _write_heartbeat(wrappers_dir, _DEAD_PID, "uuid-orphan", now),
    ]

    scanner = HeartbeatScanner(wrappers_dir)
    assert scanner.cleanup_stale_orphans() == 3

    # Only the fresh file should still be on disk.
    assert keep.exists()
    for gone in doomed:
        assert not gone.exists()

    # A follow-up scan sees just the survivor.
    survivors = scanner.scan()
    assert len(survivors) == 1
    assert survivors[0].uuid == "uuid-fresh"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- heartbeat_idle_30min
|
||||
|
||||
|
||||
def test_heartbeat_idle_30min_with_recent_fresh_returns_false(
    wrappers_dir: Path,
) -> None:
    """One fresh heartbeat is enough to make the idle predicate False."""
    _write_heartbeat(
        wrappers_dir, os.getpid(), "uuid-fresh", datetime.now(timezone.utc)
    )

    idle = HeartbeatScanner(wrappers_dir).heartbeat_idle_30min()
    assert idle is False
|
||||
|
||||
|
||||
def test_heartbeat_idle_30min_no_fresh_returns_true(wrappers_dir: Path) -> None:
    """With only a stale and an orphan entry the idle predicate is True."""
    now = datetime.now(timezone.utc)
    too_old = now - timedelta(seconds=DEFAULT_STALE_THRESHOLD_SEC + 10)
    _write_heartbeat(wrappers_dir, os.getpid(), "uuid-s", too_old)
    _write_heartbeat(wrappers_dir, _DEAD_PID, "uuid-o", now)

    idle = HeartbeatScanner(wrappers_dir).heartbeat_idle_30min()
    assert idle is True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- concurrency
|
||||
|
||||
|
||||
def test_concurrent_scan_safe(wrappers_dir: Path) -> None:
    """A scan running concurrently with a writer must not raise.

    A background thread writes up to ``write_count`` heartbeat files in
    tight succession while the main thread calls ``scan()`` repeatedly.
    The contract is "no exception"; once the writer has stopped, a final
    scan must report exactly the files that were actually written, all
    classified FRESH.
    """
    own_pid = os.getpid()
    now = datetime.now(timezone.utc)
    write_count = 50
    written: list[Path] = []
    errors: list[BaseException] = []
    stop = threading.Event()

    def writer() -> None:
        try:
            for i in range(write_count):
                if stop.is_set():
                    return
                p = _write_heartbeat(
                    wrappers_dir, own_pid, f"uuid-cc-{i}", now
                )
                written.append(p)
        except BaseException as exc:  # noqa: BLE001 — surface in test
            errors.append(exc)

    scanner = HeartbeatScanner(wrappers_dir)
    t = threading.Thread(target=writer)
    t.start()
    try:
        # Spin scans while the writer adds files. The race we are testing
        # is "scanner glob includes a file that vanishes" or "writer
        # half-writes JSON" — both must be tolerated silently.
        for _ in range(20):
            scanner.scan()  # must not raise
            time.sleep(0.001)
    finally:
        stop.set()
        t.join(timeout=5)

    # join() returns silently on timeout; without this check a hung writer
    # could keep appending to ``written`` while it is compared below.
    assert not t.is_alive(), "writer thread did not stop within 5s"

    assert errors == [], f"writer raised: {errors!r}"
    final = scanner.scan()
    assert len(final) == len(written), (
        f"final scan count {len(final)} != written count {len(written)}"
    )
    assert all(e.status is HeartbeatStatus.FRESH for e in final)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- corruption tolerance
|
||||
|
||||
|
||||
def test_torn_write_falls_back_to_mtime(wrappers_dir: Path) -> None:
    """A half-written heartbeat file is still classified, not dropped.

    The file holds only an opening brace ``{`` (simulating a crash
    mid-write). The scanner is expected to fall back to the PID in the
    filename and the filesystem mtime.
    """
    this_pid = os.getpid()
    torn = wrappers_dir / f"heartbeat-{this_pid}-uuid-torn.json"
    torn.write_text("{")  # invalid JSON

    found = HeartbeatScanner(wrappers_dir).scan()
    assert len(found) == 1

    # The file was just created, so its mtime is "now": live PID => FRESH.
    only = found[0]
    assert only.status is HeartbeatStatus.FRESH
    assert only.pid == os.getpid()
|
||||
131
tests/test_hebbian.py
Normal file
131
tests/test_hebbian.py
Normal file
|
|
@ -0,0 +1,131 @@
|
|||
"""Tests for Hebbian reinforcement, L0 seed, profile knobs, consolidate stub."""
|
||||
from __future__ import annotations
|
||||
|
||||
from uuid import UUID
|
||||
|
||||
from iai_mcp.core import DEFERRED_KNOBS, L0_ID, LIVE_KNOBS, _seed_l0_identity, dispatch
|
||||
from iai_mcp.store import MemoryStore
|
||||
from tests.test_store import _make
|
||||
|
||||
|
||||
def test_reinforce_creates_pairwise_edges(tmp_path):
    """Reinforcing three co-retrieved records boosts C(3,2) = 3 edges."""
    store = MemoryStore(path=tmp_path)
    records = [_make() for _ in range(3)]
    for record in records:
        store.insert(record)

    payload = {"ids": [str(record.id) for record in records]}
    outcome = dispatch(store, "memory_reinforce", payload)
    assert outcome["edges_boosted"] == 3
|
||||
|
||||
|
||||
def test_reinforce_twice_doubles_weight(tmp_path):
    """Reinforcing the same pair twice stacks the delta (0.1 + 0.1 = 0.2)."""
    store = MemoryStore(path=tmp_path)
    records = [_make() for _ in range(2)]
    for record in records:
        store.insert(record)

    payload = {"ids": [str(record.id) for record in records]}
    dispatch(store, "memory_reinforce", payload)
    second = dispatch(store, "memory_reinforce", payload)

    weights = second["new_weights"]
    assert len(weights) == 1
    only_key = next(iter(weights))
    assert abs(weights[only_key] - 0.2) < 1e-5
|
||||
|
||||
|
||||
def test_l0_identity_seeded(tmp_path):
    """D-14: seeding creates the pinned L0 record with its immutability flags."""
    store = MemoryStore(path=tmp_path)
    _seed_l0_identity(store)

    identity = store.get(L0_ID)
    assert identity is not None

    # Immutability flags.
    assert identity.pinned is True
    assert identity.never_decay is True
    assert identity.never_merge is True

    # Placement and content.
    assert identity.detail_level == 5
    assert identity.tier == "semantic"
    assert "IAI-MCP" in identity.literal_surface
|
||||
|
||||
|
||||
def test_l0_seed_is_idempotent(tmp_path):
    """Seeding L0 three times must leave exactly one L0 record in the store."""
    store = MemoryStore(path=tmp_path)
    for _ in range(3):
        _seed_l0_identity(store)

    matches = [rec for rec in store.all_records() if rec.id == L0_ID]
    assert len(matches) == 1
|
||||
|
||||
|
||||
def test_profile_get_returns_live_knobs(tmp_path):
    """``profile_get`` with no knob returns 11 live knobs and 0 deferred.

    Plan 07.12-02: 10 autistic-kernel knobs + ``wake_depth`` (MCP-12) are
    live after AUTIST-02/08/11/12 were removed.
    """
    store = MemoryStore(path=tmp_path)
    profile = dispatch(store, "profile_get", {})
    live = profile["live"]

    assert live["literal_preservation"] == "strong"  # AUTIST-04
    assert live["masking_off"] is True  # AUTIST-06
    assert live["task_support"] == "cued_recognition"  # AUTIST-07
    assert live["scene_construction_scaffold"] is True  # AUTIST-14
    assert live["wake_depth"] == "minimal"  # MCP-12

    assert len(live) == 11
    assert len(profile["deferred"]) == 0
|
||||
|
||||
|
||||
def test_profile_get_specific_live_knob(tmp_path):
    """``profile_get`` for one knob echoes the knob name and its value."""
    store = MemoryStore(path=tmp_path)
    request = {"knob": "literal_preservation"}
    answer = dispatch(store, "profile_get", request)

    assert answer["knob"] == "literal_preservation"
    assert answer["value"] == "strong"
|
||||
|
||||
|
||||
def test_profile_get_camouflaging_now_live_after_autist13_flip(tmp_path):
    """AUTIST-13: ``camouflaging_relaxation`` is live and readable via ``profile_get``."""
    import iai_mcp.core as core

    # Per-process state may have been moved by earlier tests
    # (e.g. relax_register), so pin the knob to 0.0 first.
    core._profile_state["camouflaging_relaxation"] = 0.0

    store = MemoryStore(path=tmp_path)
    request = {"knob": "camouflaging_relaxation"}
    answer = dispatch(store, "profile_get", request)

    assert answer["knob"] == "camouflaging_relaxation"
    assert answer["value"] == 0.0  # D-AUTIST13 default
|
||||
|
||||
|
||||
def test_profile_set_camouflaging_relaxation_now_succeeds(tmp_path):
    """``camouflaging_relaxation`` is live; ``profile_set`` accepts an in-range float.

    The knob is written back to 0.0 in a ``finally`` clause so a failing
    assertion cannot leave the changed value behind for later tests.
    """
    store = MemoryStore(path=tmp_path)
    result = dispatch(
        store, "profile_set", {"knob": "camouflaging_relaxation", "value": 0.3}
    )
    try:
        assert result["status"] == "ok"
    finally:
        # Reset for other tests, even when the assertion above fails.
        dispatch(
            store,
            "profile_set",
            {"knob": "camouflaging_relaxation", "value": 0.0},
        )
|
||||
|
||||
|
||||
def test_profile_set_live_knob_succeeds(tmp_path):
    """A live knob accepts valid enum values ("loose" is in the schema).

    ``LIVE_KNOBS`` is module-level state shared by every test, so the
    default is restored in a ``finally`` clause: a failing assertion must
    not leave the knob at "loose" for later tests.
    """
    store = MemoryStore(path=tmp_path)
    # Reset default before test to avoid test ordering issues
    LIVE_KNOBS["literal_preservation"] = "strong"
    try:
        # Plan 03 introduced schema validation (enum:strong|medium|loose).
        # Plan 01 accepted any value; now we use a valid enum entry.
        result = dispatch(
            store,
            "profile_set",
            {"knob": "literal_preservation", "value": "loose"},
        )
        assert result["status"] == "ok"
        assert LIVE_KNOBS["literal_preservation"] == "loose"
    finally:
        # Restore so other tests aren't affected, pass or fail.
        LIVE_KNOBS["literal_preservation"] = "strong"
|
||||
|
||||
|
||||
def test_memory_consolidate_real(tmp_path):
    """``memory_consolidate`` runs real heavy consolidation (Plan 02-02).

    The earlier stub returned {"status": "queued", "phase": "placeholder"};
    the real implementation returns actual sleep-cycle output:
    {"mode": "heavy", "tier": "tier0"|"tier1", "summaries_created": int,
    "decay_result": {...}, "schema_candidates": [...]}.
    """
    store = MemoryStore(path=tmp_path)
    report = dispatch(store, "memory_consolidate", {})

    assert report["mode"] == "heavy"
    assert report["tier"] in ("tier0", "tier1")
    for required_key in ("summaries_created", "decay_result", "schema_candidates"):
        assert required_key in report
|
||||
391
tests/test_hebbian_batching.py
Normal file
391
tests/test_hebbian_batching.py
Normal file
|
|
@ -0,0 +1,391 @@
|
|||
"""Phase 7.4 — Hebbian write-batching coverage.
|
||||
|
||||
Nine sync tests (project does NOT use pytest-asyncio):
|
||||
|
||||
R1 / A2 — `test_boost_edges_emits_at_most_two_versions`
|
||||
R2 — `test_boost_edges_scalar_delta_unchanged`
|
||||
R2 — `test_boost_edges_sequence_delta_per_pair`
|
||||
R2 — `test_boost_edges_sequence_delta_length_mismatch_raises`
|
||||
A7 — `test_boost_edges_coalesces_duplicate_pairs`
|
||||
R3 site — `test_sleep_consolidated_from_batches_into_two_versions`
|
||||
R3 site — `test_curiosity_bridge_batches_into_two_versions`
|
||||
R3 site — `test_schema_bind_batches_into_two_versions`
|
||||
R3 site — `test_pipeline_profile_modulates_batches_with_sequence_delta`
|
||||
|
||||
Eight tests minimum — SPEC R4 asks for >= 5; this ships the full target from
|
||||
CONTEXT D7.4-08.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from uuid import uuid4
|
||||
|
||||
import pytest
|
||||
|
||||
from iai_mcp.store import EDGES_TABLE, MemoryStore
|
||||
|
||||
|
||||
# ----------------------------------------------------------------- helpers
|
||||
|
||||
|
||||
def _versions(store: MemoryStore) -> int:
    """Count the LanceDB versions currently listed for the edges table."""
    edges = store.db.open_table(EDGES_TABLE)
    history = edges.list_versions()
    return len(history)
|
||||
|
||||
|
||||
# ----------------------------------------------------------- R1 / A2 — versions
|
||||
|
||||
|
||||
def test_boost_edges_emits_at_most_two_versions(tmp_path):
    """R1 + A2 acceptance: one 5-pair call (3 hits + 2 new) adds <= 2 versions.

    The pre-refactor body would emit 5 versions (one per tbl.update /
    tbl.add). After batching the budget is <= 2: one merge_insert for the
    3 updates and one tbl.add for the 2 new rows.
    """
    store = MemoryStore(path=tmp_path)
    a, b, c, d, e, f, g = [uuid4() for _ in range(7)]

    # Seed 3 edges via a single call (the seed itself produces ~1 version).
    seeded_pairs = [(a, b), (c, d), (e, f)]
    store.boost_edges(seeded_pairs, delta=0.1, edge_type="hebbian")

    versions_before = _versions(store)

    # 5-pair call: the 3 seeded pairs again plus 2 new ones, (a,c) and (f,g).
    new = store.boost_edges(
        [*seeded_pairs, (a, c), (f, g)],
        delta=0.2,
        edge_type="hebbian",
    )

    delta_versions = _versions(store) - versions_before

    # Hard cap: <= 2 (one merge_insert for updates + one tbl.add for inserts).
    assert delta_versions <= 2, (
        f"boost_edges emitted {delta_versions} versions "
        f"(expected <= 2 after batching)"
    )

    # Pre-existing pairs end at 0.3 (0.1 + 0.2); new pairs at 0.2 (0 + 0.2).
    # Returned keys are canonical-sorted, so compare endpoints as sets.
    assert len(new) == 5
    seeded_endpoints = tuple({str(x), str(y)} for x, y in seeded_pairs)
    for key, weight in new.items():
        if {key[0], key[1]} in seeded_endpoints:
            assert abs(weight - 0.3) < 1e-5, f"{key} expected 0.3, got {weight}"
        else:
            assert abs(weight - 0.2) < 1e-5, f"{key} expected 0.2, got {weight}"
|
||||
|
||||
|
||||
# ----------------------------------------------------------- R2 — scalar delta
|
||||
|
||||
|
||||
def test_boost_edges_scalar_delta_unchanged(tmp_path):
    """R2 backwards-compat: a scalar ``delta=0.3`` applies to every pair alike."""
    store = MemoryStore(path=tmp_path)
    a, b, c, d = [uuid4() for _ in range(4)]

    weights = store.boost_edges(
        [(a, b), (c, d)], delta=0.3, edge_type="hebbian"
    )

    assert len(weights) == 2
    assert all(abs(value - 0.3) < 1e-5 for value in weights.values())
|
||||
|
||||
|
||||
# ----------------------------------------------------------- R2 — sequence delta
|
||||
|
||||
|
||||
def test_boost_edges_sequence_delta_per_pair(tmp_path):
    """R2: ``delta=[0.5, 0.7]`` is applied pair by pair, in pair-list order."""
    store = MemoryStore(path=tmp_path)
    a, b, c, d = [uuid4() for _ in range(4)]

    weights = store.boost_edges(
        [(a, b), (c, d)],
        delta=[0.5, 0.7],
        edge_type="hebbian",
    )
    assert len(weights) == 2

    # Result keys are canonical-sorted string pairs; rebuild them to look up
    # the weight of each original pair.
    expectations = (((a, b), 0.5), ((c, d), 0.7))
    for (left, right), expected in expectations:
        canonical = tuple(sorted([str(left), str(right)]))
        assert abs(weights[canonical] - expected) < 1e-5
|
||||
|
||||
|
||||
def test_boost_edges_sequence_delta_length_mismatch_raises(tmp_path):
    """R2: a delta sequence whose length differs from the pair list raises ValueError."""
    store = MemoryStore(path=tmp_path)
    a, b, c, d = [uuid4() for _ in range(4)]

    two_pairs = [(a, b), (c, d)]
    three_deltas = [0.5, 0.7, 0.9]  # 3 deltas for 2 pairs

    with pytest.raises(ValueError, match="deltas length"):
        store.boost_edges(two_pairs, delta=three_deltas, edge_type="hebbian")
|
||||
|
||||
|
||||
# ----------------------------------------------------------- A7 — coalesce
|
||||
|
||||
|
||||
def test_boost_edges_coalesces_duplicate_pairs(tmp_path):
    """A7: ``[(a,b), (a,b)]`` with delta=0.1 yields ``cur + 0.2``, not ``cur + 0.1``.

    The legacy implementation re-read the table after every pair, so
    duplicate canonical (src,dst) keys saw each other's delta. The refactor
    keeps that semantic by coalescing in memory BEFORE the write.
    """
    store = MemoryStore(path=tmp_path)
    a, b = uuid4(), uuid4()

    # Seed the edge first so the current weight is non-zero (0.1).
    store.boost_edges([(a, b)], delta=0.1, edge_type="hebbian")

    # Same pair listed twice: 0.1 (existing) + 0.2 (summed deltas) = 0.3.
    new = store.boost_edges([(a, b)] * 2, delta=0.1, edge_type="hebbian")

    assert len(new) == 1, "duplicate pair should collapse to ONE canonical key"
    canonical = tuple(sorted([str(a), str(b)]))
    assert abs(new[canonical] - 0.3) < 1e-5, (
        f"coalesced delta should be cur + 2*delta = 0.3, got {new[canonical]}"
    )
|
||||
|
||||
|
||||
def test_boost_edges_coalesces_duplicate_pairs_first_call(tmp_path):
    """A7, strengthened: duplicates coalesce even when the edge is brand new.

    ``[(a,b), (a,b)]`` with delta=0.1 on a FRESH edge should produce 0.2
    (NOT 0.1), because coalescing happens before the write.
    """
    store = MemoryStore(path=tmp_path)
    a, b = uuid4(), uuid4()

    weights = store.boost_edges([(a, b)] * 2, delta=0.1, edge_type="hebbian")

    canonical = tuple(sorted([str(a), str(b)]))
    assert abs(weights[canonical] - 0.2) < 1e-5
|
||||
|
||||
|
||||
# ----------------------------------------------------------- R3 — site-level
|
||||
|
||||
|
||||
def test_sleep_consolidated_from_batches_into_two_versions(tmp_path):
    """R3 site-level: creating a semantic summary batches its edge writes.

    Checks that a ``consolidated_from`` edge exists for each of the 5
    sources with weight 1.0, and that the call grew the edges-table version
    count by <= 2 rather than by one version per source.
    """
    from iai_mcp.sleep import _create_semantic_summary
    from tests.test_store import _make

    store = MemoryStore(path=tmp_path)

    # Seed 5 source records into a "cluster".
    cluster = [_make(text=f"source memory {i}") for i in range(5)]
    for record in cluster:
        store.insert(record)

    versions_before = _versions(store)
    summary_id = _create_semantic_summary(
        store,
        cluster,
        summary_text="cls summary of 5 source memories",
        language="en",
    )
    delta_versions = _versions(store) - versions_before

    # <= 2 covers the 1 add for new edges (5 fresh consolidated_from rows) PLUS
    # any incidental merge_insert version when the merge_insert path is empty.
    assert delta_versions <= 2, (
        f"sleep.cls boost emitted {delta_versions} versions for 5 sources "
        f"(expected <= 2 after Phase 7.4)"
    )

    df = store.db.open_table(EDGES_TABLE).to_pandas()
    # Either endpoint may be the summary or a source, so match both columns
    # against the same id set.
    member_ids = [str(summary_id), *[str(record.id) for record in cluster]]
    mask = (
        df["src"].isin(member_ids)
        & df["dst"].isin(member_ids)
        & (df["edge_type"] == "consolidated_from")
    )
    consolidated = df[mask]
    assert len(consolidated) == 5, (
        f"expected 5 consolidated_from edges, got {len(consolidated)}"
    )
    # Every weight should equal delta=1.0 (the legacy per-iter scalar).
    for weight in consolidated["weight"]:
        assert abs(float(weight) - 1.0) < 1e-5
|
||||
|
||||
|
||||
def test_curiosity_bridge_batches_into_two_versions(tmp_path):
    """R3 site-level: firing curiosity over 5 triggers batches its edge writes.

    Checks that the call returns a question, adds <= 2 versions to the
    edges table, and leaves 5 ``curiosity_bridge`` edges behind.
    """
    from iai_mcp.curiosity import fire_curiosity
    from tests.test_store import _make

    store = MemoryStore(path=tmp_path)

    # Seed 5 records that will act as triggers; entropy is supplied directly
    # in the call below.
    triggers = [_make(text=f"ambiguous memory {i}") for i in range(5)]
    for record in triggers:
        store.insert(record)

    # Minimal stand-in for a retrieval hit, exposing record_id and score.
    class _Hit:
        def __init__(self, record_id):
            self.record_id = record_id
            self.score = 0.4

    hits = [_Hit(record.id) for record in triggers]

    versions_before = _versions(store)
    # entropy=1.5 (above ENTROPY_HIGH default) -> tier="question" path,
    # 5 trigger_ids, ONE batched boost_edges call after the refactor.
    question = fire_curiosity(
        store,
        hits=hits,
        cue="what was that thing",
        entropy=1.5,
        session_id="sess-curiosity",
        turn=10,
    )
    versions_after = _versions(store)

    assert question is not None, "high-entropy curiosity call should fire"

    delta_versions = versions_after - versions_before
    assert delta_versions <= 2, (
        f"curiosity boost emitted {delta_versions} versions for 5 triggers "
        f"(expected <= 2 after Phase 7.4)"
    )

    df = store.db.open_table(EDGES_TABLE).to_pandas()
    bridge = df[df["edge_type"] == "curiosity_bridge"]
    assert len(bridge) == 5, (
        f"expected 5 curiosity_bridge edges, got {len(bridge)}"
    )
|
||||
|
||||
|
||||
def test_schema_bind_batches_into_two_versions(tmp_path):
    """R3 site-level check for the schema persistence path.

    Persists one new schema candidate backed by five evidence records and
    verifies that the edges table advances by at most two versions while
    five ``schema_instance_of`` edges are present afterwards.
    """
    from iai_mcp.schema import SchemaCandidate, persist_schema
    from tests.test_store import _make

    store = MemoryStore(path=tmp_path)

    # Five evidence records backing the candidate.
    evidence = [_make(text=f"evidence {i}") for i in range(5)]
    for record in evidence:
        store.insert(record)

    # The pattern string is unique to this test; the intent is to avoid any
    # dedup short-circuit in persist_schema so the new-schema insert path
    # (the one that boosts edges per evidence id) is exercised.
    evidence_ids = [record.id for record in evidence]
    candidate = SchemaCandidate(
        pattern="phase74_test_pattern_unique",
        confidence=0.7,
        evidence_ids=evidence_ids,
        evidence_count=5,
        status="auto",
    )

    before = _versions(store)
    schema_id = persist_schema(store, candidate)
    after = _versions(store)

    assert schema_id is not None

    # The <= 2 budget is meant to cover one merge_insert plus one add for the
    # five fresh schema_instance_of edges; the schema record itself is
    # expected to land in a different table than the one being counted.
    emitted = after - before
    assert emitted <= 2, (
        f"schema.bind boost emitted {emitted} versions for 5 evidence "
        f"(expected <= 2 after Phase 7.4)"
    )

    edges = store.db.open_table(EDGES_TABLE).to_pandas()
    instance_rows = edges[edges["edge_type"] == "schema_instance_of"]
    assert len(instance_rows) == 5, (
        f"expected 5 schema_instance_of edges, got {len(instance_rows)}"
    )
|
||||
|
||||
|
||||
def test_pipeline_profile_modulates_batches_with_sequence_delta(tmp_path):
    """R3 site-level check for the ``profile_modulates`` batching pattern.

    The gather-then-batch logic is reproduced locally (instead of driving the
    full recall plumbing) and fed to a single ``store.boost_edges`` call that
    receives a per-pair sequence of deltas. The test asserts:

    1. Hits with an empty gains dict contribute no pair.
    2. Hits whose summed gain is <= 0 fall back to a delta of 1.0.
    3. The single call costs at most two table versions.
    4. The returned mapping holds one entry per pair with the expected delta.
    """
    from iai_mcp.pipeline import PROFILE_SENTINEL_UUID

    store = MemoryStore(path=tmp_path)

    # Five synthetic ids standing in for h.record_id values.
    record_ids = [uuid4() for _ in range(5)]
    gains_per_hit = [
        {"profile_match_strong": 0.4, "language_match": 0.1},  # total = 0.5
        {},  # empty -> skipped
        {"profile_match_weak": 0.2},  # total = 0.2
        {"profile_match_neg": -0.5, "language_match": 0.1},  # -0.4 -> 1.0
        {"profile_match_strong": 0.7},  # total = 0.7
    ]

    # Gather (id, delta) for every hit that has gains, then split into the
    # two parallel sequences boost_edges receives.
    gathered: list[tuple] = []
    for rid, gains in zip(record_ids, gains_per_hit):
        if gains:
            summed = float(sum(gains.values()))
            gathered.append((rid, 1.0 if summed <= 0 else summed))
    pairs: list[tuple] = [(rid, PROFILE_SENTINEL_UUID) for rid, _ in gathered]
    deltas: list[float] = [gain for _, gain in gathered]

    assert len(pairs) == 4, "4 hits should produce edges (1 skipped for empty gains)"
    assert len(deltas) == 4

    before = _versions(store)
    new = store.boost_edges(
        pairs,
        delta=deltas,
        edge_type="profile_modulates",
    )
    after = _versions(store)

    emitted = after - before
    assert emitted <= 2, (
        f"profile_modulates boost emitted {emitted} versions "
        f"(expected <= 2 after Phase 7.4)"
    )

    # Four edges, each keyed by the sorted (id, sentinel) string pair.
    assert len(new) == 4
    sentinel = str(PROFILE_SENTINEL_UUID)
    expected_by_index = {0: 0.5, 2: 0.2, 3: 1.0, 4: 0.7}
    expected_per_pair = {
        tuple(sorted([str(record_ids[idx]), sentinel])): want
        for idx, want in expected_by_index.items()
    }
    for key, want in expected_per_pair.items():
        assert key in new, f"missing edge for {key}"
        assert abs(new[key] - want) < 1e-5, (
            f"{key} expected {want}, got {new[key]}"
        )
|
||||
230
tests/test_hebbian_ltp.py
Normal file
230
tests/test_hebbian_ltp.py
Normal file
|
|
@ -0,0 +1,230 @@
|
|||
"""Tests for 02-REVIEW.md H-03 (CLS heavy cycle missing Hebbian LTP).
|
||||
|
||||
Bug: run_heavy_consolidation creates `consolidated_from` edges for cluster
|
||||
members (LTD-side write) but does NOT strengthen existing hebbian edges
|
||||
between co-retrieved cluster members (LTP). The spec requires both
|
||||
sides -- frequently-traversed edges strengthen; old rarely-traversed fade.
|
||||
Pre-fix, the only LTP source was store.boost_edges inside pipeline_recall,
|
||||
which fires on explicit user retrieval, never during offline consolidation.
|
||||
|
||||
Fix:
|
||||
- Add module constant HEAVY_LTP_DELTA = 0.05 in sleep.py.
|
||||
- In run_heavy_consolidation, after _create_semantic_summary runs for a
|
||||
cluster, call store.boost_edges(combinations(cluster_ids, 2),
|
||||
edge_type="hebbian", delta=HEAVY_LTP_DELTA) so existing hebbian edges
|
||||
between co-cluster members are potentiated.
|
||||
- Non-cluster edges remain untouched.
|
||||
|
||||
Constitutional contract (MEM-07 biological fidelity + symmetry):
|
||||
Hebbian LTP/LTD symmetry is the core Hebbian-learning invariant. Without
|
||||
LTP during consolidation the graph drifts monotonically weaker. Matches
|
||||
Woz 2022 SRS reinforcement on co-retrieval.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from uuid import UUID, uuid4
|
||||
|
||||
import pytest
|
||||
|
||||
from iai_mcp.types import EMBED_DIM, MemoryRecord
|
||||
|
||||
|
||||
# ---------------------------------------------------------------- helpers
|
||||
|
||||
|
||||
def _record(
    *,
    text: str = "n",
    language: str = "en",
) -> MemoryRecord:
    """Build an episodic ``MemoryRecord`` with a fresh id for these tests.

    Args:
        text: Stored as the record's ``literal_surface``.
        language: Stored as the record's ``language``.

    Returns:
        A record whose embedding is 1.0 in the first slot and 0.0 elsewhere,
        with ``created_at`` and ``updated_at`` set to the same UTC instant.
    """
    stamp = datetime.now(timezone.utc)
    # Every record built here carries the same embedding vector.
    unit_embedding = [1.0, *([0.0] * (EMBED_DIM - 1))]
    return MemoryRecord(
        id=uuid4(),
        tier="episodic",
        literal_surface=text,
        aaak_index="",
        embedding=unit_embedding,
        community_id=None,
        centrality=0.0,
        detail_level=2,
        pinned=False,
        stability=0.5,
        difficulty=0.3,
        last_reviewed=None,
        never_decay=False,
        never_merge=False,
        provenance=[],
        created_at=stamp,
        updated_at=stamp,
        tags=[],
        language=language,
    )
|
||||
|
||||
|
||||
def _hebbian_weight(store, a: UUID, b: UUID) -> float | None:
    """Return the weight of the hebbian edge between *a* and *b*, if any.

    The two ids are stringified and sorted so the lookup matches the
    (src, dst) ordering regardless of argument order.

    Returns:
        The weight of the first matching row as a float, or ``None`` when the
        edges table is empty or holds no hebbian row for the pair.
    """
    from iai_mcp.store import EDGES_TABLE

    src_key, dst_key = sorted([str(a), str(b)])
    edges = store.db.open_table(EDGES_TABLE).to_pandas()
    if edges.empty:
        return None

    is_pair = (edges["src"] == src_key) & (edges["dst"] == dst_key)
    is_hebbian = edges["edge_type"] == "hebbian"
    weights = edges.loc[is_pair & is_hebbian, "weight"]
    if weights.empty:
        return None
    return float(weights.iloc[0])
|
||||
|
||||
|
||||
# ==================================================== H-03: named constant
|
||||
|
||||
|
||||
def test_heavy_ltp_delta_is_named_constant():
    """``iai_mcp.sleep`` must expose ``HEAVY_LTP_DELTA`` with value 0.05.

    Keeping the LTP increment as a module-scope constant lets maintainers
    tune it in one place rather than hunting for a magic number.
    """
    from iai_mcp import sleep as sleep_module

    assert hasattr(sleep_module, "HEAVY_LTP_DELTA"), (
        "sleep.py must define HEAVY_LTP_DELTA at module scope"
    )
    expected = pytest.approx(0.05, abs=1e-6)
    assert sleep_module.HEAVY_LTP_DELTA == expected, (
        f"HEAVY_LTP_DELTA must equal 0.05, got {sleep_module.HEAVY_LTP_DELTA}"
    )
|
||||
|
||||
|
||||
# ==================================================== H-03: LTP on cluster members
|
||||
|
||||
|
||||
def test_heavy_cycle_strengthens_existing_hebbian_edges(tmp_path):
    """Heavy consolidation must potentiate hebbian edges inside a cluster.

    Four records are fully connected with hebbian edges seeded at 0.3. After
    one ``run_heavy_consolidation`` pass (LLM disabled, no API key) each of
    the six pairwise weights must be at least ``0.3 + HEAVY_LTP_DELTA``
    (within a 1e-3 tolerance).
    """
    from iai_mcp.guard import BudgetLedger, RateLimitLedger
    from iai_mcp.sleep import HEAVY_LTP_DELTA, SleepConfig, run_heavy_consolidation
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)

    # Four records that together form the cluster under test.
    recs = [_record(text=f"fact_{i}") for i in range(4)]
    for rec in recs:
        store.insert(rec)

    # Every unordered pair, in the same order an (i < j) index scan yields.
    ids = [rec.id for rec in recs]
    pairs = [
        (left, right)
        for pos, left in enumerate(ids)
        for right in ids[pos + 1:]
    ]

    # Seed each pair at 0.3 with its own boost call.
    for left, right in pairs:
        store.boost_edges([(left, right)], edge_type="hebbian", delta=0.3)

    # Pre-condition: every seeded edge reads back as 0.3.
    for a, b in pairs:
        w = _hebbian_weight(store, a, b)
        assert w == pytest.approx(0.3, abs=1e-3), (
            f"pre-condition: {a}/{b} weight must be 0.3, got {w}"
        )

    # One heavy pass with the LLM disabled.
    run_heavy_consolidation(
        store,
        session_id="ltp-test",
        config=SleepConfig(llm_enabled=False),
        budget=BudgetLedger(store),
        rate=RateLimitLedger(store),
        has_api_key=False,
    )

    # Post-condition: each pair still exists and gained at least the delta.
    target = 0.3 + HEAVY_LTP_DELTA
    for a, b in pairs:
        w = _hebbian_weight(store, a, b)
        assert w is not None, f"edge {a}/{b} must still exist"
        assert w >= target - 1e-3, (
            f"hebbian edge {a}/{b} not potentiated: expected >= "
            f"{target}, got {w}"
        )
|
||||
|
||||
|
||||
def test_heavy_cycle_does_not_touch_non_cluster_edges(tmp_path):
    """Heavy-cycle LTP must leave edges outside the cluster unchanged.

    A three-record cluster is fully linked with hebbian edges at 0.3, and a
    separate X-E pair (neither record linked to the cluster) gets a hebbian
    edge at 0.4. After one ``run_heavy_consolidation`` pass the X-E weight
    must still read 0.4.
    """
    from iai_mcp.guard import BudgetLedger, RateLimitLedger
    from iai_mcp.sleep import SleepConfig, run_heavy_consolidation
    from iai_mcp.store import MemoryStore

    store = MemoryStore(path=tmp_path)

    # Three-member cluster with all three pairwise hebbian links.
    cluster = [_record(text=f"c{i}") for i in range(3)]
    for member in cluster:
        store.insert(member)
    first, second, third = (member.id for member in cluster)
    for pair in ((first, second), (second, third), (first, third)):
        store.boost_edges([pair], edge_type="hebbian", delta=0.3)

    # Two extra records joined only to each other.
    rec_x = _record(text="x")
    rec_e = _record(text="e")
    store.insert(rec_x)
    store.insert(rec_e)
    store.boost_edges([(rec_x.id, rec_e.id)], edge_type="hebbian", delta=0.4)
    x_e_before = _hebbian_weight(store, rec_x.id, rec_e.id)
    assert x_e_before == pytest.approx(0.4, abs=1e-3)

    # One heavy pass with the LLM disabled.
    run_heavy_consolidation(
        store,
        session_id="ltp-isolate",
        config=SleepConfig(llm_enabled=False),
        budget=BudgetLedger(store),
        rate=RateLimitLedger(store),
        has_api_key=False,
    )

    # X-E is an isolated two-node component; the expectation is that it is
    # too small to count as a cluster (CLUSTER_MIN_SIZE=3), so no LTP applies.
    x_e_after = _hebbian_weight(store, rec_x.id, rec_e.id)
    assert x_e_after == pytest.approx(0.4, abs=1e-3), (
        f"non-cluster edge must stay at 0.4, got {x_e_after}"
    )
|
||||
|
||||
|
||||
def test_heavy_cycle_boost_edges_uses_hebbian_type(tmp_path):
    """Structural guard on the source text of ``run_heavy_consolidation``.

    The function's source must contain an ``edge_type`` keyword set to
    ``hebbian`` (either quote style) and must mention ``HEAVY_LTP_DELTA``.
    This guards against a regression where the LTP step is "fixed" by
    reusing only the consolidated_from write path.
    """
    import inspect
    from iai_mcp import sleep as sleep_module

    source_text = inspect.getsource(sleep_module.run_heavy_consolidation)

    # Accept double- or single-quoted spellings of the keyword argument.
    hebbian_spellings = ('edge_type="hebbian"', "edge_type='hebbian'")
    assert any(spelling in source_text for spelling in hebbian_spellings), (
        "run_heavy_consolidation must boost hebbian edges (LTP), not only "
        "create consolidated_from edges"
    )
    assert "HEAVY_LTP_DELTA" in source_text, (
        "run_heavy_consolidation must use the named HEAVY_LTP_DELTA constant"
    )
|
||||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue