Co-Authored-By: Claude <noreply@anthropic.com> Co-Authored-By: XNLLLLH <XNLLLLH@users.noreply.github.com>
335 lines
11 KiB
Python
335 lines
11 KiB
Python
"""M-03 RAM footprint bench. Reports RSS at store size N.
|
|
|
|
Target: RSS <= 300 MB warm at N=10k on a 16+ GB machine.
|
|
|
|
Pressplay 8 GB M1 hung mid-run on 2026-04-19 while trying to build the
|
|
runtime graph at N=10k (Pitfall 4 from 05-RESEARCH: bge-m3 ~2 GB +
|
|
NetworkX ~200 MB + LanceDB ~50 MB + Python overhead -> swap thrash).
|
|
Phase 5 measures on this 16 GB dev Mac; pressplay cross-validates at
|
|
N <= 2000 per D5-09.
|
|
|
|
JSON output (one line to stdout):

    {
      "n": int,
      "rss_mb_peak": float,     # platform-adjusted MB
      "threshold_mb": 300.0,
      "passed": bool,           # True iff rss_mb_peak <= threshold_mb
      "platform": "darwin"|"linux"|"win32",
      "stage_ms": {"seed": float, "graph": float},
      "seed_n": int,            # records that actually made it in
      "graph_built": bool,      # True iff build_runtime_graph finished
      "dim": int,               # embedding dim reported by the store
      "async_writes": bool,     # True iff --async-writes was requested
    }
|
|
|
|
Exit codes:
|
|
0 if passed, 1 otherwise.
|
|
|
|
CLI:

    python -m bench.memory_footprint [--n 10000] [--dim 1024] [--seed 42]
                                     [--skip-graph] [--async-writes]
                                     [--out PATH]

--skip-graph keeps the RSS reading to the seeded-store baseline (no
NetworkX graph build); useful when the graph build is the timeout cause
and we want to isolate the store-only overhead.

--async-writes enables the store's coalescing write queue for the seed
stage; --out also writes the JSON result to the given file.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import gc
|
|
import json
|
|
import os
|
|
import resource
|
|
import sys
|
|
import tempfile
|
|
import time
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
from uuid import uuid4
|
|
|
|
import numpy as np
|
|
|
|
from iai_mcp.store import MemoryStore
|
|
from iai_mcp.types import EMBED_DIM, MemoryRecord
|
|
|
|
# Pass/fail ceiling for peak RSS, in megabytes. run_memory_footprint compares
# the measured peak against it and echoes it in the result as "threshold_mb".
THRESHOLD_MB = 300.0
|
|
|
|
|
|
def _isolate_keyring_in_memory() -> None:
    """Replace the active keyring with a process-local, dict-backed backend.

    Bench-only helper. The stated motivation is that MemoryStore's crypto
    layer would otherwise reach macOS Keychain, which hangs under
    SecItemCopyMatching when the bench is invoked from a non-interactive
    shell. Production code paths do not call this function.

    Idempotent: the installed backend carries an ``_iai_bench_noop`` marker
    attribute, and a current backend that already has it is left in place.
    """
    import keyring
    from keyring.backend import KeyringBackend

    active_backend = keyring.get_keyring()
    if getattr(active_backend, "_iai_bench_noop", False):
        # Our backend is already installed; nothing to do.
        return

    class _BenchNoOpKeyring(KeyringBackend):
        priority = 99
        # Marker attribute checked by the idempotency guard above.
        _iai_bench_noop = True
        # Class-level store keyed by (service, username); it is shared by
        # every instance of this class.
        _kv: dict[tuple[str, str], str] = {}

        def get_password(self, service: str, username: str) -> str | None:
            key = (service, username)
            return self._kv.get(key)

        def set_password(self, service: str, username: str, password: str) -> None:
            key = (service, username)
            self._kv[key] = password

        def delete_password(self, service: str, username: str) -> None:
            key = (service, username)
            # Deleting an unknown entry is a silent no-op.
            self._kv.pop(key, None)

    keyring.set_keyring(_BenchNoOpKeyring())
|
|
|
|
|
|
def _rss_mb() -> float:
|
|
"""Peak RSS in MB, platform-adjusted.
|
|
|
|
macOS returns ru_maxrss in BYTES.
|
|
Linux returns ru_maxrss in KB.
|
|
Windows via resource is not supported; the Windows branch falls back to
|
|
a best-effort reading and the platform marker in the JSON output lets
|
|
the report flag it.
|
|
"""
|
|
r = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
|
|
if sys.platform == "darwin":
|
|
return float(r) / 1024.0 / 1024.0
|
|
# Linux reports kilobytes; everything else treated as KB for safety.
|
|
return float(r) / 1024.0
|
|
|
|
|
|
def _make_noise_record(i: int, rng: np.random.Generator, dim: int) -> MemoryRecord:
    """Build one synthetic episodic record with a random unit embedding.

    Defined inline so this bench stays self-contained and does not import
    bench/verbatim.

    Args:
        i: Ordinal of the record; only used in the literal surface text.
        rng: NumPy generator the embedding is drawn from.
        dim: Length of the embedding vector.

    Returns:
        A MemoryRecord with a fresh UUID, the current UTC time as both
        timestamps, and a standard-normal vector scaled to unit length.
    """
    stamp = datetime.now(timezone.utc)
    direction = rng.standard_normal(dim)
    length = float(np.linalg.norm(direction))
    # A zero-length draw cannot be normalised; keep it unchanged.
    unit = direction / length if length > 0 else direction
    return MemoryRecord(
        # Identity and content.
        id=uuid4(),
        tier="episodic",
        literal_surface=f"bench noise record {i}",
        aaak_index="",
        embedding=unit.tolist(),
        # Graph placement.
        community_id=None,
        centrality=0.0,
        detail_level=2,
        # Retention and review state.
        pinned=False,
        stability=0.0,
        difficulty=0.0,
        last_reviewed=None,
        never_decay=False,
        never_merge=False,
        # Bookkeeping.
        provenance=[],
        created_at=stamp,
        updated_at=stamp,
        tags=["bench", "ops-11"],
        language="en",
    )
|
|
|
|
|
|
def _seed_store(
|
|
store: MemoryStore, n: int, dim: int, seed: int, *, concurrent: bool = False
|
|
) -> int:
|
|
"""Seed N synthetic records. Returns the count actually inserted.
|
|
|
|
When ``concurrent`` is True, inserts are dispatched from a thread
|
|
pool so the coalescing AsyncWriteQueue can actually batch records
|
|
inside its 100 ms window. Sequential blocking inserts (the default
|
|
sync path) see no coalesce benefit because each insert waits on its
|
|
own batch flush before the next enqueue even happens.
|
|
"""
|
|
rng = np.random.default_rng(seed)
|
|
records = [_make_noise_record(i, rng, dim=dim) for i in range(n)]
|
|
if not concurrent:
|
|
for r in records:
|
|
store.insert(r)
|
|
return len(records)
|
|
|
|
# Concurrent path: a thread pool fires enqueues from many threads so
|
|
# the queue's coalesce window fills. Pool size ~256 is large enough
|
|
# to always fill a max_batch=128 window on this hardware.
|
|
from concurrent.futures import ThreadPoolExecutor
|
|
with ThreadPoolExecutor(max_workers=256) as pool:
|
|
list(pool.map(store.insert, records))
|
|
return len(records)
|
|
|
|
|
|
def run_memory_footprint(
    n: int = 10_000,
    store_path: Path | str | None = None,
    dim: int = EMBED_DIM,
    seed: int = 42,
    *,
    skip_graph: bool = False,
    isolate_keyring: bool = True,
    async_writes: bool = False,
) -> dict:
    """Seed N records, optionally build the runtime graph, measure peak RSS.

    Args:
        n: Number of synthetic records to seed.
        store_path: Directory for the store. When None, a temporary
            directory is created and removed before returning; otherwise
            the directory is created if missing and left in place.
        dim: Requested embedding dimension. When it differs from EMBED_DIM
            it is exported as ``IAI_MCP_EMBED_DIM`` for the duration of the
            run and the previous value is restored afterwards.
        seed: RNG seed for the synthetic embeddings.
        skip_graph: Skip ``retrieve.build_runtime_graph`` so the reading
            reflects the seeded store only.
        isolate_keyring: Install an in-memory keyring backend first (default
            True) so MemoryStore's crypto layer never hits macOS Keychain.
            Set False only when benching against an existing ~/.iai-mcp
            store whose real key lives in the user keyring.
        async_writes: Enable the store's async write queue for the seed
            stage and seed from a thread pool; the queue is disabled again
            before the graph build.

    Returns:
        A JSON-serialisable dict with keys ``n``, ``rss_mb_peak``,
        ``threshold_mb``, ``passed``, ``platform``, ``stage_ms`` (``seed``
        and ``graph``, in milliseconds), ``seed_n``, ``graph_built``,
        ``dim`` (the store's reported embed dim) and ``async_writes``.

    A failing graph build does not raise: it is reported on stderr and as
    ``graph_built: False``. ``passed`` depends only on the RSS reading.
    """
    if isolate_keyring:
        _isolate_keyring_in_memory()

    cleanup: tempfile.TemporaryDirectory | None = None
    if store_path is None:
        cleanup = tempfile.TemporaryDirectory(prefix="iai-bench-ops11-")
        path = Path(cleanup.name)
    else:
        path = Path(store_path)
        path.mkdir(parents=True, exist_ok=True)

    # Honour the caller's --dim request by setting IAI_MCP_EMBED_DIM BEFORE
    # the MemoryStore is constructed. The store reads this env var via
    # store._resolve_embed_dim() on first table creation (see store.py:115).
    # The prior value is restored in the finally block so other
    # benches/tests are not contaminated.
    prev_embed_dim = os.environ.get("IAI_MCP_EMBED_DIM")
    if dim != EMBED_DIM:
        os.environ["IAI_MCP_EMBED_DIM"] = str(dim)

    try:
        store = MemoryStore(path=path)
        # Match the store's actual embed dim so inserts don't get silently
        # rejected when the env override was ignored (e.g. existing table
        # on disk pins a different dim).
        eff_dim = store.embed_dim

        # With async_writes, enable the coalescing write queue before the
        # seed loop so every store.insert() below routes through it. The
        # queue is drained + torn down after the seed completes, keeping
        # the graph build / RSS reading on the legacy sync path.
        if async_writes:
            # Imported lazily so sync runs never load asyncio.
            import asyncio as _asyncio

            async def _enable():
                await store.enable_async_writes()

            async def _disable():
                await store.disable_async_writes()

            _asyncio.run(_enable())

        t0 = time.perf_counter()
        seed_n = _seed_store(
            store, n, dim=eff_dim, seed=seed, concurrent=async_writes,
        )
        seed_ms = (time.perf_counter() - t0) * 1000.0

        if async_writes:
            _asyncio.run(_disable())

        graph_built = False
        graph_ms = 0.0
        if not skip_graph:
            # Lazy import so --skip-graph runs don't pay the NetworkX load.
            from iai_mcp import retrieve

            t1 = time.perf_counter()
            try:
                _graph, _assignment, _rc = retrieve.build_runtime_graph(store)
                graph_built = True
            except Exception as exc:
                # Graph build can OOM on small hosts; keep the bench alive
                # and let the RSS reading reflect the peak up to the
                # failure. The cause goes to stderr so a broken build is
                # not mistaken for a clean low-RSS run; stdout stays
                # reserved for the single JSON line.
                graph_built = False
                print(
                    "memory_footprint: build_runtime_graph failed: "
                    f"{type(exc).__name__}: {exc}",
                    file=sys.stderr,
                )
            graph_ms = (time.perf_counter() - t1) * 1000.0

        gc.collect()
        rss_mb_peak = _rss_mb()

        return {
            "n": n,
            "rss_mb_peak": round(rss_mb_peak, 2),
            "threshold_mb": THRESHOLD_MB,
            # Compared on the unrounded reading.
            "passed": rss_mb_peak <= THRESHOLD_MB,
            "platform": sys.platform,
            "stage_ms": {
                "seed": round(seed_ms, 2),
                "graph": round(graph_ms, 2),
            },
            "seed_n": seed_n,
            "graph_built": graph_built,
            "dim": eff_dim,
            "async_writes": bool(async_writes),
        }
    finally:
        # Restore IAI_MCP_EMBED_DIM so other benches / tests run with the
        # host default.
        if dim != EMBED_DIM:
            if prev_embed_dim is None:
                os.environ.pop("IAI_MCP_EMBED_DIM", None)
            else:
                os.environ["IAI_MCP_EMBED_DIM"] = prev_embed_dim
        if cleanup is not None:
            cleanup.cleanup()
|
|
|
|
|
|
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point for the memory-footprint bench.

    Parses ``argv`` (``sys.argv[1:]`` when None), runs
    ``run_memory_footprint`` with the parsed options, prints the result as
    one JSON line on stdout and, when ``--out`` is given, writes the same
    JSON to that file first.

    Returns:
        0 when the result's ``passed`` flag is true, 1 otherwise.
    """
    cli = argparse.ArgumentParser(
        prog="bench.memory_footprint",
        description=(
            "OPS-11 / RAM bench. Seeds N records, optionally builds "
            "the runtime graph, reports peak RSS. Target: <=300 MB at "
            "N=10k on a 16+ GB host."
        ),
    )

    # Workload shape.
    cli.add_argument(
        "--n",
        "--n-records",
        dest="n",
        type=int,
        default=10_000,
        help="record count to seed (default 10000)",
    )
    cli.add_argument(
        "--dim",
        type=int,
        default=EMBED_DIM,
        help=f"embedding dimension (default {EMBED_DIM}; tests use 32/64 for speed)",
    )
    cli.add_argument("--seed", type=int, default=42, help="RNG seed (default 42)")

    # Stage toggles.
    cli.add_argument(
        "--skip-graph",
        action="store_true",
        help="Skip build_runtime_graph; isolate store-only RSS",
    )
    cli.add_argument(
        "--async-writes",
        action="store_true",
        help=(
            "enable MemoryStore.enable_async_writes() before the "
            "seed loop so inserts go through the coalescing AsyncWriteQueue. "
            "Target: amortise the ~0.3 MB/insert LanceDB buffer overhead by "
            "batching 128 inserts per flush."
        ),
    )

    # Output.
    cli.add_argument(
        "--out",
        type=str,
        default=None,
        help="Write the JSON result to this file (in addition to stdout).",
    )

    opts = cli.parse_args(argv)
    report = run_memory_footprint(
        n=opts.n,
        dim=opts.dim,
        seed=opts.seed,
        skip_graph=opts.skip_graph,
        async_writes=opts.async_writes,
    )

    # Serialise once; the file and stdout receive the same text.
    encoded = json.dumps(report)
    if opts.out:
        with open(opts.out, "w") as sink:
            sink.write(encoded)
    print(encoded)

    if report["passed"]:
        return 0
    return 1
|
|
|
|
|
|
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|