fix: Qdrant backend support — topology, doctor, and community detection
- Fix build_runtime_graph to use the backend-agnostic records_as_dataframe() and edges_as_dataframe() instead of the LanceDB-specific open_table()
- Fix CLI topology command: JSON-RPC envelope + result extraction
- Fix community.py KeyError when the graph has nodes but no edges
- Update doctor check (i) to report Qdrant collection counts when Qdrant is active, and LanceDB version counts when LanceDB is active
- Fix HIBERNATION startup exit: dispatch REQUEST_ARRIVED on boot
- Fix systemd unit: StartLimit* keys in [Unit] section
- Broaden capture.py exception handler for deferred capture failures
- Add records_as_dataframe() and edges_as_dataframe() to MemoryStore
This commit is contained in:
parent
8492719735
commit
a31bbd7f58
8 changed files with 87 additions and 33 deletions
|
|
@ -699,37 +699,63 @@ def _resolve_records_lance_versions_dir() -> Path:
|
|||
|
||||
|
||||
def check_i_lance_versions_count() -> CheckResult:
|
||||
"""(i) records.lance versions count: PASS <=500, WARN 501..2000, FAIL >2000.
|
||||
"""(i) storage backend status: Qdrant collection counts or LanceDB versions.
|
||||
|
||||
Plan 07.14-03 [Wave2-Option-C] diagnostic row. The root-cause
|
||||
attack drained ``~/.iai-mcp/lancedb/records.lance/_versions/`` from 7298
|
||||
manifests to a small constant (Wave 1 compaction). This check warns the
|
||||
user before the pile re-accumulates to a daemon-boot-stalling scale.
|
||||
Plan 07.14-03 [Wave2-Option-C] diagnostic row. When LanceDB is active,
|
||||
reports ``records.lance`` versions count: PASS <=500, WARN 501..2000,
|
||||
FAIL >2000. The root-cause attack drained ``~/.iai-mcp/lancedb/records.lance/_versions/``
|
||||
from 7298 manifests to a small constant (Wave 1 compaction).
|
||||
|
||||
When Qdrant is active, reports collection point counts for ``records``
|
||||
and ``metadata`` collections to verify data migration completeness.
|
||||
|
||||
Resolution honors ``IAI_MCP_STORE`` env (test isolation + multi-tenant)
|
||||
before falling back to ``~/.iai-mcp``; mirrors ``MemoryStore.__init__``.
|
||||
|
||||
Status thresholds:
|
||||
- PASS: ``count <= 500`` -- healthy steady state.
|
||||
- WARN: ``501 <= count <= 2000`` -- recommend ``iai-mcp maintenance
|
||||
compact-records --apply --yes`` at next quiet window.
|
||||
- FAIL: ``count > 2000`` -- daemon boot-bind will be slow (>10 s);
|
||||
recommend immediate compaction.
|
||||
|
||||
Edge cases:
|
||||
- ``records.lance/_versions/`` directory absent (fresh install,
|
||||
store never written) -> PASS with explanatory detail.
|
||||
- ``OSError`` while enumerating (permission denied, FUSE error) ->
|
||||
WARN with the error class+message; never FAIL on a probe error.
|
||||
|
||||
INV-7 (CPU-near-zero idle) preserved: this check runs ONLY when the
|
||||
user invokes ``iai-mcp doctor`` -- no background polling, no daemon-side
|
||||
work.
|
||||
"""
|
||||
from iai_mcp.store import _use_qdrant
|
||||
|
||||
# Heuristic: qdrant_storage/ directory present → Qdrant is the active
|
||||
# backend even if QDRANT_URL is not set in the current shell (e.g.
|
||||
# systemd service provides it but interactive shell does not).
|
||||
env_path = os.environ.get("IAI_MCP_STORE")
|
||||
store_root = Path(env_path) if env_path else (Path.home() / ".iai-mcp")
|
||||
qdrant_detected = (store_root / "qdrant_storage").exists()
|
||||
|
||||
if _use_qdrant() or qdrant_detected:
|
||||
# Qdrant path: attempt to report collection counts.
|
||||
# If QDRANT_API_KEY is not set in the current shell, we can't
|
||||
# connect directly — fall back to reporting Qdrant detection
|
||||
# without collection counts (the daemon is using Qdrant successfully).
|
||||
try:
|
||||
from iai_mcp.qdrant_store import QdrantStore
|
||||
qstore = QdrantStore()
|
||||
records_count = qstore.count_rows("records")
|
||||
metadata_count = qstore.count_rows("metadata")
|
||||
return CheckResult(
|
||||
name="(i) storage backend status",
|
||||
passed=True,
|
||||
detail=f"Qdrant backend: records={records_count}, metadata={metadata_count}",
|
||||
status="PASS",
|
||||
)
|
||||
except Exception:
|
||||
# Can't connect to Qdrant from this shell (missing API key,
|
||||
# network issue, etc.). The daemon is running with Qdrant,
|
||||
# so we report detection without counts.
|
||||
return CheckResult(
|
||||
name="(i) storage backend status",
|
||||
passed=True,
|
||||
detail="Qdrant backend detected (qdrant_storage/ present); collection counts unavailable without QDRANT_API_KEY",
|
||||
status="PASS",
|
||||
)
|
||||
|
||||
versions_dir = _resolve_records_lance_versions_dir()
|
||||
if not versions_dir.exists():
|
||||
return CheckResult(
|
||||
name="(i) lance versions count",
|
||||
name="(i) storage backend status",
|
||||
passed=True,
|
||||
detail=f"{versions_dir} not present yet (fresh install or no writes yet)",
|
||||
status="PASS",
|
||||
|
|
@ -738,21 +764,21 @@ def check_i_lance_versions_count() -> CheckResult:
|
|||
count = sum(1 for _ in versions_dir.glob("*.manifest"))
|
||||
except OSError as exc:
|
||||
return CheckResult(
|
||||
name="(i) lance versions count",
|
||||
name="(i) storage backend status",
|
||||
passed=True, # WARN, not FAIL: probe failure is advisory.
|
||||
detail=f"could not enumerate versions: {type(exc).__name__}: {exc}",
|
||||
status="WARN",
|
||||
)
|
||||
if count <= 500:
|
||||
return CheckResult(
|
||||
name="(i) lance versions count",
|
||||
name="(i) storage backend status",
|
||||
passed=True,
|
||||
detail=f"{count} version manifest(s); healthy",
|
||||
status="PASS",
|
||||
)
|
||||
if count <= 2000:
|
||||
return CheckResult(
|
||||
name="(i) lance versions count",
|
||||
name="(i) storage backend status",
|
||||
passed=True, # WARN -- still passes the gate.
|
||||
detail=(
|
||||
f"{count} version manifests; consider running "
|
||||
|
|
@ -761,7 +787,7 @@ def check_i_lance_versions_count() -> CheckResult:
|
|||
status="WARN",
|
||||
)
|
||||
return CheckResult(
|
||||
name="(i) lance versions count",
|
||||
name="(i) storage backend status",
|
||||
passed=False,
|
||||
detail=(
|
||||
f"{count} version manifests (>2000); daemon boot will be slow. "
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue