From 5da28062b95ec6c9583ecec1b1b292d8405bb3b4 Mon Sep 17 00:00:00 2001 From: Apunkt Date: Wed, 27 May 2026 18:50:08 +0200 Subject: [PATCH] feat: works, but no title gen in opencode --- src/iai_mcp/qdrant_store.py | 256 +++++++++++++++++++++--------------- 1 file changed, 149 insertions(+), 107 deletions(-) diff --git a/src/iai_mcp/qdrant_store.py b/src/iai_mcp/qdrant_store.py index 8668535..9e1cc63 100644 --- a/src/iai_mcp/qdrant_store.py +++ b/src/iai_mcp/qdrant_store.py @@ -651,6 +651,64 @@ class QdrantStore: offset = next_offset return [self._from_point(p) for p in all_points] + @staticmethod + def _parse_where(where: str | None) -> Filter | None: + """Parse a LanceDB-style where clause into a Qdrant Filter. + + Supported grammar: + - ``key = 'value'`` or ``key = "value"`` → MatchValue + - ``key > N`` or ``key >= N`` → Range(gt/gte) + - ``key < N`` or ``key <= N`` → Range(lt/lte) + - Multiple clauses joined by AND → Filter(must=[...]) + + Numeric values are auto-detected; string values must be + quoted. Returns None if the clause is empty or cannot be parsed. + """ + if not where: + return None + + clauses = re.split(r'\s+AND\s+', where, flags=re.IGNORECASE) + conditions: list = [] + + for clause in clauses: + clause = clause.strip() + if not clause: + continue + + m = re.match( + r"""([a-z_]+)\s*(>=|<=|>|<)\s*(-?\d+(?:\.\d+)?)\s*$""", + clause, + ) + if m: + key, op, val_str = m.group(1), m.group(2), m.group(3) + val = float(val_str) + if op == ">": + conditions.append(FieldCondition(key=key, range=models.Range(gt=val))) + elif op == ">=": + conditions.append(FieldCondition(key=key, range=models.Range(gte=val))) + elif op == "<": + conditions.append(FieldCondition(key=key, range=models.Range(lt=val))) + elif op == "<=": + conditions.append(FieldCondition(key=key, range=models.Range(lte=val))) + continue + + m = re.match( + r"""([a-z_]+)\s*=\s*['"]([^'"]+)['"]\s*$""", + clause, + ) + if m: + key, val = m.group(1), m.group(2) + conditions.append( + FieldCondition(key=key, match=MatchValue(value=val)) + ) + continue + + continue + + if not conditions: + return None + return Filter(must=conditions) + def iter_records( self, *, @@ -661,11 +719,10 @@ class QdrantStore: """Streaming iterator over records (filtered by table=records).""" offset = None while True: - # Build filter: always include table=records, optionally add tier conditions = [FieldCondition(key="table", match=MatchValue(value=RECORDS_TABLE))] - if where and where.startswith("tier = "): - tier = where.split("'")[1] - conditions.append(FieldCondition(key="tier", match=MatchValue(value=tier))) + where_filter = self._parse_where(where) + if where_filter is not None: + conditions.extend(where_filter.must) qdrant_filter = Filter(must=conditions) if conditions else None points, next_offset = self._client.scroll( @@ -698,9 +755,9 @@ class QdrantStore: offset = None while True: conditions = [FieldCondition(key="table", match=MatchValue(value=RECORDS_TABLE))] - if where and where.startswith("tier = "): - tier = where.split("'")[1] - conditions.append(FieldCondition(key="tier", match=MatchValue(value=tier))) + where_filter = self._parse_where(where) + if where_filter is not None: + conditions.extend(where_filter.must) qdrant_filter = Filter(must=conditions) if conditions else None points, next_offset = self._client.scroll( @@ -712,7 +769,6 @@ class QdrantStore: with_vectors=False, ) for point in points: - # Filter to requested columns row = {k: v for k, v in point.payload.items() if k in columns} if "id" in columns: row["id"] = point.id @@ -1349,7 +1405,14 @@ class QdrantStore: return pd.DataFrame(columns=["src", "dst", "edge_type", "weight", "updated_at"]) def records_as_dataframe(self) -> "pd.DataFrame": - """Return all records from the records collection as a pandas DataFrame.""" + """Return all records from the records collection as a pandas DataFrame. + + Column types match the LanceDB schema: + - timestamps are datetime objects (not str) + - structure_hv is bytes (not hex string) + - provenance_json is AES-256-GCM ciphertext (not plaintext JSON) + - tags_json is a JSON string + """ try: records = self.all_records() if not records: @@ -1360,10 +1423,13 @@ class QdrantStore: "stability", "difficulty", "last_reviewed", "never_decay", "never_merge", "detail_level", "s5_trust_score", "structure_hv", - "provenance_json", "created_at", "schema_version", + "provenance_json", "created_at", "updated_at", "schema_version", ]) rows = [] for r in records: + # Re-encrypt provenance to match LanceDB's ciphertext column. + prov_plain = json.dumps(r.provenance or []) + prov_ct = self._encrypt_for_record(r.id, prov_plain) rows.append({ "id": str(r.id), "tier": r.tier, @@ -1372,19 +1438,20 @@ class QdrantStore: "community_id": str(r.community_id) if r.community_id else None, "centrality": r.centrality, "pinned": r.pinned, - "tags_json": r.tags_json if hasattr(r, "tags_json") else "[]", + "tags_json": json.dumps(r.tags), "language": r.language, "aaak_index": r.aaak_index, "stability": r.stability, "difficulty": r.difficulty, - "last_reviewed": str(r.last_reviewed) if r.last_reviewed else None, + "last_reviewed": r.last_reviewed, "never_decay": r.never_decay, "never_merge": r.never_merge, "detail_level": r.detail_level, "s5_trust_score": r.s5_trust_score, - "structure_hv": r.structure_hv.hex() if r.structure_hv else "", - "provenance_json": json.dumps(r.provenance) if r.provenance else "[]", - "created_at": str(r.created_at) if r.created_at else None, + "structure_hv": bytes(r.structure_hv) if r.structure_hv else b"", + "provenance_json": prov_ct, + "created_at": r.created_at, + "updated_at": r.updated_at, "schema_version": r.schema_version, }) return pd.DataFrame(rows) @@ -1396,7 +1463,7 @@ class QdrantStore: "stability", "difficulty", "last_reviewed", "never_decay", "never_merge", "detail_level", "s5_trust_score", "structure_hv", - "provenance_json", "created_at", "schema_version", + "provenance_json", "created_at", "updated_at", "schema_version", ]) # ------------------------------------------------------------------ db shim @@ -1674,6 +1741,15 @@ class QdrantStore: if isinstance(ts_val, datetime) else str(ts_val) if ts_val else "" ) + # Compute ts_epoch for numeric range filtering (matches events_add) + ts_epoch = None + if isinstance(ts_val, datetime): + ts_epoch = ts_val.timestamp() + elif ts_str: + try: + ts_epoch = datetime.fromisoformat(ts_str).timestamp() + except (ValueError, TypeError): + pass points.append(PointStruct( id=str(row.get("id", uuid4())), vector={}, @@ -1685,6 +1761,7 @@ class QdrantStore: "severity": row.get("severity") or "", "domain": row.get("domain") or "", "ts": ts_str, + "ts_epoch": ts_epoch, "data_json": row.get("data_json", ""), "session_id": row.get("session_id", "-"), "source_ids_json": row.get("source_ids_json", "[]"), @@ -1747,105 +1824,70 @@ class QdrantStore: ) def _add_records(self, rows: list[dict]) -> None: - """Insert record rows — converts dicts to PointStruct via _to_point.""" + """Insert record rows — builds PointStruct directly from row dicts. + + The row dicts come from _to_row() which already contains + encrypted ciphertext for literal_surface / provenance_json / + profile_modulation_gain_json. Building a MemoryRecord and + passing it through _to_point would double-encrypt those fields, + so we construct the PointStruct payload directly here. + """ points = [] for row in rows: - # Build a minimal MemoryRecord from the dict so _to_point works - from uuid import UUID as _UUID - rec = MemoryRecord( - id=_UUID(row["id"]), - tier=row.get("tier", "episodic"), - literal_surface="", - aaak_index=row.get("aaak_index", ""), - embedding=list(row.get("embedding", [0.0] * self._store._embed_dim)), - community_id=_UUID(row["community_id"]) if row.get("community_id") else None, - centrality=float(row.get("centrality", 0.0)), - detail_level=int(row.get("detail_level", 1)), - pinned=bool(row.get("pinned", False)), - stability=float(row.get("stability", 0.0)), - difficulty=float(row.get("difficulty", 0.0)), - last_reviewed=None, - never_decay=bool(row.get("never_decay", False)), - never_merge=bool(row.get("never_merge", False)), - provenance=[], - created_at=None, - updated_at=None, - tags=json.loads(row.get("tags_json") or "[]"), - language=row.get("language", "en"), - s5_trust_score=float(row.get("s5_trust_score", 0.5)), - profile_modulation_gain={}, - schema_version=int(row.get("schema_version", 1)), - structure_hv=b"", - ) - points.append(self._store._to_point(rec)) + def _ts(val): + if val is None: + return None + if isinstance(val, datetime): + return val.isoformat() + return str(val) + + # Decode structure_hv from bytes (LanceDB pa.binary → bytes) + structure_raw = row.get("structure_hv") + if isinstance(structure_raw, (bytes, bytearray)): + structure_b64 = base64.b64encode(structure_raw).decode("ascii") + elif isinstance(structure_raw, str): + # Already base64 or hex — keep as-is + structure_b64 = structure_raw + else: + structure_b64 = "" + + points.append(PointStruct( + id=str(row.get("id", uuid4())), + vector=list(row.get("embedding", [0.0] * self._store._embed_dim)), + payload={ + "table": RECORDS_TABLE, + "group_id": self._store._group_id, + "tier": row.get("tier", "episodic"), + "literal_surface": row.get("literal_surface", ""), + "aaak_index": row.get("aaak_index", ""), + "structure_hv": structure_b64, + "community_id": row.get("community_id", ""), + "centrality": float(row.get("centrality", 0.0)), + "detail_level": int(row.get("detail_level", 1)), + "pinned": bool(row.get("pinned", False)), + "stability": float(row.get("stability", 0.0)), + "difficulty": float(row.get("difficulty", 0.0)), + "last_reviewed": _ts(row.get("last_reviewed")), + "never_decay": bool(row.get("never_decay", False)), + "never_merge": bool(row.get("never_merge", False)), + "provenance_json": row.get("provenance_json", "[]"), + "created_at": _ts(row.get("created_at")), + "updated_at": _ts(row.get("updated_at")), + "tags_json": row.get("tags_json", "[]"), + "language": str(row.get("language", "en")), + "s5_trust_score": float(row.get("s5_trust_score", 0.5)), + "profile_modulation_gain_json": row.get("profile_modulation_gain_json", "{}"), + "schema_version": int(row.get("schema_version", 1)), + }, + )) if points: self._store._client.upsert( collection_name=RECORDS_TABLE, points=points, ) def _parse_where(self, where: str | None) -> Filter | None: - """Parse a LanceDB-style where clause into a Qdrant Filter. - - Supported grammar: - - ``key = 'value'`` or ``key = "value"`` → MatchValue - - ``key > N`` or ``key >= N`` → Range(gt/gte) - - ``key < N`` or ``key <= N`` → Range(lt/lte) - - Multiple clauses joined by AND → Filter(must=[...]) - - Numeric values are auto-detected; string values must be - quoted. Returns None if the clause is empty or cannot be parsed. - """ - if not where: - return None - - # Split on AND (case-insensitive) to get individual clauses. - # Use regex to avoid splitting inside quoted strings. - clauses = re.split(r'\s+AND\s+', where, flags=re.IGNORECASE) - conditions: list = [] - - for clause in clauses: - clause = clause.strip() - if not clause: - continue - - # Try numeric comparison operators first: >, >=, <, <= - m = re.match( - r"""([a-z_]+)\s*(>=|<=|>|<)\s*(-?\d+(?:\.\d+)?)\s*$""", - clause, - ) - if m: - key, op, val_str = m.group(1), m.group(2), m.group(3) - val = float(val_str) - if op == ">": - conditions.append(FieldCondition(key=key, range=models.Range(gt=val))) - elif op == ">=": - conditions.append(FieldCondition(key=key, range=models.Range(gte=val))) - elif op == "<": - conditions.append(FieldCondition(key=key, range=models.Range(lt=val))) - elif op == "<=": - conditions.append(FieldCondition(key=key, range=models.Range(lte=val))) - continue - - # Try string equality: key = 'value' or key = "value" - m = re.match( - r"""([a-z_]+)\s*=\s*['"]([^'"]+)['"]\s*$""", - clause, - ) - if m: - key, val = m.group(1), m.group(2) - conditions.append( - FieldCondition(key=key, match=MatchValue(value=val)) - ) - continue - - # Unparseable clause — skip silently (LanceDB would too). - continue - - if not conditions: - return None - if len(conditions) == 1: - return Filter(must=conditions) - return Filter(must=conditions) + """Delegate to QdrantStore._parse_where (single source of truth).""" + return QdrantStore._parse_where(where) @property def db(self) -> "QdrantStore._DbShim":