From 5da28062b95ec6c9583ecec1b1b292d8405bb3b4 Mon Sep 17 00:00:00 2001
From: Apunkt <a.schwibbe@gmx.net>
Date: Wed, 27 May 2026 18:50:08 +0200
Subject: [PATCH] feat: works, but no title gen in opencode

---
 src/iai_mcp/qdrant_store.py | 256 +++++++++++++++++++++---------------
 1 file changed, 149 insertions(+), 107 deletions(-)

diff --git a/src/iai_mcp/qdrant_store.py b/src/iai_mcp/qdrant_store.py
index 8668535..9e1cc63 100644
--- a/src/iai_mcp/qdrant_store.py
+++ b/src/iai_mcp/qdrant_store.py
@@ -651,6 +651,64 @@ class QdrantStore:
             offset = next_offset
         return [self._from_point(p) for p in all_points]
 
+    @staticmethod
+    def _parse_where(where: str | None) -> Filter | None:
+        """Parse a LanceDB-style where clause into a Qdrant Filter.
+
+        Supported grammar:
+          - ``key = 'value'`` or ``key = "value"`` → MatchValue
+          - ``key > N`` or ``key >= N`` → Range(gt/gte)
+          - ``key < N`` or ``key <= N`` → Range(lt/lte)
+          - Multiple clauses joined by AND → Filter(must=[...])
+
+        Numeric values are auto-detected; string values must be
+        quoted. Returns None if the clause is empty or cannot be parsed.
+        """
+        if not where:
+            return None
+
+        clauses = re.split(r'\s+AND\s+', where, flags=re.IGNORECASE)
+        conditions: list = []
+
+        for clause in clauses:
+            clause = clause.strip()
+            if not clause:
+                continue
+
+            m = re.match(
+                r"""([a-z_]+)\s*(>=|<=|>|<)\s*(-?\d+(?:\.\d+)?)\s*$""",
+                clause,
+            )
+            if m:
+                key, op, val_str = m.group(1), m.group(2), m.group(3)
+                val = float(val_str)
+                if op == ">":
+                    conditions.append(FieldCondition(key=key, range=models.Range(gt=val)))
+                elif op == ">=":
+                    conditions.append(FieldCondition(key=key, range=models.Range(gte=val)))
+                elif op == "<":
+                    conditions.append(FieldCondition(key=key, range=models.Range(lt=val)))
+                elif op == "<=":
+                    conditions.append(FieldCondition(key=key, range=models.Range(lte=val)))
+                continue
+
+            m = re.match(
+                r"""([a-z_]+)\s*=\s*['"]([^'"]+)['"]\s*$""",
+                clause,
+            )
+            if m:
+                key, val = m.group(1), m.group(2)
+                conditions.append(
+                    FieldCondition(key=key, match=MatchValue(value=val))
+                )
+                continue
+
+            continue
+
+        if not conditions:
+            return None
+        return Filter(must=conditions)
+
     def iter_records(
         self,
         *,
@@ -661,11 +719,10 @@ class QdrantStore:
         """Streaming iterator over records (filtered by table=records)."""
         offset = None
         while True:
-            # Build filter: always include table=records, optionally add tier
             conditions = [FieldCondition(key="table", match=MatchValue(value=RECORDS_TABLE))]
-            if where and where.startswith("tier = "):
-                tier = where.split("'")[1]
-                conditions.append(FieldCondition(key="tier", match=MatchValue(value=tier)))
+            where_filter = self._parse_where(where)
+            if where_filter is not None:
+                conditions.extend(where_filter.must)
             qdrant_filter = Filter(must=conditions) if conditions else None
 
             points, next_offset = self._client.scroll(
@@ -698,9 +755,9 @@ class QdrantStore:
         offset = None
         while True:
             conditions = [FieldCondition(key="table", match=MatchValue(value=RECORDS_TABLE))]
-            if where and where.startswith("tier = "):
-                tier = where.split("'")[1]
-                conditions.append(FieldCondition(key="tier", match=MatchValue(value=tier)))
+            where_filter = self._parse_where(where)
+            if where_filter is not None:
+                conditions.extend(where_filter.must)
             qdrant_filter = Filter(must=conditions) if conditions else None
 
             points, next_offset = self._client.scroll(
@@ -712,7 +769,6 @@ class QdrantStore:
                 with_vectors=False,
             )
             for point in points:
-                # Filter to requested columns
                 row = {k: v for k, v in point.payload.items() if k in columns}
                 if "id" in columns:
                     row["id"] = point.id
@@ -1349,7 +1405,14 @@ class QdrantStore:
             return pd.DataFrame(columns=["src", "dst", "edge_type", "weight", "updated_at"])
 
     def records_as_dataframe(self) -> "pd.DataFrame":
-        """Return all records from the records collection as a pandas DataFrame."""
+        """Return all records from the records collection as a pandas DataFrame.
+
+        Column types match the LanceDB schema:
+        - timestamps are datetime objects (not str)
+        - structure_hv is bytes (not hex string)
+        - provenance_json is AES-256-GCM ciphertext (not plaintext JSON)
+        - tags_json is a JSON string
+        """
         try:
             records = self.all_records()
             if not records:
@@ -1360,10 +1423,13 @@ class QdrantStore:
                     "stability", "difficulty", "last_reviewed",
                     "never_decay", "never_merge", "detail_level",
                     "s5_trust_score", "structure_hv",
-                    "provenance_json", "created_at", "schema_version",
+                    "provenance_json", "created_at", "updated_at", "schema_version",
                 ])
             rows = []
             for r in records:
+                # Re-encrypt provenance to match LanceDB's ciphertext column.
+                prov_plain = json.dumps(r.provenance or [])
+                prov_ct = self._encrypt_for_record(r.id, prov_plain)
                 rows.append({
                     "id": str(r.id),
                     "tier": r.tier,
@@ -1372,19 +1438,20 @@ class QdrantStore:
                     "community_id": str(r.community_id) if r.community_id else None,
                     "centrality": r.centrality,
                     "pinned": r.pinned,
-                    "tags_json": r.tags_json if hasattr(r, "tags_json") else "[]",
+                    "tags_json": json.dumps(r.tags),
                     "language": r.language,
                     "aaak_index": r.aaak_index,
                     "stability": r.stability,
                     "difficulty": r.difficulty,
-                    "last_reviewed": str(r.last_reviewed) if r.last_reviewed else None,
+                    "last_reviewed": r.last_reviewed,
                     "never_decay": r.never_decay,
                     "never_merge": r.never_merge,
                     "detail_level": r.detail_level,
                     "s5_trust_score": r.s5_trust_score,
-                    "structure_hv": r.structure_hv.hex() if r.structure_hv else "",
-                    "provenance_json": json.dumps(r.provenance) if r.provenance else "[]",
-                    "created_at": str(r.created_at) if r.created_at else None,
+                    "structure_hv": bytes(r.structure_hv) if r.structure_hv else b"",
+                    "provenance_json": prov_ct,
+                    "created_at": r.created_at,
+                    "updated_at": r.updated_at,
                     "schema_version": r.schema_version,
                 })
             return pd.DataFrame(rows)
@@ -1396,7 +1463,7 @@ class QdrantStore:
                 "stability", "difficulty", "last_reviewed",
                 "never_decay", "never_merge", "detail_level",
                 "s5_trust_score", "structure_hv",
-                "provenance_json", "created_at", "schema_version",
+                "provenance_json", "created_at", "updated_at", "schema_version",
             ])
 
   # ------------------------------------------------------------------ db shim
@@ -1674,6 +1741,15 @@ class QdrantStore:
                         if isinstance(ts_val, datetime)
                         else str(ts_val) if ts_val else ""
                     )
+                    # Compute ts_epoch for numeric range filtering (matches events_add)
+                    ts_epoch = None
+                    if isinstance(ts_val, datetime):
+                        ts_epoch = ts_val.timestamp()
+                    elif ts_str:
+                        try:
+                            ts_epoch = datetime.fromisoformat(ts_str).timestamp()
+                        except (ValueError, TypeError):
+                            pass
                     points.append(PointStruct(
                         id=str(row.get("id", uuid4())),
                         vector={},
@@ -1685,6 +1761,7 @@ class QdrantStore:
                             "severity": row.get("severity") or "",
                             "domain": row.get("domain") or "",
                             "ts": ts_str,
+                            "ts_epoch": ts_epoch,
                             "data_json": row.get("data_json", ""),
                             "session_id": row.get("session_id", "-"),
                             "source_ids_json": row.get("source_ids_json", "[]"),
@@ -1747,105 +1824,70 @@ class QdrantStore:
                     )
 
             def _add_records(self, rows: list[dict]) -> None:
-                """Insert record rows — converts dicts to PointStruct via _to_point."""
+                """Insert record rows — builds PointStruct directly from row dicts.
+
+                The row dicts come from _to_row() which already contains
+                encrypted ciphertext for literal_surface / provenance_json /
+                profile_modulation_gain_json.  Building a MemoryRecord and
+                passing it through _to_point would double-encrypt those fields,
+                so we construct the PointStruct payload directly here.
+                """
                 points = []
                 for row in rows:
-                    # Build a minimal MemoryRecord from the dict so _to_point works
-                    from uuid import UUID as _UUID
-                    rec = MemoryRecord(
-                        id=_UUID(row["id"]),
-                        tier=row.get("tier", "episodic"),
-                        literal_surface="",
-                        aaak_index=row.get("aaak_index", ""),
-                        embedding=list(row.get("embedding", [0.0] * self._store._embed_dim)),
-                        community_id=_UUID(row["community_id"]) if row.get("community_id") else None,
-                        centrality=float(row.get("centrality", 0.0)),
-                        detail_level=int(row.get("detail_level", 1)),
-                        pinned=bool(row.get("pinned", False)),
-                        stability=float(row.get("stability", 0.0)),
-                        difficulty=float(row.get("difficulty", 0.0)),
-                        last_reviewed=None,
-                        never_decay=bool(row.get("never_decay", False)),
-                        never_merge=bool(row.get("never_merge", False)),
-                        provenance=[],
-                        created_at=None,
-                        updated_at=None,
-                        tags=json.loads(row.get("tags_json") or "[]"),
-                        language=row.get("language", "en"),
-                        s5_trust_score=float(row.get("s5_trust_score", 0.5)),
-                        profile_modulation_gain={},
-                        schema_version=int(row.get("schema_version", 1)),
-                        structure_hv=b"",
-                    )
-                    points.append(self._store._to_point(rec))
+                    def _ts(val):
+                        if val is None:
+                            return None
+                        if isinstance(val, datetime):
+                            return val.isoformat()
+                        return str(val)
+
+                    # Decode structure_hv from bytes (LanceDB pa.binary → bytes)
+                    structure_raw = row.get("structure_hv")
+                    if isinstance(structure_raw, (bytes, bytearray)):
+                        structure_b64 = base64.b64encode(structure_raw).decode("ascii")
+                    elif isinstance(structure_raw, str):
+                        # Already base64 or hex — keep as-is
+                        structure_b64 = structure_raw
+                    else:
+                        structure_b64 = ""
+
+                    points.append(PointStruct(
+                        id=str(row.get("id", uuid4())),
+                        vector=list(row.get("embedding", [0.0] * self._store._embed_dim)),
+                        payload={
+                            "table": RECORDS_TABLE,
+                            "group_id": self._store._group_id,
+                            "tier": row.get("tier", "episodic"),
+                            "literal_surface": row.get("literal_surface", ""),
+                            "aaak_index": row.get("aaak_index", ""),
+                            "structure_hv": structure_b64,
+                            "community_id": row.get("community_id", ""),
+                            "centrality": float(row.get("centrality", 0.0)),
+                            "detail_level": int(row.get("detail_level", 1)),
+                            "pinned": bool(row.get("pinned", False)),
+                            "stability": float(row.get("stability", 0.0)),
+                            "difficulty": float(row.get("difficulty", 0.0)),
+                            "last_reviewed": _ts(row.get("last_reviewed")),
+                            "never_decay": bool(row.get("never_decay", False)),
+                            "never_merge": bool(row.get("never_merge", False)),
+                            "provenance_json": row.get("provenance_json", "[]"),
+                            "created_at": _ts(row.get("created_at")),
+                            "updated_at": _ts(row.get("updated_at")),
+                            "tags_json": row.get("tags_json", "[]"),
+                            "language": str(row.get("language", "en")),
+                            "s5_trust_score": float(row.get("s5_trust_score", 0.5)),
+                            "profile_modulation_gain_json": row.get("profile_modulation_gain_json", "{}"),
+                            "schema_version": int(row.get("schema_version", 1)),
+                        },
+                    ))
                 if points:
                     self._store._client.upsert(
                         collection_name=RECORDS_TABLE, points=points,
                     )
 
             def _parse_where(self, where: str | None) -> Filter | None:
-                """Parse a LanceDB-style where clause into a Qdrant Filter.
-
-                Supported grammar:
-                  - ``key = 'value'`` or ``key = "value"`` → MatchValue
-                  - ``key > N`` or ``key >= N`` → Range(gt/gte)
-                  - ``key < N`` or ``key <= N`` → Range(lt/lte)
-                  - Multiple clauses joined by AND → Filter(must=[...])
-
-                Numeric values are auto-detected; string values must be
-                quoted. Returns None if the clause is empty or cannot be parsed.
-                """
-                if not where:
-                    return None
-
-                # Split on AND (case-insensitive) to get individual clauses.
-                # Use regex to avoid splitting inside quoted strings.
-                clauses = re.split(r'\s+AND\s+', where, flags=re.IGNORECASE)
-                conditions: list = []
-
-                for clause in clauses:
-                    clause = clause.strip()
-                    if not clause:
-                        continue
-
-                    # Try numeric comparison operators first: >, >=, <, <=
-                    m = re.match(
-                        r"""([a-z_]+)\s*(>=|<=|>|<)\s*(-?\d+(?:\.\d+)?)\s*$""",
-                        clause,
-                    )
-                    if m:
-                        key, op, val_str = m.group(1), m.group(2), m.group(3)
-                        val = float(val_str)
-                        if op == ">":
-                            conditions.append(FieldCondition(key=key, range=models.Range(gt=val)))
-                        elif op == ">=":
-                            conditions.append(FieldCondition(key=key, range=models.Range(gte=val)))
-                        elif op == "<":
-                            conditions.append(FieldCondition(key=key, range=models.Range(lt=val)))
-                        elif op == "<=":
-                            conditions.append(FieldCondition(key=key, range=models.Range(lte=val)))
-                        continue
-
-                    # Try string equality: key = 'value' or key = "value"
-                    m = re.match(
-                        r"""([a-z_]+)\s*=\s*['"]([^'"]+)['"]\s*$""",
-                        clause,
-                    )
-                    if m:
-                        key, val = m.group(1), m.group(2)
-                        conditions.append(
-                            FieldCondition(key=key, match=MatchValue(value=val))
-                        )
-                        continue
-
-                    # Unparseable clause — skip silently (LanceDB would too).
-                    continue
-
-                if not conditions:
-                    return None
-                if len(conditions) == 1:
-                    return Filter(must=conditions)
-                return Filter(must=conditions)
+                """Delegate to QdrantStore._parse_where (single source of truth)."""
+                return QdrantStore._parse_where(where)
 
     @property
     def db(self) -> "QdrantStore._DbShim":