feat: works, but no title gen in opencode

2026-05-27 18:50:08 +02:00 · 2026-05-27 18:50:08 +02:00 · 5da28062b9
commit 5da28062b9
parent 37ee97b8ec
1 changed files with 149 additions and 107 deletions
--- a/src/iai_mcp/qdrant_store.py
+++ b/src/iai_mcp/qdrant_store.py
@ -651,6 +651,64 @@ class QdrantStore:
            offset = next_offset
        return [self._from_point(p) for p in all_points]

+    @staticmethod
+    def _parse_where(where: str | None) -> Filter | None:
+        """Parse a LanceDB-style where clause into a Qdrant Filter.
+
+        Supported grammar:
+          - ``key = 'value'`` or ``key = "value"`` → MatchValue
+          - ``key > N`` or ``key >= N`` → Range(gt/gte)
+          - ``key < N`` or ``key <= N`` → Range(lt/lte)
+          - Multiple clauses joined by AND → Filter(must=[...])
+
+        Numeric values are auto-detected; string values must be
+        quoted. Returns None if the clause is empty or cannot be parsed.
+        """
+        if not where:
+            return None
+
+        clauses = re.split(r'\s+AND\s+', where, flags=re.IGNORECASE)
+        conditions: list = []
+
+        for clause in clauses:
+            clause = clause.strip()
+            if not clause:
+                continue
+
+            m = re.match(
+                r"""([a-z_]+)\s*(>=|<=|>|<)\s*(-?\d+(?:\.\d+)?)\s*$""",
+                clause,
+            )
+            if m:
+                key, op, val_str = m.group(1), m.group(2), m.group(3)
+                val = float(val_str)
+                if op == ">":
+                    conditions.append(FieldCondition(key=key, range=models.Range(gt=val)))
+                elif op == ">=":
+                    conditions.append(FieldCondition(key=key, range=models.Range(gte=val)))
+                elif op == "<":
+                    conditions.append(FieldCondition(key=key, range=models.Range(lt=val)))
+                elif op == "<=":
+                    conditions.append(FieldCondition(key=key, range=models.Range(lte=val)))
+                continue
+
+            m = re.match(
+                r"""([a-z_]+)\s*=\s*['"]([^'"]+)['"]\s*$""",
+                clause,
+            )
+            if m:
+                key, val = m.group(1), m.group(2)
+                conditions.append(
+                    FieldCondition(key=key, match=MatchValue(value=val))
+                )
+                continue
+
+            continue
+
+        if not conditions:
+            return None
+        return Filter(must=conditions)
+
    def iter_records(
        self,
        *,
@ -661,11 +719,10 @@ class QdrantStore:
        """Streaming iterator over records (filtered by table=records)."""
        offset = None
        while True:
-            # Build filter: always include table=records, optionally add tier
            conditions = [FieldCondition(key="table", match=MatchValue(value=RECORDS_TABLE))]
-            if where and where.startswith("tier = "):
-                tier = where.split("'")[1]
-                conditions.append(FieldCondition(key="tier", match=MatchValue(value=tier)))
+            where_filter = self._parse_where(where)
+            if where_filter is not None:
+                conditions.extend(where_filter.must)
            qdrant_filter = Filter(must=conditions) if conditions else None

            points, next_offset = self._client.scroll(
@ -698,9 +755,9 @@ class QdrantStore:
        offset = None
        while True:
            conditions = [FieldCondition(key="table", match=MatchValue(value=RECORDS_TABLE))]
-            if where and where.startswith("tier = "):
-                tier = where.split("'")[1]
-                conditions.append(FieldCondition(key="tier", match=MatchValue(value=tier)))
+            where_filter = self._parse_where(where)
+            if where_filter is not None:
+                conditions.extend(where_filter.must)
            qdrant_filter = Filter(must=conditions) if conditions else None

            points, next_offset = self._client.scroll(
@ -712,7 +769,6 @@ class QdrantStore:
                with_vectors=False,
            )
            for point in points:
-                # Filter to requested columns
                row = {k: v for k, v in point.payload.items() if k in columns}
                if "id" in columns:
                    row["id"] = point.id
@ -1349,7 +1405,14 @@ class QdrantStore:
            return pd.DataFrame(columns=["src", "dst", "edge_type", "weight", "updated_at"])

    def records_as_dataframe(self) -> "pd.DataFrame":
-        """Return all records from the records collection as a pandas DataFrame."""
+        """Return all records from the records collection as a pandas DataFrame.
+
+        Column types match the LanceDB schema:
+        - timestamps are datetime objects (not str)
+        - structure_hv is bytes (not hex string)
+        - provenance_json is AES-256-GCM ciphertext (not plaintext JSON)
+        - tags_json is a JSON string
+        """
        try:
            records = self.all_records()
            if not records:
@ -1360,10 +1423,13 @@ class QdrantStore:
                    "stability", "difficulty", "last_reviewed",
                    "never_decay", "never_merge", "detail_level",
                    "s5_trust_score", "structure_hv",
-                    "provenance_json", "created_at", "schema_version",
+                    "provenance_json", "created_at", "updated_at", "schema_version",
                ])
            rows = []
            for r in records:
+                # Re-encrypt provenance to match LanceDB's ciphertext column.
+                prov_plain = json.dumps(r.provenance or [])
+                prov_ct = self._encrypt_for_record(r.id, prov_plain)
                rows.append({
                    "id": str(r.id),
                    "tier": r.tier,
@ -1372,19 +1438,20 @@ class QdrantStore:
                    "community_id": str(r.community_id) if r.community_id else None,
                    "centrality": r.centrality,
                    "pinned": r.pinned,
-                    "tags_json": r.tags_json if hasattr(r, "tags_json") else "[]",
+                    "tags_json": json.dumps(r.tags),
                    "language": r.language,
                    "aaak_index": r.aaak_index,
                    "stability": r.stability,
                    "difficulty": r.difficulty,
-                    "last_reviewed": str(r.last_reviewed) if r.last_reviewed else None,
+                    "last_reviewed": r.last_reviewed,
                    "never_decay": r.never_decay,
                    "never_merge": r.never_merge,
                    "detail_level": r.detail_level,
                    "s5_trust_score": r.s5_trust_score,
-                    "structure_hv": r.structure_hv.hex() if r.structure_hv else "",
-                    "provenance_json": json.dumps(r.provenance) if r.provenance else "[]",
-                    "created_at": str(r.created_at) if r.created_at else None,
+                    "structure_hv": bytes(r.structure_hv) if r.structure_hv else b"",
+                    "provenance_json": prov_ct,
+                    "created_at": r.created_at,
+                    "updated_at": r.updated_at,
                    "schema_version": r.schema_version,
                })
            return pd.DataFrame(rows)
@ -1396,7 +1463,7 @@ class QdrantStore:
                "stability", "difficulty", "last_reviewed",
                "never_decay", "never_merge", "detail_level",
                "s5_trust_score", "structure_hv",
-                "provenance_json", "created_at", "schema_version",
+                "provenance_json", "created_at", "updated_at", "schema_version",
            ])

  # ------------------------------------------------------------------ db shim
@ -1674,6 +1741,15 @@ class QdrantStore:
                        if isinstance(ts_val, datetime)
                        else str(ts_val) if ts_val else ""
                    )
+                    # Compute ts_epoch for numeric range filtering (matches events_add)
+                    ts_epoch = None
+                    if isinstance(ts_val, datetime):
+                        ts_epoch = ts_val.timestamp()
+                    elif ts_str:
+                        try:
+                            ts_epoch = datetime.fromisoformat(ts_str).timestamp()
+                        except (ValueError, TypeError):
+                            pass
                    points.append(PointStruct(
                        id=str(row.get("id", uuid4())),
                        vector={},
@ -1685,6 +1761,7 @@ class QdrantStore:
                            "severity": row.get("severity") or "",
                            "domain": row.get("domain") or "",
                            "ts": ts_str,
+                            "ts_epoch": ts_epoch,
                            "data_json": row.get("data_json", ""),
                            "session_id": row.get("session_id", "-"),
                            "source_ids_json": row.get("source_ids_json", "[]"),
@ -1747,105 +1824,70 @@ class QdrantStore:
                    )

            def _add_records(self, rows: list[dict]) -> None:
-                """Insert record rows — converts dicts to PointStruct via _to_point."""
+                """Insert record rows — builds PointStruct directly from row dicts.
+
+                The row dicts come from _to_row() which already contains
+                encrypted ciphertext for literal_surface / provenance_json /
+                profile_modulation_gain_json.  Building a MemoryRecord and
+                passing it through _to_point would double-encrypt those fields,
+                so we construct the PointStruct payload directly here.
+                """
                points = []
                for row in rows:
-                    # Build a minimal MemoryRecord from the dict so _to_point works
-                    from uuid import UUID as _UUID
-                    rec = MemoryRecord(
-                        id=_UUID(row["id"]),
-                        tier=row.get("tier", "episodic"),
-                        literal_surface="",
-                        aaak_index=row.get("aaak_index", ""),
-                        embedding=list(row.get("embedding", [0.0] * self._store._embed_dim)),
-                        community_id=_UUID(row["community_id"]) if row.get("community_id") else None,
-                        centrality=float(row.get("centrality", 0.0)),
-                        detail_level=int(row.get("detail_level", 1)),
-                        pinned=bool(row.get("pinned", False)),
-                        stability=float(row.get("stability", 0.0)),
-                        difficulty=float(row.get("difficulty", 0.0)),
-                        last_reviewed=None,
-                        never_decay=bool(row.get("never_decay", False)),
-                        never_merge=bool(row.get("never_merge", False)),
-                        provenance=[],
-                        created_at=None,
-                        updated_at=None,
-                        tags=json.loads(row.get("tags_json") or "[]"),
-                        language=row.get("language", "en"),
-                        s5_trust_score=float(row.get("s5_trust_score", 0.5)),
-                        profile_modulation_gain={},
-                        schema_version=int(row.get("schema_version", 1)),
-                        structure_hv=b"",
-                    )
-                    points.append(self._store._to_point(rec))
+                    def _ts(val):
+                        if val is None:
+                            return None
+                        if isinstance(val, datetime):
+                            return val.isoformat()
+                        return str(val)
+
+                    # Decode structure_hv from bytes (LanceDB pa.binary → bytes)
+                    structure_raw = row.get("structure_hv")
+                    if isinstance(structure_raw, (bytes, bytearray)):
+                        structure_b64 = base64.b64encode(structure_raw).decode("ascii")
+                    elif isinstance(structure_raw, str):
+                        # Already base64 or hex — keep as-is
+                        structure_b64 = structure_raw
+                    else:
+                        structure_b64 = ""
+
+                    points.append(PointStruct(
+                        id=str(row.get("id", uuid4())),
+                        vector=list(row.get("embedding", [0.0] * self._store._embed_dim)),
+                        payload={
+                            "table": RECORDS_TABLE,
+                            "group_id": self._store._group_id,
+                            "tier": row.get("tier", "episodic"),
+                            "literal_surface": row.get("literal_surface", ""),
+                            "aaak_index": row.get("aaak_index", ""),
+                            "structure_hv": structure_b64,
+                            "community_id": row.get("community_id", ""),
+                            "centrality": float(row.get("centrality", 0.0)),
+                            "detail_level": int(row.get("detail_level", 1)),
+                            "pinned": bool(row.get("pinned", False)),
+                            "stability": float(row.get("stability", 0.0)),
+                            "difficulty": float(row.get("difficulty", 0.0)),
+                            "last_reviewed": _ts(row.get("last_reviewed")),
+                            "never_decay": bool(row.get("never_decay", False)),
+                            "never_merge": bool(row.get("never_merge", False)),
+                            "provenance_json": row.get("provenance_json", "[]"),
+                            "created_at": _ts(row.get("created_at")),
+                            "updated_at": _ts(row.get("updated_at")),
+                            "tags_json": row.get("tags_json", "[]"),
+                            "language": str(row.get("language", "en")),
+                            "s5_trust_score": float(row.get("s5_trust_score", 0.5)),
+                            "profile_modulation_gain_json": row.get("profile_modulation_gain_json", "{}"),
+                            "schema_version": int(row.get("schema_version", 1)),
+                        },
+                    ))
                if points:
                    self._store._client.upsert(
                        collection_name=RECORDS_TABLE, points=points,
                    )

            def _parse_where(self, where: str | None) -> Filter | None:
-                """Parse a LanceDB-style where clause into a Qdrant Filter.
-
-                Supported grammar:
-                  - ``key = 'value'`` or ``key = "value"`` → MatchValue
-                  - ``key > N`` or ``key >= N`` → Range(gt/gte)
-                  - ``key < N`` or ``key <= N`` → Range(lt/lte)
-                  - Multiple clauses joined by AND → Filter(must=[...])
-
-                Numeric values are auto-detected; string values must be
-                quoted. Returns None if the clause is empty or cannot be parsed.
-                """
-                if not where:
-                    return None
-
-                # Split on AND (case-insensitive) to get individual clauses.
-                # Use regex to avoid splitting inside quoted strings.
-                clauses = re.split(r'\s+AND\s+', where, flags=re.IGNORECASE)
-                conditions: list = []
-
-                for clause in clauses:
-                    clause = clause.strip()
-                    if not clause:
-                        continue
-
-                    # Try numeric comparison operators first: >, >=, <, <=
-                    m = re.match(
-                        r"""([a-z_]+)\s*(>=|<=|>|<)\s*(-?\d+(?:\.\d+)?)\s*$""",
-                        clause,
-                    )
-                    if m:
-                        key, op, val_str = m.group(1), m.group(2), m.group(3)
-                        val = float(val_str)
-                        if op == ">":
-                            conditions.append(FieldCondition(key=key, range=models.Range(gt=val)))
-                        elif op == ">=":
-                            conditions.append(FieldCondition(key=key, range=models.Range(gte=val)))
-                        elif op == "<":
-                            conditions.append(FieldCondition(key=key, range=models.Range(lt=val)))
-                        elif op == "<=":
-                            conditions.append(FieldCondition(key=key, range=models.Range(lte=val)))
-                        continue
-
-                    # Try string equality: key = 'value' or key = "value"
-                    m = re.match(
-                        r"""([a-z_]+)\s*=\s*['"]([^'"]+)['"]\s*$""",
-                        clause,
-                    )
-                    if m:
-                        key, val = m.group(1), m.group(2)
-                        conditions.append(
-                            FieldCondition(key=key, match=MatchValue(value=val))
-                        )
-                        continue
-
-                    # Unparseable clause — skip silently (LanceDB would too).
-                    continue
-
-                if not conditions:
-                    return None
-                if len(conditions) == 1:
-                    return Filter(must=conditions)
-                return Filter(must=conditions)
+                """Delegate to QdrantStore._parse_where (single source of truth)."""
+                return QdrantStore._parse_where(where)

    @property
    def db(self) -> "QdrantStore._DbShim":