feat: works, but no title gen in opencode

This commit is contained in:
Apunkt 2026-05-27 18:50:08 +02:00
parent 37ee97b8ec
commit 5da28062b9
No known key found for this signature in database

View file

@ -651,6 +651,64 @@ class QdrantStore:
offset = next_offset
return [self._from_point(p) for p in all_points]
@staticmethod
def _parse_where(where: str | None) -> Filter | None:
"""Parse a LanceDB-style where clause into a Qdrant Filter.
Supported grammar:
- ``key = 'value'`` or ``key = "value"`` MatchValue
- ``key > N`` or ``key >= N`` Range(gt/gte)
- ``key < N`` or ``key <= N`` Range(lt/lte)
- Multiple clauses joined by AND Filter(must=[...])
Numeric values are auto-detected; string values must be
quoted. Returns None if the clause is empty or cannot be parsed.
"""
if not where:
return None
clauses = re.split(r'\s+AND\s+', where, flags=re.IGNORECASE)
conditions: list = []
for clause in clauses:
clause = clause.strip()
if not clause:
continue
m = re.match(
r"""([a-z_]+)\s*(>=|<=|>|<)\s*(-?\d+(?:\.\d+)?)\s*$""",
clause,
)
if m:
key, op, val_str = m.group(1), m.group(2), m.group(3)
val = float(val_str)
if op == ">":
conditions.append(FieldCondition(key=key, range=models.Range(gt=val)))
elif op == ">=":
conditions.append(FieldCondition(key=key, range=models.Range(gte=val)))
elif op == "<":
conditions.append(FieldCondition(key=key, range=models.Range(lt=val)))
elif op == "<=":
conditions.append(FieldCondition(key=key, range=models.Range(lte=val)))
continue
m = re.match(
r"""([a-z_]+)\s*=\s*['"]([^'"]+)['"]\s*$""",
clause,
)
if m:
key, val = m.group(1), m.group(2)
conditions.append(
FieldCondition(key=key, match=MatchValue(value=val))
)
continue
continue
if not conditions:
return None
return Filter(must=conditions)
def iter_records(
self,
*,
@ -661,11 +719,10 @@ class QdrantStore:
"""Streaming iterator over records (filtered by table=records)."""
offset = None
while True:
# Build filter: always include table=records, optionally add tier
conditions = [FieldCondition(key="table", match=MatchValue(value=RECORDS_TABLE))]
if where and where.startswith("tier = "):
tier = where.split("'")[1]
conditions.append(FieldCondition(key="tier", match=MatchValue(value=tier)))
where_filter = self._parse_where(where)
if where_filter is not None:
conditions.extend(where_filter.must)
qdrant_filter = Filter(must=conditions) if conditions else None
points, next_offset = self._client.scroll(
@ -698,9 +755,9 @@ class QdrantStore:
offset = None
while True:
conditions = [FieldCondition(key="table", match=MatchValue(value=RECORDS_TABLE))]
if where and where.startswith("tier = "):
tier = where.split("'")[1]
conditions.append(FieldCondition(key="tier", match=MatchValue(value=tier)))
where_filter = self._parse_where(where)
if where_filter is not None:
conditions.extend(where_filter.must)
qdrant_filter = Filter(must=conditions) if conditions else None
points, next_offset = self._client.scroll(
@ -712,7 +769,6 @@ class QdrantStore:
with_vectors=False,
)
for point in points:
# Filter to requested columns
row = {k: v for k, v in point.payload.items() if k in columns}
if "id" in columns:
row["id"] = point.id
@ -1349,7 +1405,14 @@ class QdrantStore:
return pd.DataFrame(columns=["src", "dst", "edge_type", "weight", "updated_at"])
def records_as_dataframe(self) -> "pd.DataFrame":
"""Return all records from the records collection as a pandas DataFrame."""
"""Return all records from the records collection as a pandas DataFrame.
Column types match the LanceDB schema:
- timestamps are datetime objects (not str)
- structure_hv is bytes (not hex string)
- provenance_json is AES-256-GCM ciphertext (not plaintext JSON)
- tags_json is a JSON string
"""
try:
records = self.all_records()
if not records:
@ -1360,10 +1423,13 @@ class QdrantStore:
"stability", "difficulty", "last_reviewed",
"never_decay", "never_merge", "detail_level",
"s5_trust_score", "structure_hv",
"provenance_json", "created_at", "schema_version",
"provenance_json", "created_at", "updated_at", "schema_version",
])
rows = []
for r in records:
# Re-encrypt provenance to match LanceDB's ciphertext column.
prov_plain = json.dumps(r.provenance or [])
prov_ct = self._encrypt_for_record(r.id, prov_plain)
rows.append({
"id": str(r.id),
"tier": r.tier,
@ -1372,19 +1438,20 @@ class QdrantStore:
"community_id": str(r.community_id) if r.community_id else None,
"centrality": r.centrality,
"pinned": r.pinned,
"tags_json": r.tags_json if hasattr(r, "tags_json") else "[]",
"tags_json": json.dumps(r.tags),
"language": r.language,
"aaak_index": r.aaak_index,
"stability": r.stability,
"difficulty": r.difficulty,
"last_reviewed": str(r.last_reviewed) if r.last_reviewed else None,
"last_reviewed": r.last_reviewed,
"never_decay": r.never_decay,
"never_merge": r.never_merge,
"detail_level": r.detail_level,
"s5_trust_score": r.s5_trust_score,
"structure_hv": r.structure_hv.hex() if r.structure_hv else "",
"provenance_json": json.dumps(r.provenance) if r.provenance else "[]",
"created_at": str(r.created_at) if r.created_at else None,
"structure_hv": bytes(r.structure_hv) if r.structure_hv else b"",
"provenance_json": prov_ct,
"created_at": r.created_at,
"updated_at": r.updated_at,
"schema_version": r.schema_version,
})
return pd.DataFrame(rows)
@ -1396,7 +1463,7 @@ class QdrantStore:
"stability", "difficulty", "last_reviewed",
"never_decay", "never_merge", "detail_level",
"s5_trust_score", "structure_hv",
"provenance_json", "created_at", "schema_version",
"provenance_json", "created_at", "updated_at", "schema_version",
])
# ------------------------------------------------------------------ db shim
@ -1674,6 +1741,15 @@ class QdrantStore:
if isinstance(ts_val, datetime)
else str(ts_val) if ts_val else ""
)
# Compute ts_epoch for numeric range filtering (matches events_add)
ts_epoch = None
if isinstance(ts_val, datetime):
ts_epoch = ts_val.timestamp()
elif ts_str:
try:
ts_epoch = datetime.fromisoformat(ts_str).timestamp()
except (ValueError, TypeError):
pass
points.append(PointStruct(
id=str(row.get("id", uuid4())),
vector={},
@ -1685,6 +1761,7 @@ class QdrantStore:
"severity": row.get("severity") or "",
"domain": row.get("domain") or "",
"ts": ts_str,
"ts_epoch": ts_epoch,
"data_json": row.get("data_json", ""),
"session_id": row.get("session_id", "-"),
"source_ids_json": row.get("source_ids_json", "[]"),
@ -1747,105 +1824,70 @@ class QdrantStore:
)
def _add_records(self, rows: list[dict]) -> None:
"""Insert record rows — converts dicts to PointStruct via _to_point."""
"""Insert record rows — builds PointStruct directly from row dicts.
The row dicts come from _to_row() which already contains
encrypted ciphertext for literal_surface / provenance_json /
profile_modulation_gain_json. Building a MemoryRecord and
passing it through _to_point would double-encrypt those fields,
so we construct the PointStruct payload directly here.
"""
points = []
for row in rows:
# Build a minimal MemoryRecord from the dict so _to_point works
from uuid import UUID as _UUID
rec = MemoryRecord(
id=_UUID(row["id"]),
tier=row.get("tier", "episodic"),
literal_surface="",
aaak_index=row.get("aaak_index", ""),
embedding=list(row.get("embedding", [0.0] * self._store._embed_dim)),
community_id=_UUID(row["community_id"]) if row.get("community_id") else None,
centrality=float(row.get("centrality", 0.0)),
detail_level=int(row.get("detail_level", 1)),
pinned=bool(row.get("pinned", False)),
stability=float(row.get("stability", 0.0)),
difficulty=float(row.get("difficulty", 0.0)),
last_reviewed=None,
never_decay=bool(row.get("never_decay", False)),
never_merge=bool(row.get("never_merge", False)),
provenance=[],
created_at=None,
updated_at=None,
tags=json.loads(row.get("tags_json") or "[]"),
language=row.get("language", "en"),
s5_trust_score=float(row.get("s5_trust_score", 0.5)),
profile_modulation_gain={},
schema_version=int(row.get("schema_version", 1)),
structure_hv=b"",
)
points.append(self._store._to_point(rec))
def _ts(val):
if val is None:
return None
if isinstance(val, datetime):
return val.isoformat()
return str(val)
# Decode structure_hv from bytes (LanceDB pa.binary → bytes)
structure_raw = row.get("structure_hv")
if isinstance(structure_raw, (bytes, bytearray)):
structure_b64 = base64.b64encode(structure_raw).decode("ascii")
elif isinstance(structure_raw, str):
# Already base64 or hex — keep as-is
structure_b64 = structure_raw
else:
structure_b64 = ""
points.append(PointStruct(
id=str(row.get("id", uuid4())),
vector=list(row.get("embedding", [0.0] * self._store._embed_dim)),
payload={
"table": RECORDS_TABLE,
"group_id": self._store._group_id,
"tier": row.get("tier", "episodic"),
"literal_surface": row.get("literal_surface", ""),
"aaak_index": row.get("aaak_index", ""),
"structure_hv": structure_b64,
"community_id": row.get("community_id", ""),
"centrality": float(row.get("centrality", 0.0)),
"detail_level": int(row.get("detail_level", 1)),
"pinned": bool(row.get("pinned", False)),
"stability": float(row.get("stability", 0.0)),
"difficulty": float(row.get("difficulty", 0.0)),
"last_reviewed": _ts(row.get("last_reviewed")),
"never_decay": bool(row.get("never_decay", False)),
"never_merge": bool(row.get("never_merge", False)),
"provenance_json": row.get("provenance_json", "[]"),
"created_at": _ts(row.get("created_at")),
"updated_at": _ts(row.get("updated_at")),
"tags_json": row.get("tags_json", "[]"),
"language": str(row.get("language", "en")),
"s5_trust_score": float(row.get("s5_trust_score", 0.5)),
"profile_modulation_gain_json": row.get("profile_modulation_gain_json", "{}"),
"schema_version": int(row.get("schema_version", 1)),
},
))
if points:
self._store._client.upsert(
collection_name=RECORDS_TABLE, points=points,
)
def _parse_where(self, where: str | None) -> Filter | None:
"""Parse a LanceDB-style where clause into a Qdrant Filter.
Supported grammar:
- ``key = 'value'`` or ``key = "value"`` MatchValue
- ``key > N`` or ``key >= N`` Range(gt/gte)
- ``key < N`` or ``key <= N`` Range(lt/lte)
- Multiple clauses joined by AND Filter(must=[...])
Numeric values are auto-detected; string values must be
quoted. Returns None if the clause is empty or cannot be parsed.
"""
if not where:
return None
# Split on AND (case-insensitive) to get individual clauses.
# Use regex to avoid splitting inside quoted strings.
clauses = re.split(r'\s+AND\s+', where, flags=re.IGNORECASE)
conditions: list = []
for clause in clauses:
clause = clause.strip()
if not clause:
continue
# Try numeric comparison operators first: >, >=, <, <=
m = re.match(
r"""([a-z_]+)\s*(>=|<=|>|<)\s*(-?\d+(?:\.\d+)?)\s*$""",
clause,
)
if m:
key, op, val_str = m.group(1), m.group(2), m.group(3)
val = float(val_str)
if op == ">":
conditions.append(FieldCondition(key=key, range=models.Range(gt=val)))
elif op == ">=":
conditions.append(FieldCondition(key=key, range=models.Range(gte=val)))
elif op == "<":
conditions.append(FieldCondition(key=key, range=models.Range(lt=val)))
elif op == "<=":
conditions.append(FieldCondition(key=key, range=models.Range(lte=val)))
continue
# Try string equality: key = 'value' or key = "value"
m = re.match(
r"""([a-z_]+)\s*=\s*['"]([^'"]+)['"]\s*$""",
clause,
)
if m:
key, val = m.group(1), m.group(2)
conditions.append(
FieldCondition(key=key, match=MatchValue(value=val))
)
continue
# Unparseable clause — skip silently (LanceDB would too).
continue
if not conditions:
return None
if len(conditions) == 1:
return Filter(must=conditions)
return Filter(must=conditions)
"""Delegate to QdrantStore._parse_where (single source of truth)."""
return QdrantStore._parse_where(where)
@property
def db(self) -> "QdrantStore._DbShim":