feat: works, but no title gen in opencode
This commit is contained in:
parent
37ee97b8ec
commit
5da28062b9
1 changed files with 149 additions and 107 deletions
|
|
@ -651,6 +651,64 @@ class QdrantStore:
|
|||
offset = next_offset
|
||||
return [self._from_point(p) for p in all_points]
|
||||
|
||||
@staticmethod
|
||||
def _parse_where(where: str | None) -> Filter | None:
|
||||
"""Parse a LanceDB-style where clause into a Qdrant Filter.
|
||||
|
||||
Supported grammar:
|
||||
- ``key = 'value'`` or ``key = "value"`` → MatchValue
|
||||
- ``key > N`` or ``key >= N`` → Range(gt/gte)
|
||||
- ``key < N`` or ``key <= N`` → Range(lt/lte)
|
||||
- Multiple clauses joined by AND → Filter(must=[...])
|
||||
|
||||
Numeric values are auto-detected; string values must be
|
||||
quoted. Returns None if the clause is empty or cannot be parsed.
|
||||
"""
|
||||
if not where:
|
||||
return None
|
||||
|
||||
clauses = re.split(r'\s+AND\s+', where, flags=re.IGNORECASE)
|
||||
conditions: list = []
|
||||
|
||||
for clause in clauses:
|
||||
clause = clause.strip()
|
||||
if not clause:
|
||||
continue
|
||||
|
||||
m = re.match(
|
||||
r"""([a-z_]+)\s*(>=|<=|>|<)\s*(-?\d+(?:\.\d+)?)\s*$""",
|
||||
clause,
|
||||
)
|
||||
if m:
|
||||
key, op, val_str = m.group(1), m.group(2), m.group(3)
|
||||
val = float(val_str)
|
||||
if op == ">":
|
||||
conditions.append(FieldCondition(key=key, range=models.Range(gt=val)))
|
||||
elif op == ">=":
|
||||
conditions.append(FieldCondition(key=key, range=models.Range(gte=val)))
|
||||
elif op == "<":
|
||||
conditions.append(FieldCondition(key=key, range=models.Range(lt=val)))
|
||||
elif op == "<=":
|
||||
conditions.append(FieldCondition(key=key, range=models.Range(lte=val)))
|
||||
continue
|
||||
|
||||
m = re.match(
|
||||
r"""([a-z_]+)\s*=\s*['"]([^'"]+)['"]\s*$""",
|
||||
clause,
|
||||
)
|
||||
if m:
|
||||
key, val = m.group(1), m.group(2)
|
||||
conditions.append(
|
||||
FieldCondition(key=key, match=MatchValue(value=val))
|
||||
)
|
||||
continue
|
||||
|
||||
continue
|
||||
|
||||
if not conditions:
|
||||
return None
|
||||
return Filter(must=conditions)
|
||||
|
||||
def iter_records(
|
||||
self,
|
||||
*,
|
||||
|
|
@ -661,11 +719,10 @@ class QdrantStore:
|
|||
"""Streaming iterator over records (filtered by table=records)."""
|
||||
offset = None
|
||||
while True:
|
||||
# Build filter: always include table=records, optionally add tier
|
||||
conditions = [FieldCondition(key="table", match=MatchValue(value=RECORDS_TABLE))]
|
||||
if where and where.startswith("tier = "):
|
||||
tier = where.split("'")[1]
|
||||
conditions.append(FieldCondition(key="tier", match=MatchValue(value=tier)))
|
||||
where_filter = self._parse_where(where)
|
||||
if where_filter is not None:
|
||||
conditions.extend(where_filter.must)
|
||||
qdrant_filter = Filter(must=conditions) if conditions else None
|
||||
|
||||
points, next_offset = self._client.scroll(
|
||||
|
|
@ -698,9 +755,9 @@ class QdrantStore:
|
|||
offset = None
|
||||
while True:
|
||||
conditions = [FieldCondition(key="table", match=MatchValue(value=RECORDS_TABLE))]
|
||||
if where and where.startswith("tier = "):
|
||||
tier = where.split("'")[1]
|
||||
conditions.append(FieldCondition(key="tier", match=MatchValue(value=tier)))
|
||||
where_filter = self._parse_where(where)
|
||||
if where_filter is not None:
|
||||
conditions.extend(where_filter.must)
|
||||
qdrant_filter = Filter(must=conditions) if conditions else None
|
||||
|
||||
points, next_offset = self._client.scroll(
|
||||
|
|
@ -712,7 +769,6 @@ class QdrantStore:
|
|||
with_vectors=False,
|
||||
)
|
||||
for point in points:
|
||||
# Filter to requested columns
|
||||
row = {k: v for k, v in point.payload.items() if k in columns}
|
||||
if "id" in columns:
|
||||
row["id"] = point.id
|
||||
|
|
@ -1349,7 +1405,14 @@ class QdrantStore:
|
|||
return pd.DataFrame(columns=["src", "dst", "edge_type", "weight", "updated_at"])
|
||||
|
||||
def records_as_dataframe(self) -> "pd.DataFrame":
|
||||
"""Return all records from the records collection as a pandas DataFrame."""
|
||||
"""Return all records from the records collection as a pandas DataFrame.
|
||||
|
||||
Column types match the LanceDB schema:
|
||||
- timestamps are datetime objects (not str)
|
||||
- structure_hv is bytes (not hex string)
|
||||
- provenance_json is AES-256-GCM ciphertext (not plaintext JSON)
|
||||
- tags_json is a JSON string
|
||||
"""
|
||||
try:
|
||||
records = self.all_records()
|
||||
if not records:
|
||||
|
|
@ -1360,10 +1423,13 @@ class QdrantStore:
|
|||
"stability", "difficulty", "last_reviewed",
|
||||
"never_decay", "never_merge", "detail_level",
|
||||
"s5_trust_score", "structure_hv",
|
||||
"provenance_json", "created_at", "schema_version",
|
||||
"provenance_json", "created_at", "updated_at", "schema_version",
|
||||
])
|
||||
rows = []
|
||||
for r in records:
|
||||
# Re-encrypt provenance to match LanceDB's ciphertext column.
|
||||
prov_plain = json.dumps(r.provenance or [])
|
||||
prov_ct = self._encrypt_for_record(r.id, prov_plain)
|
||||
rows.append({
|
||||
"id": str(r.id),
|
||||
"tier": r.tier,
|
||||
|
|
@ -1372,19 +1438,20 @@ class QdrantStore:
|
|||
"community_id": str(r.community_id) if r.community_id else None,
|
||||
"centrality": r.centrality,
|
||||
"pinned": r.pinned,
|
||||
"tags_json": r.tags_json if hasattr(r, "tags_json") else "[]",
|
||||
"tags_json": json.dumps(r.tags),
|
||||
"language": r.language,
|
||||
"aaak_index": r.aaak_index,
|
||||
"stability": r.stability,
|
||||
"difficulty": r.difficulty,
|
||||
"last_reviewed": str(r.last_reviewed) if r.last_reviewed else None,
|
||||
"last_reviewed": r.last_reviewed,
|
||||
"never_decay": r.never_decay,
|
||||
"never_merge": r.never_merge,
|
||||
"detail_level": r.detail_level,
|
||||
"s5_trust_score": r.s5_trust_score,
|
||||
"structure_hv": r.structure_hv.hex() if r.structure_hv else "",
|
||||
"provenance_json": json.dumps(r.provenance) if r.provenance else "[]",
|
||||
"created_at": str(r.created_at) if r.created_at else None,
|
||||
"structure_hv": bytes(r.structure_hv) if r.structure_hv else b"",
|
||||
"provenance_json": prov_ct,
|
||||
"created_at": r.created_at,
|
||||
"updated_at": r.updated_at,
|
||||
"schema_version": r.schema_version,
|
||||
})
|
||||
return pd.DataFrame(rows)
|
||||
|
|
@ -1396,7 +1463,7 @@ class QdrantStore:
|
|||
"stability", "difficulty", "last_reviewed",
|
||||
"never_decay", "never_merge", "detail_level",
|
||||
"s5_trust_score", "structure_hv",
|
||||
"provenance_json", "created_at", "schema_version",
|
||||
"provenance_json", "created_at", "updated_at", "schema_version",
|
||||
])
|
||||
|
||||
# ------------------------------------------------------------------ db shim
|
||||
|
|
@ -1674,6 +1741,15 @@ class QdrantStore:
|
|||
if isinstance(ts_val, datetime)
|
||||
else str(ts_val) if ts_val else ""
|
||||
)
|
||||
# Compute ts_epoch for numeric range filtering (matches events_add)
|
||||
ts_epoch = None
|
||||
if isinstance(ts_val, datetime):
|
||||
ts_epoch = ts_val.timestamp()
|
||||
elif ts_str:
|
||||
try:
|
||||
ts_epoch = datetime.fromisoformat(ts_str).timestamp()
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
points.append(PointStruct(
|
||||
id=str(row.get("id", uuid4())),
|
||||
vector={},
|
||||
|
|
@ -1685,6 +1761,7 @@ class QdrantStore:
|
|||
"severity": row.get("severity") or "",
|
||||
"domain": row.get("domain") or "",
|
||||
"ts": ts_str,
|
||||
"ts_epoch": ts_epoch,
|
||||
"data_json": row.get("data_json", ""),
|
||||
"session_id": row.get("session_id", "-"),
|
||||
"source_ids_json": row.get("source_ids_json", "[]"),
|
||||
|
|
@ -1747,105 +1824,70 @@ class QdrantStore:
|
|||
)
|
||||
|
||||
def _add_records(self, rows: list[dict]) -> None:
|
||||
"""Insert record rows — converts dicts to PointStruct via _to_point."""
|
||||
"""Insert record rows — builds PointStruct directly from row dicts.
|
||||
|
||||
The row dicts come from _to_row() which already contains
|
||||
encrypted ciphertext for literal_surface / provenance_json /
|
||||
profile_modulation_gain_json. Building a MemoryRecord and
|
||||
passing it through _to_point would double-encrypt those fields,
|
||||
so we construct the PointStruct payload directly here.
|
||||
"""
|
||||
points = []
|
||||
for row in rows:
|
||||
# Build a minimal MemoryRecord from the dict so _to_point works
|
||||
from uuid import UUID as _UUID
|
||||
rec = MemoryRecord(
|
||||
id=_UUID(row["id"]),
|
||||
tier=row.get("tier", "episodic"),
|
||||
literal_surface="",
|
||||
aaak_index=row.get("aaak_index", ""),
|
||||
embedding=list(row.get("embedding", [0.0] * self._store._embed_dim)),
|
||||
community_id=_UUID(row["community_id"]) if row.get("community_id") else None,
|
||||
centrality=float(row.get("centrality", 0.0)),
|
||||
detail_level=int(row.get("detail_level", 1)),
|
||||
pinned=bool(row.get("pinned", False)),
|
||||
stability=float(row.get("stability", 0.0)),
|
||||
difficulty=float(row.get("difficulty", 0.0)),
|
||||
last_reviewed=None,
|
||||
never_decay=bool(row.get("never_decay", False)),
|
||||
never_merge=bool(row.get("never_merge", False)),
|
||||
provenance=[],
|
||||
created_at=None,
|
||||
updated_at=None,
|
||||
tags=json.loads(row.get("tags_json") or "[]"),
|
||||
language=row.get("language", "en"),
|
||||
s5_trust_score=float(row.get("s5_trust_score", 0.5)),
|
||||
profile_modulation_gain={},
|
||||
schema_version=int(row.get("schema_version", 1)),
|
||||
structure_hv=b"",
|
||||
)
|
||||
points.append(self._store._to_point(rec))
|
||||
def _ts(val):
|
||||
if val is None:
|
||||
return None
|
||||
if isinstance(val, datetime):
|
||||
return val.isoformat()
|
||||
return str(val)
|
||||
|
||||
# Decode structure_hv from bytes (LanceDB pa.binary → bytes)
|
||||
structure_raw = row.get("structure_hv")
|
||||
if isinstance(structure_raw, (bytes, bytearray)):
|
||||
structure_b64 = base64.b64encode(structure_raw).decode("ascii")
|
||||
elif isinstance(structure_raw, str):
|
||||
# Already base64 or hex — keep as-is
|
||||
structure_b64 = structure_raw
|
||||
else:
|
||||
structure_b64 = ""
|
||||
|
||||
points.append(PointStruct(
|
||||
id=str(row.get("id", uuid4())),
|
||||
vector=list(row.get("embedding", [0.0] * self._store._embed_dim)),
|
||||
payload={
|
||||
"table": RECORDS_TABLE,
|
||||
"group_id": self._store._group_id,
|
||||
"tier": row.get("tier", "episodic"),
|
||||
"literal_surface": row.get("literal_surface", ""),
|
||||
"aaak_index": row.get("aaak_index", ""),
|
||||
"structure_hv": structure_b64,
|
||||
"community_id": row.get("community_id", ""),
|
||||
"centrality": float(row.get("centrality", 0.0)),
|
||||
"detail_level": int(row.get("detail_level", 1)),
|
||||
"pinned": bool(row.get("pinned", False)),
|
||||
"stability": float(row.get("stability", 0.0)),
|
||||
"difficulty": float(row.get("difficulty", 0.0)),
|
||||
"last_reviewed": _ts(row.get("last_reviewed")),
|
||||
"never_decay": bool(row.get("never_decay", False)),
|
||||
"never_merge": bool(row.get("never_merge", False)),
|
||||
"provenance_json": row.get("provenance_json", "[]"),
|
||||
"created_at": _ts(row.get("created_at")),
|
||||
"updated_at": _ts(row.get("updated_at")),
|
||||
"tags_json": row.get("tags_json", "[]"),
|
||||
"language": str(row.get("language", "en")),
|
||||
"s5_trust_score": float(row.get("s5_trust_score", 0.5)),
|
||||
"profile_modulation_gain_json": row.get("profile_modulation_gain_json", "{}"),
|
||||
"schema_version": int(row.get("schema_version", 1)),
|
||||
},
|
||||
))
|
||||
if points:
|
||||
self._store._client.upsert(
|
||||
collection_name=RECORDS_TABLE, points=points,
|
||||
)
|
||||
|
||||
def _parse_where(self, where: str | None) -> Filter | None:
|
||||
"""Parse a LanceDB-style where clause into a Qdrant Filter.
|
||||
|
||||
Supported grammar:
|
||||
- ``key = 'value'`` or ``key = "value"`` → MatchValue
|
||||
- ``key > N`` or ``key >= N`` → Range(gt/gte)
|
||||
- ``key < N`` or ``key <= N`` → Range(lt/lte)
|
||||
- Multiple clauses joined by AND → Filter(must=[...])
|
||||
|
||||
Numeric values are auto-detected; string values must be
|
||||
quoted. Returns None if the clause is empty or cannot be parsed.
|
||||
"""
|
||||
if not where:
|
||||
return None
|
||||
|
||||
# Split on AND (case-insensitive) to get individual clauses.
|
||||
# Use regex to avoid splitting inside quoted strings.
|
||||
clauses = re.split(r'\s+AND\s+', where, flags=re.IGNORECASE)
|
||||
conditions: list = []
|
||||
|
||||
for clause in clauses:
|
||||
clause = clause.strip()
|
||||
if not clause:
|
||||
continue
|
||||
|
||||
# Try numeric comparison operators first: >, >=, <, <=
|
||||
m = re.match(
|
||||
r"""([a-z_]+)\s*(>=|<=|>|<)\s*(-?\d+(?:\.\d+)?)\s*$""",
|
||||
clause,
|
||||
)
|
||||
if m:
|
||||
key, op, val_str = m.group(1), m.group(2), m.group(3)
|
||||
val = float(val_str)
|
||||
if op == ">":
|
||||
conditions.append(FieldCondition(key=key, range=models.Range(gt=val)))
|
||||
elif op == ">=":
|
||||
conditions.append(FieldCondition(key=key, range=models.Range(gte=val)))
|
||||
elif op == "<":
|
||||
conditions.append(FieldCondition(key=key, range=models.Range(lt=val)))
|
||||
elif op == "<=":
|
||||
conditions.append(FieldCondition(key=key, range=models.Range(lte=val)))
|
||||
continue
|
||||
|
||||
# Try string equality: key = 'value' or key = "value"
|
||||
m = re.match(
|
||||
r"""([a-z_]+)\s*=\s*['"]([^'"]+)['"]\s*$""",
|
||||
clause,
|
||||
)
|
||||
if m:
|
||||
key, val = m.group(1), m.group(2)
|
||||
conditions.append(
|
||||
FieldCondition(key=key, match=MatchValue(value=val))
|
||||
)
|
||||
continue
|
||||
|
||||
# Unparseable clause — skip silently (LanceDB would too).
|
||||
continue
|
||||
|
||||
if not conditions:
|
||||
return None
|
||||
if len(conditions) == 1:
|
||||
return Filter(must=conditions)
|
||||
return Filter(must=conditions)
|
||||
"""Delegate to QdrantStore._parse_where (single source of truth)."""
|
||||
return QdrantStore._parse_where(where)
|
||||
|
||||
@property
|
||||
def db(self) -> "QdrantStore._DbShim":
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue