CLI auth migration, document embeddings core lifecycle (#913)

Migrate get_kg_core and put_kg_core CLI tools to use Api/SocketClient
with first-frame auth (fixes broken raw websocket path). Fix wire
format field names (root/vector). Remove ~600 lines of dead raw
websocket code from invoke_graph_rag.py.

Add document embeddings core lifecycle to the knowledge service:
list/get/put/delete/load operations across schema, translator,
Cassandra table store, knowledge manager, gateway registry, REST API,
socket client, and CLI (tg-get-de-core, tg-put-de-core).

Fix delete_kg_core to also clean up document embeddings rows.
This commit is contained in:
cybermaggedon 2026-05-14 10:30:21 +01:00 committed by GitHub
parent dd974b0cac
commit f0ad282708
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
14 changed files with 762 additions and 825 deletions

View file

@ -1,6 +1,7 @@
from typing import Dict, Any, Tuple, Optional
from ...schema import (
KnowledgeRequest, KnowledgeResponse, Triples, GraphEmbeddings,
DocumentEmbeddings, ChunkEmbeddings,
Metadata, EntityEmbeddings
)
from .base import MessageTranslator
@ -43,6 +44,23 @@ class KnowledgeRequestTranslator(MessageTranslator):
]
)
document_embeddings = None
if "document-embeddings" in data:
document_embeddings = DocumentEmbeddings(
metadata=Metadata(
id=data["document-embeddings"]["metadata"]["id"],
root=data["document-embeddings"]["metadata"].get("root", ""),
collection=data["document-embeddings"]["metadata"]["collection"]
),
chunks=[
ChunkEmbeddings(
chunk_id=ch["chunk_id"],
vector=ch["vector"],
)
for ch in data["document-embeddings"]["chunks"]
]
)
return KnowledgeRequest(
operation=data.get("operation"),
id=data.get("id"),
@ -50,6 +68,7 @@ class KnowledgeRequestTranslator(MessageTranslator):
collection=data.get("collection"),
triples=triples,
graph_embeddings=graph_embeddings,
document_embeddings=document_embeddings,
)
def encode(self, obj: KnowledgeRequest) -> Dict[str, Any]:
@ -90,6 +109,22 @@ class KnowledgeRequestTranslator(MessageTranslator):
],
}
if obj.document_embeddings:
result["document-embeddings"] = {
"metadata": {
"id": obj.document_embeddings.metadata.id,
"root": obj.document_embeddings.metadata.root,
"collection": obj.document_embeddings.metadata.collection,
},
"chunks": [
{
"chunk_id": ch.chunk_id,
"vector": ch.vector,
}
for ch in obj.document_embeddings.chunks
],
}
return result
@ -140,6 +175,25 @@ class KnowledgeResponseTranslator(MessageTranslator):
}
}
# Streaming document embeddings response
if obj.document_embeddings:
return {
"document-embeddings": {
"metadata": {
"id": obj.document_embeddings.metadata.id,
"root": obj.document_embeddings.metadata.root,
"collection": obj.document_embeddings.metadata.collection,
},
"chunks": [
{
"chunk_id": ch.chunk_id,
"vector": ch.vector,
}
for ch in obj.document_embeddings.chunks
],
}
}
# End of stream marker
if obj.eos is True:
return {"eos": True}
@ -155,7 +209,7 @@ class KnowledgeResponseTranslator(MessageTranslator):
is_final = (
obj.ids is not None or # List response
obj.eos is True or # End of stream
(not obj.triples and not obj.graph_embeddings) # Empty response
(not obj.triples and not obj.graph_embeddings and not obj.document_embeddings) # Empty response
)
return response, is_final