mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-05-16 19:05:14 +02:00
CLI auth migration, document embeddings core lifecycle (#913)
Migrate get_kg_core and put_kg_core CLI tools to use Api/SocketClient with first-frame auth (fixes broken raw websocket path). Fix wire format field names (root/vector). Remove ~600 lines of dead raw websocket code from invoke_graph_rag.py. Add document embeddings core lifecycle to the knowledge service: list/get/put/delete/load operations across schema, translator, Cassandra table store, knowledge manager, gateway registry, REST API, socket client, and CLI (tg-get-de-core, tg-put-de-core). Fix delete_kg_core to also clean up document embeddings rows.
This commit is contained in:
parent
dd974b0cac
commit
f0ad282708
14 changed files with 762 additions and 825 deletions
|
|
@ -132,3 +132,34 @@ class Knowledge:
|
|||
|
||||
self.request(request = input)
|
||||
|
||||
def list_de_cores(self):
    """Return the ids of all document-embeddings cores in the workspace.

    Sends a "list-de-cores" knowledge request via self.request and
    extracts the "ids" field from the response.

    Returns:
        list: core identifiers reported by the knowledge service.

    Raises:
        KeyError: if the response carries no "ids" field.
    """

    # Named "req" rather than "input" so the builtin input() is not shadowed.
    req = {
        "operation": "list-de-cores",
        "workspace": self.api.workspace,
    }

    return self.request(request = req)["ids"]
|
||||
|
||||
def delete_de_core(self, id):
    """Delete the document-embeddings core identified by *id*.

    The response payload is intentionally discarded; failures surface
    as exceptions raised by self.request.

    Args:
        id: identifier of the document-embeddings core to delete.
            (Parameter name kept as "id" for caller compatibility.)
    """

    # Named "req" rather than "input" so the builtin input() is not shadowed.
    req = {
        "operation": "delete-de-core",
        "workspace": self.api.workspace,
        "id": id,
    }

    self.request(request = req)
|
||||
|
||||
def load_de_core(self, id, flow="default", collection="default"):
    """Load a stored document-embeddings core into a running flow.

    Args:
        id: identifier of the document-embeddings core to load.
        flow: name of the target flow (defaults to "default").
        collection: target collection within the flow
            (defaults to "default").

    The response payload is discarded; failures surface as exceptions
    raised by self.request.
    """

    # Named "req" rather than "input" so the builtin input() is not shadowed.
    req = {
        "operation": "load-de-core",
        "workspace": self.api.workspace,
        "id": id,
        "flow": flow,
        "collection": collection,
    }

    self.request(request = req)
|
||||
|
||||
|
|
|
|||
|
|
@ -491,6 +491,58 @@ class SocketClient:
|
|||
triples=raw_triples,
|
||||
)
|
||||
|
||||
def get_kg_core(self, id: str) -> Iterator[Dict[str, Any]]:
    """Stream the raw frames of a knowledge-graph core.

    Issues a "get-kg-core" request on the knowledge service and yields
    each streamed response frame; iteration stops when the end-of-stream
    marker ("eos") arrives, and the marker itself is not yielded.
    """
    msg = {
        "operation": "get-kg-core",
        "workspace": self.workspace,
        "id": id,
    }
    frames = self._send_request_sync(
        "knowledge", None, msg, streaming_raw=True,
    )
    for frame in frames:
        if frame.get("eos"):
            return
        yield frame
|
||||
|
||||
def put_kg_core(
    self, id: str, triples=None, graph_embeddings=None,
) -> Dict[str, Any]:
    """Upload a knowledge-graph core.

    Builds a "put-kg-core" request, attaching only the payload sections
    the caller actually supplied, and returns the service response.
    """
    msg: Dict[str, Any] = {
        "operation": "put-kg-core",
        "workspace": self.workspace,
        "id": id,
    }
    # Skip payload keys left at None so the wire message stays minimal.
    optional = (
        ("triples", triples),
        ("graph-embeddings", graph_embeddings),
    )
    for key, value in optional:
        if value is not None:
            msg[key] = value
    return self._send_request_sync("knowledge", None, msg)
|
||||
|
||||
def get_de_core(self, id: str) -> Iterator[Dict[str, Any]]:
    """Stream the raw frames of a document-embeddings core.

    Issues a "get-de-core" request on the knowledge service and yields
    each streamed response frame; iteration stops at the end-of-stream
    marker ("eos"), which is not itself yielded.
    """
    msg = {
        "operation": "get-de-core",
        "workspace": self.workspace,
        "id": id,
    }
    frames = self._send_request_sync(
        "knowledge", None, msg, streaming_raw=True,
    )
    for frame in frames:
        if frame.get("eos"):
            return
        yield frame
|
||||
|
||||
def put_de_core(
    self, id: str, document_embeddings=None,
) -> Dict[str, Any]:
    """Upload a document-embeddings core.

    Builds a "put-de-core" request, attaching the embeddings payload
    only when one was supplied, and returns the service response.
    """
    msg: Dict[str, Any] = {
        "operation": "put-de-core",
        "workspace": self.workspace,
        "id": id,
    }
    # Omit the payload key entirely when no embeddings were given.
    if document_embeddings is not None:
        msg["document-embeddings"] = document_embeddings
    return self._send_request_sync("knowledge", None, msg)
|
||||
|
||||
def close(self) -> None:
|
||||
"""Close the persistent WebSocket connection."""
|
||||
if self._loop and not self._loop.is_closed():
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
from typing import Dict, Any, Tuple, Optional
|
||||
from ...schema import (
|
||||
KnowledgeRequest, KnowledgeResponse, Triples, GraphEmbeddings,
|
||||
DocumentEmbeddings, ChunkEmbeddings,
|
||||
Metadata, EntityEmbeddings
|
||||
)
|
||||
from .base import MessageTranslator
|
||||
|
|
@ -43,6 +44,23 @@ class KnowledgeRequestTranslator(MessageTranslator):
|
|||
]
|
||||
)
|
||||
|
||||
document_embeddings = None
|
||||
if "document-embeddings" in data:
|
||||
document_embeddings = DocumentEmbeddings(
|
||||
metadata=Metadata(
|
||||
id=data["document-embeddings"]["metadata"]["id"],
|
||||
root=data["document-embeddings"]["metadata"].get("root", ""),
|
||||
collection=data["document-embeddings"]["metadata"]["collection"]
|
||||
),
|
||||
chunks=[
|
||||
ChunkEmbeddings(
|
||||
chunk_id=ch["chunk_id"],
|
||||
vector=ch["vector"],
|
||||
)
|
||||
for ch in data["document-embeddings"]["chunks"]
|
||||
]
|
||||
)
|
||||
|
||||
return KnowledgeRequest(
|
||||
operation=data.get("operation"),
|
||||
id=data.get("id"),
|
||||
|
|
@ -50,6 +68,7 @@ class KnowledgeRequestTranslator(MessageTranslator):
|
|||
collection=data.get("collection"),
|
||||
triples=triples,
|
||||
graph_embeddings=graph_embeddings,
|
||||
document_embeddings=document_embeddings,
|
||||
)
|
||||
|
||||
def encode(self, obj: KnowledgeRequest) -> Dict[str, Any]:
|
||||
|
|
@ -90,6 +109,22 @@ class KnowledgeRequestTranslator(MessageTranslator):
|
|||
],
|
||||
}
|
||||
|
||||
if obj.document_embeddings:
|
||||
result["document-embeddings"] = {
|
||||
"metadata": {
|
||||
"id": obj.document_embeddings.metadata.id,
|
||||
"root": obj.document_embeddings.metadata.root,
|
||||
"collection": obj.document_embeddings.metadata.collection,
|
||||
},
|
||||
"chunks": [
|
||||
{
|
||||
"chunk_id": ch.chunk_id,
|
||||
"vector": ch.vector,
|
||||
}
|
||||
for ch in obj.document_embeddings.chunks
|
||||
],
|
||||
}
|
||||
|
||||
return result
|
||||
|
||||
|
||||
|
|
@ -140,6 +175,25 @@ class KnowledgeResponseTranslator(MessageTranslator):
|
|||
}
|
||||
}
|
||||
|
||||
# Streaming document embeddings response
|
||||
if obj.document_embeddings:
|
||||
return {
|
||||
"document-embeddings": {
|
||||
"metadata": {
|
||||
"id": obj.document_embeddings.metadata.id,
|
||||
"root": obj.document_embeddings.metadata.root,
|
||||
"collection": obj.document_embeddings.metadata.collection,
|
||||
},
|
||||
"chunks": [
|
||||
{
|
||||
"chunk_id": ch.chunk_id,
|
||||
"vector": ch.vector,
|
||||
}
|
||||
for ch in obj.document_embeddings.chunks
|
||||
],
|
||||
}
|
||||
}
|
||||
|
||||
# End of stream marker
|
||||
if obj.eos is True:
|
||||
return {"eos": True}
|
||||
|
|
@ -155,7 +209,7 @@ class KnowledgeResponseTranslator(MessageTranslator):
|
|||
is_final = (
|
||||
obj.ids is not None or # List response
|
||||
obj.eos is True or # End of stream
|
||||
(not obj.triples and not obj.graph_embeddings) # Empty response
|
||||
(not obj.triples and not obj.graph_embeddings and not obj.document_embeddings) # Empty response
|
||||
)
|
||||
|
||||
return response, is_final
|
||||
|
|
@ -4,7 +4,7 @@ from ..core.topic import queue
|
|||
from ..core.metadata import Metadata
|
||||
from .document import Document, TextDocument
|
||||
from .graph import Triples
|
||||
from .embeddings import GraphEmbeddings
|
||||
from .embeddings import GraphEmbeddings, DocumentEmbeddings
|
||||
|
||||
# get-kg-core
|
||||
# -> (???)
|
||||
|
|
@ -41,6 +41,9 @@ class KnowledgeRequest:
|
|||
triples: Triples | None = None
|
||||
graph_embeddings: GraphEmbeddings | None = None
|
||||
|
||||
# put-de-core
|
||||
document_embeddings: DocumentEmbeddings | None = None
|
||||
|
||||
@dataclass
|
||||
class KnowledgeResponse:
|
||||
error: Error | None = None
|
||||
|
|
@ -48,6 +51,7 @@ class KnowledgeResponse:
|
|||
eos: bool = False # Indicates end of knowledge core stream
|
||||
triples: Triples | None = None
|
||||
graph_embeddings: GraphEmbeddings | None = None
|
||||
document_embeddings: DocumentEmbeddings | None = None
|
||||
|
||||
knowledge_request_queue = queue('knowledge', cls='request')
|
||||
knowledge_response_queue = queue('knowledge', cls='response')
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue