mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-06-13 08:45:13 +02:00
CLI auth migration, document embeddings core lifecycle (#913)
Migrate get_kg_core and put_kg_core CLI tools to use Api/SocketClient with first-frame auth (fixes broken raw websocket path). Fix wire format field names (root/vector). Remove ~600 lines of dead raw websocket code from invoke_graph_rag.py. Add document embeddings core lifecycle to the knowledge service: list/get/put/delete/load operations across schema, translator, Cassandra table store, knowledge manager, gateway registry, REST API, socket client, and CLI (tg-get-de-core, tg-put-de-core). Fix delete_kg_core to also clean up document embeddings rows.
This commit is contained in:
parent
dd974b0cac
commit
f0ad282708
14 changed files with 762 additions and 825 deletions
|
|
@ -1,5 +1,6 @@
|
|||
|
||||
from .. schema import KnowledgeResponse, Error, Triples, GraphEmbeddings
|
||||
from .. schema import DocumentEmbeddings
|
||||
from .. knowledge import hash
|
||||
from .. exceptions import RequestError
|
||||
from .. tables.knowledge import KnowledgeTableStore
|
||||
|
|
@ -157,6 +158,98 @@ class KnowledgeManager:
|
|||
)
|
||||
)
|
||||
|
||||
async def list_de_cores(self, request, respond, workspace):
    """Reply with the document-embeddings core IDs stored for a workspace.

    The IDs come from the table store and are returned in a single
    KnowledgeResponse passed to the ``respond`` callback.  ``request``
    is accepted for signature parity but is not read here.
    """
    core_ids = await self.table_store.list_de_cores(workspace)

    reply = KnowledgeResponse(
        error=None,
        ids=core_ids,
        eos=False,
        triples=None,
        graph_embeddings=None,
    )
    await respond(reply)
|
||||
|
||||
async def get_de_core(self, request, respond, workspace):
    """Stream the document-embeddings core ``request.id`` to the requester.

    Each record the table store hands to the callback is sent as its own
    KnowledgeResponse with ``eos=False``.  Once the table store call
    returns, one more response with ``eos=True`` is sent.
    """
    logger.info("Getting document embeddings core...")

    async def forward(record):
        # One response per stored record, carrying only the embeddings.
        message = KnowledgeResponse(
            error=None,
            ids=None,
            eos=False,
            triples=None,
            graph_embeddings=None,
            document_embeddings=record,
        )
        await respond(message)

    await self.table_store.get_document_embeddings(
        workspace, request.id, forward,
    )

    logger.debug("Document embeddings core retrieval complete")

    # Final response: no payload, eos flag set.
    terminator = KnowledgeResponse(
        error=None,
        ids=None,
        eos=True,
        triples=None,
        graph_embeddings=None,
    )
    await respond(terminator)
|
||||
|
||||
async def put_de_core(self, request, respond, workspace):
    """Store the document embeddings carried by the request, then acknowledge.

    The table store is only called when ``request.document_embeddings`` is
    truthy; the acknowledgement response is sent either way.
    """
    payload = request.document_embeddings
    if payload:
        await self.table_store.add_document_embeddings(workspace, payload)

    ack = KnowledgeResponse(
        error=None,
        ids=None,
        eos=False,
        triples=None,
        graph_embeddings=None,
    )
    await respond(ack)
|
||||
|
||||
async def delete_de_core(self, request, respond, workspace):
    """Delete the document-embeddings core ``request.id`` and acknowledge.

    Deletion is delegated to the table store; an empty KnowledgeResponse
    is then sent through ``respond``.
    """
    logger.info("Deleting document embeddings core...")

    core_id = request.id
    await self.table_store.delete_document_embeddings(workspace, core_id)

    ack = KnowledgeResponse(
        error=None,
        ids=None,
        eos=False,
        triples=None,
        graph_embeddings=None,
    )
    await respond(ack)
|
||||
|
||||
async def load_de_core(self, request, respond, workspace):
    """Queue a document-embeddings core load for the background loader.

    The ``core_loader`` task is created the first time it is needed
    (while ``self.background_task`` is still None).  The request, its
    responder and the workspace are then put on ``self.loader_queue``
    as a single tuple.
    """
    if self.background_task is None:
        loader = self.core_loader()
        self.background_task = asyncio.create_task(loader)

    job = (request, respond, workspace)
    await self.loader_queue.put(job)
|
||||
|
||||
async def core_loader(self):
|
||||
|
||||
logger.info("Knowledge background processor running...")
|
||||
|
|
@ -165,7 +258,7 @@ class KnowledgeManager:
|
|||
logger.debug("Waiting for next load...")
|
||||
request, respond, workspace = await self.loader_queue.get()
|
||||
|
||||
logger.info(f"Loading knowledge: {request.id}")
|
||||
logger.info(f"Loading: {request.operation} {request.id}")
|
||||
|
||||
try:
|
||||
|
||||
|
|
@ -187,25 +280,14 @@ class KnowledgeManager:
|
|||
if "interfaces" not in flow:
|
||||
raise RuntimeError("No defined interfaces")
|
||||
|
||||
if "triples-store" not in flow["interfaces"]:
|
||||
raise RuntimeError("Flow has no triples-store")
|
||||
|
||||
if "graph-embeddings-store" not in flow["interfaces"]:
|
||||
raise RuntimeError("Flow has no graph-embeddings-store")
|
||||
|
||||
t_q = flow["interfaces"]["triples-store"]["flow"]
|
||||
ge_q = flow["interfaces"]["graph-embeddings-store"]["flow"]
|
||||
|
||||
# Got this far, it should all work
|
||||
await respond(
|
||||
KnowledgeResponse(
|
||||
error = None,
|
||||
ids = None,
|
||||
eos = False,
|
||||
triples = None,
|
||||
graph_embeddings = None
|
||||
if request.operation == "load-de-core":
|
||||
await self._load_de_core(
|
||||
request, respond, workspace, flow,
|
||||
)
|
||||
else:
|
||||
await self._load_kg_core(
|
||||
request, respond, workspace, flow,
|
||||
)
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
|
||||
|
|
@ -223,72 +305,145 @@ class KnowledgeManager:
|
|||
)
|
||||
)
|
||||
|
||||
|
||||
logger.debug("Starting knowledge loading process...")
|
||||
|
||||
try:
|
||||
|
||||
t_pub = None
|
||||
ge_pub = None
|
||||
|
||||
logger.debug(f"Triples queue: {t_q}")
|
||||
logger.debug(f"Graph embeddings queue: {ge_q}")
|
||||
|
||||
t_pub = Publisher(
|
||||
self.flow_config.pubsub, t_q,
|
||||
schema=Triples,
|
||||
)
|
||||
ge_pub = Publisher(
|
||||
self.flow_config.pubsub, ge_q,
|
||||
schema=GraphEmbeddings
|
||||
)
|
||||
|
||||
logger.debug("Starting publishers...")
|
||||
|
||||
await t_pub.start()
|
||||
await ge_pub.start()
|
||||
|
||||
async def publish_triples(t):
|
||||
# Override collection with request collection
|
||||
if hasattr(t, 'metadata') and hasattr(t.metadata, 'collection'):
|
||||
t.metadata.collection = request.collection or "default"
|
||||
await t_pub.send(None, t)
|
||||
|
||||
logger.debug("Publishing triples...")
|
||||
|
||||
await self.table_store.get_triples(
|
||||
workspace,
|
||||
request.id,
|
||||
publish_triples,
|
||||
)
|
||||
|
||||
async def publish_ge(g):
|
||||
# Override collection with request collection
|
||||
if hasattr(g, 'metadata') and hasattr(g.metadata, 'collection'):
|
||||
g.metadata.collection = request.collection or "default"
|
||||
await ge_pub.send(None, g)
|
||||
|
||||
logger.debug("Publishing graph embeddings...")
|
||||
|
||||
await self.table_store.get_graph_embeddings(
|
||||
workspace,
|
||||
request.id,
|
||||
publish_ge,
|
||||
)
|
||||
|
||||
logger.debug("Knowledge loading completed")
|
||||
|
||||
except Exception as e:
|
||||
|
||||
logger.error(f"Knowledge exception: {e}", exc_info=True)
|
||||
|
||||
finally:
|
||||
|
||||
logger.debug("Stopping publishers...")
|
||||
|
||||
if t_pub: await t_pub.stop()
|
||||
if ge_pub: await ge_pub.stop()
|
||||
|
||||
logger.debug("Knowledge processing done")
|
||||
|
||||
continue
|
||||
|
||||
async def _load_kg_core(self, request, respond, workspace, flow):
    """Publish a stored knowledge core's triples and graph embeddings to a flow.

    Raises RuntimeError when the flow lacks a ``triples-store`` or
    ``graph-embeddings-store`` interface.  Otherwise an empty
    KnowledgeResponse is sent first, and the stored records for
    ``request.id`` are then published to the two flow queues.  Failures
    during publishing are logged and not re-raised; both publishers are
    stopped in every case.
    """
    interfaces = flow["interfaces"]

    if "triples-store" not in interfaces:
        raise RuntimeError("Flow has no triples-store")

    if "graph-embeddings-store" not in interfaces:
        raise RuntimeError("Flow has no graph-embeddings-store")

    triples_queue = interfaces["triples-store"]["flow"]
    embeddings_queue = interfaces["graph-embeddings-store"]["flow"]

    # Acknowledge up front; the publishing below runs after the reply.
    ack = KnowledgeResponse(
        error=None,
        ids=None,
        eos=False,
        triples=None,
        graph_embeddings=None,
    )
    await respond(ack)

    triples_pub = None
    embeddings_pub = None

    def retarget(item):
        # Point the record at the request's collection ("default" if unset).
        if hasattr(item, 'metadata') and hasattr(item.metadata, 'collection'):
            item.metadata.collection = request.collection or "default"

    async def send_triples(item):
        retarget(item)
        await triples_pub.send(None, item)

    async def send_embeddings(item):
        retarget(item)
        await embeddings_pub.send(None, item)

    try:
        logger.debug(f"Triples queue: {triples_queue}")
        logger.debug(f"Graph embeddings queue: {embeddings_queue}")

        triples_pub = Publisher(
            self.flow_config.pubsub, triples_queue, schema=Triples,
        )
        embeddings_pub = Publisher(
            self.flow_config.pubsub, embeddings_queue, schema=GraphEmbeddings,
        )

        logger.debug("Starting publishers...")

        await triples_pub.start()
        await embeddings_pub.start()

        logger.debug("Publishing triples...")

        await self.table_store.get_triples(
            workspace, request.id, send_triples,
        )

        logger.debug("Publishing graph embeddings...")

        await self.table_store.get_graph_embeddings(
            workspace, request.id, send_embeddings,
        )

        logger.debug("Knowledge core loading completed")

    except Exception as e:
        logger.error(f"Knowledge exception: {e}", exc_info=True)

    finally:
        logger.debug("Stopping publishers...")

        if triples_pub:
            await triples_pub.stop()
        if embeddings_pub:
            await embeddings_pub.stop()
|
||||
|
||||
async def _load_de_core(self, request, respond, workspace, flow):
    """Publish a stored document-embeddings core to a flow.

    Raises RuntimeError when the flow lacks a
    ``document-embeddings-store`` interface.  Otherwise an empty
    KnowledgeResponse is sent first, and the stored records for
    ``request.id`` are then published to the flow queue.  Failures during
    publishing are logged and not re-raised; the publisher is stopped in
    every case.
    """
    interfaces = flow["interfaces"]

    if "document-embeddings-store" not in interfaces:
        raise RuntimeError("Flow has no document-embeddings-store")

    queue_name = interfaces["document-embeddings-store"]["flow"]

    # Acknowledge up front; the publishing below runs after the reply.
    ack = KnowledgeResponse(
        error=None,
        ids=None,
        eos=False,
        triples=None,
        graph_embeddings=None,
    )
    await respond(ack)

    publisher = None

    async def send_embeddings(item):
        # Point the record at the request's collection ("default" if unset).
        if hasattr(item, 'metadata') and hasattr(item.metadata, 'collection'):
            item.metadata.collection = request.collection or "default"
        await publisher.send(None, item)

    try:
        logger.debug(f"Document embeddings queue: {queue_name}")

        publisher = Publisher(
            self.flow_config.pubsub, queue_name, schema=DocumentEmbeddings,
        )

        logger.debug("Starting publisher...")

        await publisher.start()

        logger.debug("Publishing document embeddings...")

        await self.table_store.get_document_embeddings(
            workspace, request.id, send_embeddings,
        )

        logger.debug("Document embeddings core loading completed")

    except Exception as e:
        logger.error(f"Knowledge exception: {e}", exc_info=True)

    finally:
        logger.debug("Stopping publisher...")

        if publisher:
            await publisher.stop()
|
||||
|
|
|
|||
|
|
@ -187,6 +187,11 @@ class Processor(WorkspaceProcessor):
|
|||
"put-kg-core": self.knowledge.put_kg_core,
|
||||
"load-kg-core": self.knowledge.load_kg_core,
|
||||
"unload-kg-core": self.knowledge.unload_kg_core,
|
||||
"list-de-cores": self.knowledge.list_de_cores,
|
||||
"get-de-core": self.knowledge.get_de_core,
|
||||
"delete-de-core": self.knowledge.delete_de_core,
|
||||
"put-de-core": self.knowledge.put_de_core,
|
||||
"load-de-core": self.knowledge.load_de_core,
|
||||
}
|
||||
|
||||
if v.operation not in impls:
|
||||
|
|
|
|||
|
|
@ -457,6 +457,12 @@ for _op in ("put-kg-core", "delete-kg-core",
|
|||
"load-kg-core", "unload-kg-core"):
|
||||
_register_kind_op("knowledge", _op, "knowledge:write")
|
||||
|
||||
# knowledge: document-embeddings core service.
# Read-only operations are registered first, then the mutating ones, each
# under its capability string.
for _capability, _ops in (
    ("knowledge:read", ("get-de-core", "list-de-cores")),
    ("knowledge:write", ("put-de-core", "delete-de-core", "load-de-core")),
):
    for _op in _ops:
        _register_kind_op("knowledge", _op, _capability)
|
||||
|
||||
|
||||
# collection-management: workspace collection lifecycle.
|
||||
_register_kind_op("collection-management", "list-collections", "collections:read")
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
|
||||
from .. schema import KnowledgeResponse, Triple, Triples, EntityEmbeddings
|
||||
from .. schema import Metadata, Term, IRI, LITERAL, GraphEmbeddings
|
||||
from .. schema import DocumentEmbeddings, ChunkEmbeddings
|
||||
|
||||
from cassandra.cluster import Cluster
|
||||
|
||||
|
|
@ -217,6 +218,16 @@ class KnowledgeTableStore:
|
|||
WHERE workspace = ? AND document_id = ?
|
||||
""")
|
||||
|
||||
self.delete_document_embeddings_stmt = self.cassandra.prepare("""
|
||||
DELETE FROM document_embeddings
|
||||
WHERE workspace = ? AND document_id = ?
|
||||
""")
|
||||
|
||||
self.list_de_cores_stmt = self.cassandra.prepare("""
|
||||
SELECT DISTINCT workspace, document_id FROM document_embeddings
|
||||
WHERE workspace = ?
|
||||
""")
|
||||
|
||||
async def add_triples(self, workspace, m):
|
||||
|
||||
when = int(time.time() * 1000)
|
||||
|
|
@ -338,6 +349,50 @@ class KnowledgeTableStore:
|
|||
logger.error("Exception occurred", exc_info=True)
|
||||
raise
|
||||
|
||||
try:
|
||||
await async_execute(
|
||||
self.cassandra,
|
||||
self.delete_document_embeddings_stmt,
|
||||
(workspace, document_id),
|
||||
)
|
||||
except Exception:
|
||||
logger.error("Exception occurred", exc_info=True)
|
||||
raise
|
||||
|
||||
async def delete_document_embeddings(self, workspace, document_id):
    """Run the prepared document-embeddings delete for one document.

    The statement is executed with ``(workspace, document_id)`` as its
    parameters.  Any exception is logged with its traceback and
    re-raised to the caller.
    """
    logger.debug("Delete document embeddings...")

    params = (workspace, document_id)

    try:
        await async_execute(
            self.cassandra, self.delete_document_embeddings_stmt, params,
        )
    except Exception:
        logger.error("Exception occurred", exc_info=True)
        raise
|
||||
|
||||
async def list_de_cores(self, workspace):
    """Return the document IDs that have document embeddings in a workspace.

    Executes the prepared listing statement and collects the second
    column of every returned row.  Any exception from the query is logged
    with its traceback and re-raised.
    """
    logger.debug("List DE cores...")

    params = (workspace,)

    try:
        result = await async_execute(
            self.cassandra, self.list_de_cores_stmt, params,
        )
    except Exception:
        logger.error("Exception occurred", exc_info=True)
        raise

    # Column 0 is the workspace, column 1 the document id.
    core_ids = [record[1] for record in result]

    logger.debug("Done")

    return core_ids
|
||||
|
||||
async def get_triples(self, workspace, document_id, receiver):
|
||||
|
||||
logger.debug("Get triples...")
|
||||
|
|
@ -417,3 +472,42 @@ class KnowledgeTableStore:
|
|||
|
||||
logger.debug("Done")
|
||||
|
||||
async def get_document_embeddings(self, workspace, document_id, receiver):
    """Read a document's stored embeddings and hand each row to ``receiver``.

    Each row's fourth column is turned into a list of ChunkEmbeddings
    (empty when the column is falsy).  The list is wrapped in a
    DocumentEmbeddings whose metadata carries ``document_id`` and the
    collection "default", and that object is awaited through
    ``receiver``.  Query exceptions are logged with their traceback and
    re-raised.
    """
    logger.debug("Get DE...")

    params = (workspace, document_id)

    try:
        result = await async_execute(
            self.cassandra, self.get_document_embeddings_stmt, params,
        )
    except Exception:
        logger.error("Exception occurred", exc_info=True)
        raise

    for record in result:

        # Each stored entry is indexed as (chunk id, vector).
        chunk_list = [
            ChunkEmbeddings(chunk_id=entry[0], vector=entry[1])
            for entry in (record[3] or [])
        ]

        meta = Metadata(id=document_id, collection="default")

        await receiver(
            DocumentEmbeddings(metadata=meta, chunks=chunk_list)
        )

    logger.debug("Done")
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue