mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-06-21 04:38:07 +02:00
feat: complete knowledge core storage — named graphs, provenance, source material (#973)
Implements all three changes from the knowledge-core-completeness tech spec:
1. The named graph field is preserved through Cassandra storage (7-element tuple), so provenance triples retain their graph URIs on round-trip.
2. Provenance triples already arrive on triples-input, so no routing change was needed; change 1 was sufficient.
3. Source material (library documents) is streamed alongside triples and embeddings during core download/upload. The knowledge manager fetches the document hierarchy from the librarian on download and recreates it on upload, preserving the full provenance chain across instances.
This commit is contained in:
parent
aa158e1ba3
commit
6df7471a55
14 changed files with 1347 additions and 15 deletions
|
|
@ -502,6 +502,7 @@ class SocketClient:
|
|||
|
||||
def put_kg_core(
|
||||
self, id: str, triples=None, graph_embeddings=None,
|
||||
library_metadata=None, library_blob=None,
|
||||
) -> Dict[str, Any]:
|
||||
request = {
|
||||
"operation": "put-kg-core",
|
||||
|
|
@ -512,6 +513,10 @@ class SocketClient:
|
|||
request["triples"] = triples
|
||||
if graph_embeddings is not None:
|
||||
request["graph-embeddings"] = graph_embeddings
|
||||
if library_metadata is not None:
|
||||
request["library-metadata"] = library_metadata
|
||||
if library_blob is not None:
|
||||
request["library-blob"] = library_blob
|
||||
return self._send_request_sync("knowledge", None, request)
|
||||
|
||||
def get_de_core(self, id: str) -> Iterator[Dict[str, Any]]:
|
||||
|
|
|
|||
|
|
@ -2,7 +2,8 @@ from typing import Dict, Any, Tuple, Optional
|
|||
from ...schema import (
|
||||
KnowledgeRequest, KnowledgeResponse, Triples, GraphEmbeddings,
|
||||
DocumentEmbeddings, ChunkEmbeddings,
|
||||
Metadata, EntityEmbeddings
|
||||
Metadata, EntityEmbeddings,
|
||||
LibraryMetadata, LibraryBlob,
|
||||
)
|
||||
from .base import MessageTranslator
|
||||
from .primitives import ValueTranslator, SubgraphTranslator
|
||||
|
|
@ -61,6 +62,27 @@ class KnowledgeRequestTranslator(MessageTranslator):
|
|||
]
|
||||
)
|
||||
|
||||
library_metadata = None
|
||||
if "library-metadata" in data:
|
||||
lm = data["library-metadata"]
|
||||
library_metadata = LibraryMetadata(
|
||||
id=lm.get("id", ""),
|
||||
kind=lm.get("kind", ""),
|
||||
title=lm.get("title", ""),
|
||||
parent_id=lm.get("parent-id", ""),
|
||||
document_type=lm.get("document-type", ""),
|
||||
comments=lm.get("comments", ""),
|
||||
tags=lm.get("tags", []),
|
||||
)
|
||||
|
||||
library_blob = None
|
||||
if "library-blob" in data:
|
||||
lb = data["library-blob"]
|
||||
library_blob = LibraryBlob(
|
||||
id=lb.get("id", ""),
|
||||
data=lb.get("data", b""),
|
||||
)
|
||||
|
||||
return KnowledgeRequest(
|
||||
operation=data.get("operation"),
|
||||
id=data.get("id"),
|
||||
|
|
@ -69,6 +91,8 @@ class KnowledgeRequestTranslator(MessageTranslator):
|
|||
triples=triples,
|
||||
graph_embeddings=graph_embeddings,
|
||||
document_embeddings=document_embeddings,
|
||||
library_metadata=library_metadata,
|
||||
library_blob=library_blob,
|
||||
)
|
||||
|
||||
def encode(self, obj: KnowledgeRequest) -> Dict[str, Any]:
|
||||
|
|
@ -125,6 +149,26 @@ class KnowledgeRequestTranslator(MessageTranslator):
|
|||
],
|
||||
}
|
||||
|
||||
if obj.library_metadata:
|
||||
result["library-metadata"] = {
|
||||
"id": obj.library_metadata.id,
|
||||
"kind": obj.library_metadata.kind,
|
||||
"title": obj.library_metadata.title,
|
||||
"parent-id": obj.library_metadata.parent_id,
|
||||
"document-type": obj.library_metadata.document_type,
|
||||
"comments": obj.library_metadata.comments,
|
||||
"tags": obj.library_metadata.tags,
|
||||
}
|
||||
|
||||
if obj.library_blob:
|
||||
data = obj.library_blob.data
|
||||
if isinstance(data, bytes):
|
||||
data = data.decode("utf-8")
|
||||
result["library-blob"] = {
|
||||
"id": obj.library_blob.id,
|
||||
"data": data,
|
||||
}
|
||||
|
||||
return result
|
||||
|
||||
|
||||
|
|
@ -194,6 +238,32 @@ class KnowledgeResponseTranslator(MessageTranslator):
|
|||
}
|
||||
}
|
||||
|
||||
# Streaming library metadata response
|
||||
if obj.library_metadata:
|
||||
return {
|
||||
"library-metadata": {
|
||||
"id": obj.library_metadata.id,
|
||||
"kind": obj.library_metadata.kind,
|
||||
"title": obj.library_metadata.title,
|
||||
"parent-id": obj.library_metadata.parent_id,
|
||||
"document-type": obj.library_metadata.document_type,
|
||||
"comments": obj.library_metadata.comments,
|
||||
"tags": obj.library_metadata.tags,
|
||||
}
|
||||
}
|
||||
|
||||
# Streaming library blob response
|
||||
if obj.library_blob:
|
||||
data = obj.library_blob.data
|
||||
if isinstance(data, bytes):
|
||||
data = data.decode("utf-8")
|
||||
return {
|
||||
"library-blob": {
|
||||
"id": obj.library_blob.id,
|
||||
"data": data,
|
||||
}
|
||||
}
|
||||
|
||||
# End of stream marker
|
||||
if obj.eos is True:
|
||||
return {"eos": True}
|
||||
|
|
@ -209,7 +279,9 @@ class KnowledgeResponseTranslator(MessageTranslator):
|
|||
is_final = (
|
||||
obj.ids is not None or # List response
|
||||
obj.eos is True or # End of stream
|
||||
(not obj.triples and not obj.graph_embeddings and not obj.document_embeddings) # Empty response
|
||||
(not obj.triples and not obj.graph_embeddings
|
||||
and not obj.document_embeddings
|
||||
and not obj.library_metadata and not obj.library_blob) # Empty response
|
||||
)
|
||||
|
||||
return response, is_final
|
||||
|
|
@ -21,6 +21,21 @@ from .embeddings import GraphEmbeddings, DocumentEmbeddings
|
|||
# <- ()
|
||||
# <- (error)
|
||||
|
||||
@dataclass
class LibraryMetadata:
    """Descriptive record for one library document carried with a knowledge core.

    Instances travel in the ``library_metadata`` field of both
    ``KnowledgeRequest`` and ``KnowledgeResponse``.  Every field has an
    empty default, so a bare ``LibraryMetadata()`` is valid.
    """

    # Identifier of the document this record describes.
    id: str = ""
    # Record kind; the set of permitted values is not visible here --
    # TODO confirm against the librarian service.
    kind: str = ""
    # Document title.
    title: str = ""
    # Identifier of the parent record ("parent-id" on the wire); presumably
    # empty for a root document -- verify against the caller.
    parent_id: str = ""
    # Document type ("document-type" on the wire).
    document_type: str = ""
    # Free-text comments.
    comments: str = ""
    # Tags; default_factory gives each instance its own list rather than a
    # shared mutable default.
    tags: list[str] = field(default_factory=list)
|
||||
|
||||
@dataclass
class LibraryBlob:
    """Content payload for one library document carried with a knowledge core.

    Instances travel in the ``library_blob`` field of both
    ``KnowledgeRequest`` and ``KnowledgeResponse``.
    """

    # Identifier of the document the payload belongs to.
    id: str = ""
    # Document content.  NOTE(review): the knowledge translators call
    # ``data.decode("utf-8")`` on bytes values when building the wire dict,
    # and pass the incoming wire value through unchanged, so this field may
    # hold ``str`` at runtime and the bytes presumably must be UTF-8-safe
    # (e.g. already base64 text) -- confirm.
    data: bytes = b""
|
||||
|
||||
@dataclass
|
||||
class KnowledgeRequest:
|
||||
# get-kg-core, delete-kg-core, list-kg-cores, put-kg-core
|
||||
|
|
@ -44,6 +59,10 @@ class KnowledgeRequest:
|
|||
# put-de-core
|
||||
document_embeddings: DocumentEmbeddings | None = None
|
||||
|
||||
# put-kg-core (source material)
|
||||
library_metadata: LibraryMetadata | None = None
|
||||
library_blob: LibraryBlob | None = None
|
||||
|
||||
@dataclass
|
||||
class KnowledgeResponse:
|
||||
error: Error | None = None
|
||||
|
|
@ -52,6 +71,8 @@ class KnowledgeResponse:
|
|||
triples: Triples | None = None
|
||||
graph_embeddings: GraphEmbeddings | None = None
|
||||
document_embeddings: DocumentEmbeddings | None = None
|
||||
library_metadata: LibraryMetadata | None = None
|
||||
library_blob: LibraryBlob | None = None
|
||||
|
||||
knowledge_request_queue = queue('knowledge', cls='request')
|
||||
knowledge_response_queue = queue('knowledge', cls='response')
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue