feat: complete knowledge core storage — named graphs, provenance, source material (#973)

Implements all three changes from the knowledge-core-completeness tech spec:

1. Named graph field preserved through Cassandra storage (7-element tuple),
   enabling provenance triples to retain their graph URIs on round-trip.

2. Provenance triples already arrive on triples-input — no routing change
   needed; Change 1 was sufficient.

3. Source material (library documents) streamed alongside triples and
   embeddings during core download/upload. The knowledge manager fetches
   the document hierarchy from the librarian on download and recreates it
   on upload, preserving the full provenance chain across instances.
This commit is contained in:
cybermaggedon 2026-06-03 10:46:52 +01:00 committed by GitHub
parent aa158e1ba3
commit 6df7471a55
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
14 changed files with 1347 additions and 15 deletions

View file

@ -502,6 +502,7 @@ class SocketClient:
def put_kg_core(
self, id: str, triples=None, graph_embeddings=None,
library_metadata=None, library_blob=None,
) -> Dict[str, Any]:
request = {
"operation": "put-kg-core",
@ -512,6 +513,10 @@ class SocketClient:
request["triples"] = triples
if graph_embeddings is not None:
request["graph-embeddings"] = graph_embeddings
if library_metadata is not None:
request["library-metadata"] = library_metadata
if library_blob is not None:
request["library-blob"] = library_blob
return self._send_request_sync("knowledge", None, request)
def get_de_core(self, id: str) -> Iterator[Dict[str, Any]]:

View file

@ -2,7 +2,8 @@ from typing import Dict, Any, Tuple, Optional
from ...schema import (
KnowledgeRequest, KnowledgeResponse, Triples, GraphEmbeddings,
DocumentEmbeddings, ChunkEmbeddings,
Metadata, EntityEmbeddings
Metadata, EntityEmbeddings,
LibraryMetadata, LibraryBlob,
)
from .base import MessageTranslator
from .primitives import ValueTranslator, SubgraphTranslator
@ -61,6 +62,27 @@ class KnowledgeRequestTranslator(MessageTranslator):
]
)
library_metadata = None
if "library-metadata" in data:
lm = data["library-metadata"]
library_metadata = LibraryMetadata(
id=lm.get("id", ""),
kind=lm.get("kind", ""),
title=lm.get("title", ""),
parent_id=lm.get("parent-id", ""),
document_type=lm.get("document-type", ""),
comments=lm.get("comments", ""),
tags=lm.get("tags", []),
)
library_blob = None
if "library-blob" in data:
lb = data["library-blob"]
library_blob = LibraryBlob(
id=lb.get("id", ""),
data=lb.get("data", b""),
)
return KnowledgeRequest(
operation=data.get("operation"),
id=data.get("id"),
@ -69,6 +91,8 @@ class KnowledgeRequestTranslator(MessageTranslator):
triples=triples,
graph_embeddings=graph_embeddings,
document_embeddings=document_embeddings,
library_metadata=library_metadata,
library_blob=library_blob,
)
def encode(self, obj: KnowledgeRequest) -> Dict[str, Any]:
@ -125,6 +149,26 @@ class KnowledgeRequestTranslator(MessageTranslator):
],
}
if obj.library_metadata:
result["library-metadata"] = {
"id": obj.library_metadata.id,
"kind": obj.library_metadata.kind,
"title": obj.library_metadata.title,
"parent-id": obj.library_metadata.parent_id,
"document-type": obj.library_metadata.document_type,
"comments": obj.library_metadata.comments,
"tags": obj.library_metadata.tags,
}
if obj.library_blob:
data = obj.library_blob.data
if isinstance(data, bytes):
data = data.decode("utf-8")
result["library-blob"] = {
"id": obj.library_blob.id,
"data": data,
}
return result
@ -194,6 +238,32 @@ class KnowledgeResponseTranslator(MessageTranslator):
}
}
# Streaming library metadata response
if obj.library_metadata:
return {
"library-metadata": {
"id": obj.library_metadata.id,
"kind": obj.library_metadata.kind,
"title": obj.library_metadata.title,
"parent-id": obj.library_metadata.parent_id,
"document-type": obj.library_metadata.document_type,
"comments": obj.library_metadata.comments,
"tags": obj.library_metadata.tags,
}
}
# Streaming library blob response
if obj.library_blob:
data = obj.library_blob.data
if isinstance(data, bytes):
data = data.decode("utf-8")
return {
"library-blob": {
"id": obj.library_blob.id,
"data": data,
}
}
# End of stream marker
if obj.eos is True:
return {"eos": True}
@ -209,7 +279,9 @@ class KnowledgeResponseTranslator(MessageTranslator):
is_final = (
obj.ids is not None or # List response
obj.eos is True or # End of stream
(not obj.triples and not obj.graph_embeddings and not obj.document_embeddings) # Empty response
(not obj.triples and not obj.graph_embeddings
and not obj.document_embeddings
and not obj.library_metadata and not obj.library_blob) # Empty response
)
return response, is_final

View file

@ -21,6 +21,21 @@ from .embeddings import GraphEmbeddings, DocumentEmbeddings
# <- ()
# <- (error)
# Descriptive metadata for one library document carried alongside a knowledge
# core as source material. Every field has an empty default, so an instance
# can be built from a partial mapping.
@dataclass
class LibraryMetadata:
# Identifier of the entry.
id: str = ""
# NOTE(review): the set of valid values for `kind` is not visible here — confirm.
kind: str = ""
title: str = ""
# Serialized under the key "parent-id" by the knowledge translators;
# presumably the id of the parent entry in the document hierarchy — confirm.
parent_id: str = ""
# Serialized under the key "document-type" by the knowledge translators.
document_type: str = ""
comments: str = ""
# default_factory gives each instance its own list rather than a shared one.
tags: list[str] = field(default_factory=list)
# Content payload for one library entry, paired with the entry's id.
@dataclass
class LibraryBlob:
# Identifier of the entry this payload belongs to.
id: str = ""
# Declared as bytes. When encoding, the knowledge translators call
# .decode("utf-8") on bytes values, so the payload must be valid UTF-8 text.
# NOTE(review): the request decode path stores the incoming "data" value
# unchanged, so this field may hold a str at runtime; arbitrary binary content
# would raise UnicodeDecodeError on encode — confirm the intended wire encoding.
data: bytes = b""
@dataclass
class KnowledgeRequest:
# get-kg-core, delete-kg-core, list-kg-cores, put-kg-core
@ -44,6 +59,10 @@ class KnowledgeRequest:
# put-de-core
document_embeddings: DocumentEmbeddings | None = None
# put-kg-core (source material)
library_metadata: LibraryMetadata | None = None
library_blob: LibraryBlob | None = None
@dataclass
class KnowledgeResponse:
error: Error | None = None
@ -52,6 +71,8 @@ class KnowledgeResponse:
triples: Triples | None = None
graph_embeddings: GraphEmbeddings | None = None
document_embeddings: DocumentEmbeddings | None = None
library_metadata: LibraryMetadata | None = None
library_blob: LibraryBlob | None = None
# Queue definitions for knowledge requests and responses. Both are created
# under the 'knowledge' name and differ only in the `cls` argument.
knowledge_request_queue = queue('knowledge', cls='request')
knowledge_response_queue = queue('knowledge', cls='response')