feat: complete knowledge core storage — named graphs, provenance, source material (#973)

Implements all three changes from the knowledge-core-completeness tech spec:

1. Named graph field preserved through Cassandra storage (7-element tuple),
   enabling provenance triples to retain their graph URIs on round-trip.

2. Provenance triples already arrive on triples-input — no routing change
   needed; Change 1 was sufficient.

3. Source material (library documents) streamed alongside triples and
   embeddings during core download/upload. The knowledge manager fetches
   the document hierarchy from the librarian on download and recreates it
   on upload, preserving the full provenance chain across instances.
This commit is contained in:
cybermaggedon 2026-06-03 10:46:52 +01:00 committed by GitHub
parent aa158e1ba3
commit 6df7471a55
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
14 changed files with 1347 additions and 15 deletions

View file

@@ -21,6 +21,21 @@ from .embeddings import GraphEmbeddings, DocumentEmbeddings
# <- ()
# <- (error)
@dataclass
class LibraryMetadata:
    """Descriptive metadata for one item of library source material.

    Carried in the optional ``library_metadata`` field of
    ``KnowledgeRequest`` and ``KnowledgeResponse``.  Every field has an
    empty default, so an instance can be created with no arguments and
    populated afterwards.
    """

    # Identifier of the library item.
    # NOTE(review): presumably the same identifier space as
    # LibraryBlob.id -- confirm against the librarian service.
    id: str = ""
    # Kind discriminator for the item; the allowed values are not
    # defined in this module.
    kind: str = ""
    title: str = ""
    # Identifier of the parent item; "" by default.
    # NOTE(review): presumably this is how the document hierarchy is
    # expressed -- confirm.
    parent_id: str = ""
    document_type: str = ""
    comments: str = ""
    # default_factory gives every instance its own list instead of one
    # shared mutable default.
    tags: list[str] = field(default_factory=list)
@dataclass
class LibraryBlob:
    """Raw byte payload of a library item, labelled with an identifier.

    Carried in the optional ``library_blob`` field of
    ``KnowledgeRequest`` and ``KnowledgeResponse``.  Both fields default
    to empty values.
    """

    # Identifier the payload belongs to.
    # NOTE(review): presumably matches a LibraryMetadata.id -- confirm.
    id: str = ""
    # Payload bytes; the encoding/format is not specified in this module.
    data: bytes = b""
@dataclass
class KnowledgeRequest:
# get-kg-core, delete-kg-core, list-kg-cores, put-kg-core
@@ -44,6 +59,10 @@ class KnowledgeRequest:
# put-de-core
document_embeddings: DocumentEmbeddings | None = None
# put-kg-core (source material)
library_metadata: LibraryMetadata | None = None
library_blob: LibraryBlob | None = None
@dataclass
class KnowledgeResponse:
error: Error | None = None
@@ -52,6 +71,8 @@ class KnowledgeResponse:
triples: Triples | None = None
graph_embeddings: GraphEmbeddings | None = None
document_embeddings: DocumentEmbeddings | None = None
library_metadata: LibraryMetadata | None = None
library_blob: LibraryBlob | None = None
# Queue handles for the 'knowledge' topic, one per message class
# ('request' and 'response').
# NOTE(review): queue() is defined outside this view; presumably it
# builds the queue/topic name from these arguments -- confirm.
knowledge_request_queue = queue('knowledge', cls='request')
knowledge_response_queue = queue('knowledge', cls='response')