feat: complete knowledge core storage — named graphs, provenance, source material (#973)

Implements all three changes from the knowledge-core-completeness tech spec:

1. Named graph field preserved through Cassandra storage (7-element tuple),
   enabling provenance triples to retain their graph URIs on round-trip.

2. Provenance triples already arrive on triples-input — no routing change
   needed; Change 1 was sufficient.

3. Source material (library documents) streamed alongside triples and
   embeddings during core download/upload. The knowledge manager fetches
   the document hierarchy from the librarian on download and recreates it
   on upload, preserving the full provenance chain across instances.
This commit is contained in:
cybermaggedon 2026-06-03 10:46:52 +01:00 committed by GitHub
parent aa158e1ba3
commit 6df7471a55
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
14 changed files with 1347 additions and 15 deletions

View file

@ -1,5 +1,6 @@
"""
Round-trip unit tests for KnowledgeRequestTranslator.
Round-trip unit tests for KnowledgeRequestTranslator and
KnowledgeResponseTranslator.
Regression coverage: a previous version of the decode side constructed
EntityEmbeddings(vectors=...), but the schema field is `vector` (singular),
@ -15,9 +16,13 @@ Triples breaks the test.
import pytest
from trustgraph.messaging.translators.knowledge import KnowledgeRequestTranslator
from trustgraph.messaging.translators.knowledge import (
KnowledgeRequestTranslator,
KnowledgeResponseTranslator,
)
from trustgraph.schema import (
KnowledgeRequest,
KnowledgeResponse,
GraphEmbeddings,
EntityEmbeddings,
Triples,
@ -25,6 +30,8 @@ from trustgraph.schema import (
Metadata,
Term,
IRI,
LibraryMetadata,
LibraryBlob,
)
@ -145,3 +152,161 @@ class TestKnowledgeRequestTranslatorTriples:
assert t.s.iri == "http://example.org/alice"
assert t.p.iri == "http://example.org/knows"
assert t.o.iri == "http://example.org/bob"
class TestKnowledgeRequestTranslatorLibrary:
    """Encode/decode checks for the library fields carried on KnowledgeRequest."""

    def test_roundtrip_preserves_library_metadata(self, translator):
        """All populated LibraryMetadata fields appear on the wire and decode back."""
        original = KnowledgeRequest(
            operation="put-kg-core",
            id="doc-1",
            library_metadata=LibraryMetadata(
                id="doc-1",
                kind="application/pdf",
                title="Test Document",
                parent_id="",
                document_type="source",
                comments="test comments",
                tags=["tag1", "tag2"],
            ),
        )

        wire = translator.encode(original)
        assert "library-metadata" in wire

        # Wire form uses hyphenated keys.
        wire_meta = wire["library-metadata"]
        expected_on_wire = (
            ("id", "doc-1"),
            ("kind", "application/pdf"),
            ("title", "Test Document"),
            ("parent-id", ""),
            ("document-type", "source"),
            ("comments", "test comments"),
            ("tags", ["tag1", "tag2"]),
        )
        for wire_key, wanted in expected_on_wire:
            assert wire_meta[wire_key] == wanted

        restored = translator.decode(wire)
        assert restored.library_metadata is not None

        # Decoded form uses the schema's underscore attribute names.
        expected_attrs = (
            ("id", "doc-1"),
            ("kind", "application/pdf"),
            ("title", "Test Document"),
            ("parent_id", ""),
            ("document_type", "source"),
            ("comments", "test comments"),
            ("tags", ["tag1", "tag2"]),
        )
        for attr_name, wanted in expected_attrs:
            assert getattr(restored.library_metadata, attr_name) == wanted

    def test_roundtrip_preserves_child_document_metadata(self, translator):
        """A non-empty parent_id and the document_type survive encode then decode."""
        child_meta = LibraryMetadata(
            id="chunk-1",
            kind="text/plain",
            title="Chunk 1",
            parent_id="doc-1",
            document_type="chunk",
        )
        original = KnowledgeRequest(
            operation="put-kg-core",
            id="doc-1",
            library_metadata=child_meta,
        )

        restored = translator.decode(translator.encode(original))

        assert restored.library_metadata.parent_id == "doc-1"
        assert restored.library_metadata.document_type == "chunk"

    def test_roundtrip_preserves_library_blob(self, translator):
        """Blob id and data are present on the wire and after decoding."""
        original = KnowledgeRequest(
            operation="put-kg-core",
            id="doc-1",
            library_blob=LibraryBlob(
                id="doc-1",
                data=b"SGVsbG8gV29ybGQ=",
            ),
        )

        wire = translator.encode(original)
        assert "library-blob" in wire
        wire_blob = wire["library-blob"]
        assert wire_blob["id"] == "doc-1"
        assert wire_blob["data"] == "SGVsbG8gV29ybGQ="

        restored = translator.decode(wire)
        assert restored.library_blob is not None
        restored_blob = restored.library_blob
        assert restored_blob.id == "doc-1"
        assert restored_blob.data == "SGVsbG8gV29ybGQ="

    def test_absent_library_fields_decode_as_none(self, translator):
        """A payload without library keys decodes with both library fields None."""
        payload = {
            "operation": "get-kg-core",
            "id": "doc-1",
        }

        restored = translator.decode(payload)

        assert restored.library_metadata is None
        assert restored.library_blob is None
class TestKnowledgeResponseTranslatorLibrary:
    """Encode-side checks for the library fields carried on KnowledgeResponse."""

    @pytest.fixture
    def response_translator(self):
        """Provide a fresh KnowledgeResponseTranslator for each test."""
        return KnowledgeResponseTranslator()

    def test_encode_library_metadata(self, response_translator):
        """Library metadata is emitted under the "library-metadata" key."""
        metadata = LibraryMetadata(
            id="doc-1",
            kind="application/pdf",
            title="Test",
            parent_id="",
            document_type="source",
            comments="",
            tags=[],
        )
        outgoing = KnowledgeResponse(ids=None, library_metadata=metadata)

        wire = response_translator.encode(outgoing)

        assert "library-metadata" in wire
        wire_meta = wire["library-metadata"]
        assert wire_meta["id"] == "doc-1"
        assert wire_meta["kind"] == "application/pdf"
        assert wire_meta["document-type"] == "source"

    def test_encode_library_blob_bytes_to_string(self, response_translator):
        """Bytes blob data is emitted as a str with the same text."""
        blob = LibraryBlob(
            id="doc-1",
            data=b"dGVzdCBkYXRh",
        )
        outgoing = KnowledgeResponse(ids=None, library_blob=blob)

        wire = response_translator.encode(outgoing)

        assert "library-blob" in wire
        wire_blob = wire["library-blob"]
        assert wire_blob["id"] == "doc-1"
        assert wire_blob["data"] == "dGVzdCBkYXRh"
        assert isinstance(wire_blob["data"], str)

    def test_encode_library_blob_string_passthrough(self, response_translator):
        """Blob data that is already a str is emitted unchanged."""
        blob = LibraryBlob(
            id="doc-1",
            data="already-a-string",
        )
        outgoing = KnowledgeResponse(ids=None, library_blob=blob)

        wire = response_translator.encode(outgoing)

        assert wire["library-blob"]["data"] == "already-a-string"

    def test_library_metadata_is_not_final(self, response_translator):
        """A response carrying library metadata is not reported as final."""
        outgoing = KnowledgeResponse(
            ids=None,
            library_metadata=LibraryMetadata(id="doc-1"),
        )

        _payload, final_flag = response_translator.encode_with_completion(outgoing)

        assert final_flag is False

    def test_library_blob_is_not_final(self, response_translator):
        """A response carrying a library blob is not reported as final."""
        outgoing = KnowledgeResponse(
            ids=None,
            library_blob=LibraryBlob(id="doc-1", data=b"data"),
        )

        _payload, final_flag = response_translator.encode_with_completion(outgoing)

        assert final_flag is False

    def test_eos_is_final(self, response_translator):
        """A response with eos=True is reported as final."""
        outgoing = KnowledgeResponse(eos=True)

        _payload, final_flag = response_translator.encode_with_completion(outgoing)

        assert final_flag is True