trustgraph/tests/unit/test_reliability/test_metadata_preservation.py
cybermaggedon 29b4300808
Updated test suite for explainability & provenance (#696)
* Provenance tests

* Embeddings tests

* Test librarian

* Test triples stream

* Test concurrency

* Entity centric graph writes

* Agent tool service tests

* Structured data tests

* RDF tests

* Additional LLM tests

* Reliability tests
2026-03-13 14:27:42 +00:00

144 lines
4.7 KiB
Python

"""
Tests for pipeline metadata preservation: DocumentMetadata and
ProcessingMetadata round-trip through translators, field preservation,
and default handling.
"""
import pytest
from trustgraph.schema import DocumentMetadata, ProcessingMetadata, Triple, Term, IRI
from trustgraph.messaging.translators.metadata import (
DocumentMetadataTranslator,
ProcessingMetadataTranslator,
)
# ---------------------------------------------------------------------------
# DocumentMetadata translator
# ---------------------------------------------------------------------------
class TestDocumentMetadataTranslator:
    """Exercise DocumentMetadataTranslator in both directions.

    ``to_pulsar`` is fed a wire-format dict and its result is inspected
    as an object; ``from_pulsar`` is fed such an object and its result is
    inspected as a dict.
    """

    def setup_method(self):
        """Create the translator under test."""
        self.tx = DocumentMetadataTranslator()

    def test_full_round_trip(self):
        """A fully populated dict converts to an object and back again."""
        payload = {
            "id": "doc-123",
            "time": 1710000000,
            "kind": "application/pdf",
            "title": "Test Document",
            "comments": "No comments",
            "metadata": [],
            "user": "alice",
            "tags": ["finance", "q4"],
            "parent-id": "doc-100",
            "document-type": "page",
        }

        message = self.tx.to_pulsar(payload)

        # Hyphenated wire keys are read back through underscore attributes.
        expected_attributes = (
            ("id", "doc-123"),
            ("time", 1710000000),
            ("kind", "application/pdf"),
            ("title", "Test Document"),
            ("user", "alice"),
            ("tags", ["finance", "q4"]),
            ("parent_id", "doc-100"),
            ("document_type", "page"),
        )
        for attribute, expected in expected_attributes:
            assert getattr(message, attribute) == expected

        encoded = self.tx.from_pulsar(message)

        expected_entries = (
            ("id", "doc-123"),
            ("user", "alice"),
            ("parent-id", "doc-100"),
            ("document-type", "page"),
        )
        for key, expected in expected_entries:
            assert encoded[key] == expected

    def test_defaults_for_missing_fields(self):
        """An empty dict yields the default parent id and document type."""
        message = self.tx.to_pulsar({})

        assert message.parent_id == ""
        assert message.document_type == "source"

    def test_metadata_triples_preserved(self):
        """A single wire-format triple is carried onto the object."""

        def iri_term(uri):
            # Wire encoding used by this test for an IRI-valued term.
            return {"t": "i", "i": uri}

        triples = [{
            "s": iri_term("http://example.org/s"),
            "p": iri_term("http://example.org/p"),
            "o": iri_term("http://example.org/o"),
        }]

        message = self.tx.to_pulsar({"metadata": triples})

        converted = message.metadata
        assert len(converted) == 1
        assert converted[0].s.iri == "http://example.org/s"

    def test_none_metadata_handled(self):
        """``metadata=None`` on the wire becomes an empty list."""
        message = self.tx.to_pulsar({"metadata": None})

        assert message.metadata == []

    def test_empty_tags_preserved(self):
        """An empty tag list is still present after a round trip."""
        message = self.tx.to_pulsar({"tags": []})
        encoded = self.tx.from_pulsar(message)

        assert encoded["tags"] == []

    def test_falsy_fields_omitted_from_wire(self):
        """Empty-string ``id`` and ``user`` do not appear in the wire dict.

        ``time=0`` is passed as well, but its presence on the wire is not
        asserted here.
        """
        blank = DocumentMetadata(id="", time=0, user="")

        encoded = self.tx.from_pulsar(blank)

        for key in ("id", "user"):
            assert key not in encoded
# ---------------------------------------------------------------------------
# ProcessingMetadata translator
# ---------------------------------------------------------------------------
class TestProcessingMetadataTranslator:
    """Exercise ProcessingMetadataTranslator in both directions.

    ``to_pulsar`` is fed a wire-format dict and its result is inspected
    as an object; ``from_pulsar`` is fed such an object and its result is
    inspected as a dict.
    """

    def setup_method(self):
        """Create the translator under test."""
        self.tx = ProcessingMetadataTranslator()

    def test_full_round_trip(self):
        """A fully populated dict converts to an object and back again."""
        payload = {
            "id": "proc-1",
            "document-id": "doc-123",
            "time": 1710000000,
            "flow": "default",
            "user": "alice",
            "collection": "my-collection",
            "tags": ["tag1"],
        }

        message = self.tx.to_pulsar(payload)

        # The hyphenated "document-id" key is read back as document_id.
        expected_attributes = (
            ("id", "proc-1"),
            ("document_id", "doc-123"),
            ("flow", "default"),
            ("user", "alice"),
            ("collection", "my-collection"),
            ("tags", ["tag1"]),
        )
        for attribute, expected in expected_attributes:
            assert getattr(message, attribute) == expected

        encoded = self.tx.from_pulsar(message)

        expected_entries = (
            ("id", "proc-1"),
            ("document-id", "doc-123"),
            ("user", "alice"),
            ("collection", "my-collection"),
        )
        for key, expected in expected_entries:
            assert encoded[key] == expected

    def test_missing_fields_use_defaults(self):
        """An empty dict leaves id, user and collection as ``None``."""
        message = self.tx.to_pulsar({})

        for attribute in ("id", "user", "collection"):
            assert getattr(message, attribute) is None

    def test_tags_none_omitted(self):
        """``tags=None`` on the object produces no ``tags`` wire key."""
        untagged = ProcessingMetadata(tags=None)

        encoded = self.tx.from_pulsar(untagged)

        assert "tags" not in encoded

    def test_tags_empty_list_preserved(self):
        """``tags=[]`` on the object is emitted as an empty list."""
        empty_tagged = ProcessingMetadata(tags=[])

        encoded = self.tx.from_pulsar(empty_tagged)

        assert encoded["tags"] == []

    def test_user_and_collection_preserved(self):
        """``user`` and ``collection`` come back unchanged from a round trip."""
        payload = {"user": "bob", "collection": "research"}

        encoded = self.tx.from_pulsar(self.tx.to_pulsar(payload))

        assert encoded["user"] == "bob"
        assert encoded["collection"] == "research"