mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-25 16:36:21 +02:00
Fix Metadata/EntityEmbeddings schema migration tail and add regression tests (#777)
The Metadata dataclass dropped its `metadata: list[Triple]` field
and EntityEmbeddings/ChunkEmbeddings settled on a singular
`vector: list[float]` field, but several call sites kept passing
`Metadata(metadata=...)` and `EntityEmbeddings(vectors=...)`. The
bugs were latent until a websocket client first hit
`/api/v1/flow/default/import/entity-contexts`, at which point the
dispatcher raised a TypeError when constructing the schema object.
Production fixes (5 call sites on the same migration tail):
* trustgraph-flow gateway dispatchers entity_contexts_import.py
and graph_embeddings_import.py — drop the stale
Metadata(metadata=...) kwarg; switch graph_embeddings_import
to the singular `vector` wire key.
* trustgraph-base messaging translators knowledge.py and
document_loading.py — fix decode side to read the singular
`"vector"` key, matching what their own encode sides have
always written.
* trustgraph-flow tables/knowledge.py — fix Cassandra row
deserialiser to construct EntityEmbeddings(vector=...)
instead of vectors=.
* trustgraph-flow gateway core_import/core_export — switch the
kg-core msgpack wire format to the singular `"v"`/`"vector"`
key and drop the dead `m["m"]` envelope field that referenced
the removed Metadata.metadata triples list (it was a
guaranteed KeyError on the export side).
Defense-in-depth regression coverage (32 new tests across 7 files):
* tests/contract/test_schema_field_contracts.py — pin the field
set of Metadata, EntityEmbeddings, ChunkEmbeddings,
EntityContext so any future schema rename fails CI loudly
with a clear diff.
* tests/unit/test_translators/test_knowledge_translator_roundtrip.py
and test_document_embeddings_translator_roundtrip.py —
encode→decode round-trip the affected translators end to end,
locking in the singular `"vector"` wire key.
* tests/unit/test_gateway/test_entity_contexts_import_dispatcher.py
and test_graph_embeddings_import_dispatcher.py — exercise the
websocket dispatchers' receive() path with realistic
payloads, the direct regression test for the original
production crash.
* tests/unit/test_gateway/test_core_import_export_roundtrip.py
— pack/unpack the kg-core msgpack format through the real
dispatcher classes (with KnowledgeRequestor mocked),
including a full export→import round-trip.
* tests/unit/test_tables/test_knowledge_table_store.py —
exercise the Cassandra row → schema conversion via __new__ to
bypass the live cluster connection.
Also fixes an unrelated leaked-coroutine RuntimeWarning in
test_gateway/test_service.py::test_run_method_calls_web_run_app: the
mocked aiohttp.web.run_app now closes the coroutine that Api.run() hands
it, mirroring what the real run_app would do, instead of leaving it for
the GC to complain about.
This commit is contained in:
parent
0994d4b05f
commit
c23e28aa66
17 changed files with 1415 additions and 17 deletions
0
tests/unit/test_translators/__init__.py
Normal file
0
tests/unit/test_translators/__init__.py
Normal file
|
|
@ -0,0 +1,66 @@
|
|||
"""
|
||||
Round-trip unit tests for DocumentEmbeddingsTranslator.
|
||||
|
||||
Regression coverage: a previous version of the decode side constructed
|
||||
ChunkEmbeddings(vectors=...) — the schema field is `vector` (singular),
|
||||
so any real DocumentEmbeddings message would crash on decode. The encode
|
||||
side already wrote `"vector"`, so encode→decode was asymmetric.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
|
||||
from trustgraph.messaging.translators.document_loading import (
|
||||
DocumentEmbeddingsTranslator,
|
||||
)
|
||||
from trustgraph.schema import (
|
||||
DocumentEmbeddings,
|
||||
ChunkEmbeddings,
|
||||
Metadata,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
def translator():
    """Provide a fresh DocumentEmbeddingsTranslator for each test."""
    document_translator = DocumentEmbeddingsTranslator()
    return document_translator
|
||||
|
||||
|
||||
@pytest.fixture
def sample():
    """Build a DocumentEmbeddings message carrying two chunk embeddings."""
    doc_metadata = Metadata(
        id="doc-1",
        root="",
        user="alice",
        collection="testcoll",
    )
    chunk_embeddings = [
        ChunkEmbeddings(chunk_id="c1", vector=[0.1, 0.2, 0.3]),
        ChunkEmbeddings(chunk_id="c2", vector=[0.4, 0.5, 0.6]),
    ]
    return DocumentEmbeddings(metadata=doc_metadata, chunks=chunk_embeddings)
|
||||
|
||||
|
||||
class TestDocumentEmbeddingsTranslator:
    """Encode and encode->decode checks for DocumentEmbeddingsTranslator."""

    def test_encode_uses_singular_vector_key(self, translator, sample):
        """Every encoded chunk exposes `vector` and never `vectors`."""
        wire_chunks = translator.encode(sample)["chunks"]

        for wire_chunk in wire_chunks:
            assert "vector" in wire_chunk
            assert "vectors" not in wire_chunk

        assert wire_chunks[0]["vector"] == [0.1, 0.2, 0.3]

    def test_roundtrip_preserves_document_embeddings(self, translator, sample):
        """Decoding the encoded sample yields the same metadata and chunks."""
        restored = translator.decode(translator.encode(sample))

        assert isinstance(restored, DocumentEmbeddings)

        meta = restored.metadata
        assert isinstance(meta, Metadata)
        assert meta.id == "doc-1"
        assert meta.user == "alice"
        assert meta.collection == "testcoll"

        assert len(restored.chunks) == 2

        # Compare each decoded chunk against the values the fixture put in.
        expected_chunks = [
            ("c1", [0.1, 0.2, 0.3]),
            ("c2", [0.4, 0.5, 0.6]),
        ]
        for position, (chunk_id, vector) in enumerate(expected_chunks):
            chunk = restored.chunks[position]
            assert chunk.chunk_id == chunk_id
            assert chunk.vector == vector
|
||||
|
|
@ -0,0 +1,153 @@
|
|||
"""
|
||||
Round-trip unit tests for KnowledgeRequestTranslator.
|
||||
|
||||
Regression coverage: a previous version of the decode side constructed
|
||||
EntityEmbeddings(vectors=...) — the schema field is `vector` (singular),
|
||||
so any real graph-embeddings KnowledgeRequest would crash on first
|
||||
message. The encode side already wrote `"vector"`, so encode→decode was
|
||||
asymmetric.
|
||||
|
||||
These tests build a real KnowledgeRequest with graph-embeddings, encode
|
||||
it, decode the result, and assert the round-trip is lossless. They also
|
||||
exercise the triples path so any future schema drift in Metadata or
|
||||
Triples breaks the test.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
|
||||
from trustgraph.messaging.translators.knowledge import KnowledgeRequestTranslator
|
||||
from trustgraph.schema import (
|
||||
KnowledgeRequest,
|
||||
GraphEmbeddings,
|
||||
EntityEmbeddings,
|
||||
Triples,
|
||||
Triple,
|
||||
Metadata,
|
||||
Term,
|
||||
IRI,
|
||||
)
|
||||
|
||||
|
||||
def _term_iri(uri):
    """Return a Term of type IRI whose `iri` field is *uri*."""
    iri_term = Term(type=IRI, iri=uri)
    return iri_term
|
||||
|
||||
|
||||
@pytest.fixture
def translator():
    """Provide a fresh KnowledgeRequestTranslator for each test."""
    knowledge_translator = KnowledgeRequestTranslator()
    return knowledge_translator
|
||||
|
||||
|
||||
@pytest.fixture
def graph_embeddings_request():
    """Build a put-kg-core KnowledgeRequest carrying two entity embeddings."""
    ge_metadata = Metadata(
        id="doc-1",
        root="",
        user="alice",
        collection="testcoll",
    )

    # (entity IRI, embedding vector) pairs, in the order the tests expect.
    entity_rows = [
        ("http://example.org/alice", [0.1, 0.2, 0.3]),
        ("http://example.org/bob", [0.4, 0.5, 0.6]),
    ]
    entity_embeddings = [
        EntityEmbeddings(entity=_term_iri(uri), vector=embedding)
        for uri, embedding in entity_rows
    ]

    payload = GraphEmbeddings(metadata=ge_metadata, entities=entity_embeddings)

    return KnowledgeRequest(
        operation="put-kg-core",
        user="alice",
        id="doc-1",
        flow="default",
        collection="testcoll",
        graph_embeddings=payload,
    )
|
||||
|
||||
|
||||
@pytest.fixture
def triples_request():
    """Build a put-kg-core KnowledgeRequest carrying a single triple."""
    triples_metadata = Metadata(
        id="doc-1",
        root="",
        user="alice",
        collection="testcoll",
    )

    alice_knows_bob = Triple(
        s=_term_iri("http://example.org/alice"),
        p=_term_iri("http://example.org/knows"),
        o=_term_iri("http://example.org/bob"),
    )

    payload = Triples(metadata=triples_metadata, triples=[alice_knows_bob])

    return KnowledgeRequest(
        operation="put-kg-core",
        user="alice",
        id="doc-1",
        flow="default",
        collection="testcoll",
        triples=payload,
    )
|
||||
|
||||
|
||||
class TestKnowledgeRequestTranslatorGraphEmbeddings:
    """Graph-embeddings branch of KnowledgeRequestTranslator."""

    def test_encode_produces_singular_vector_key(
        self, translator, graph_embeddings_request,
    ):
        """The wire key must be `vector`, never `vectors`."""
        wire_message = translator.encode(graph_embeddings_request)
        wire_entities = wire_message["graph-embeddings"]["entities"]

        for wire_entity in wire_entities:
            assert "vector" in wire_entity
            assert "vectors" not in wire_entity

        assert wire_entities[0]["vector"] == [0.1, 0.2, 0.3]

    def test_roundtrip_preserves_graph_embeddings(
        self, translator, graph_embeddings_request,
    ):
        """encode -> decode must be lossless for the GE branch."""
        restored = translator.decode(translator.encode(graph_embeddings_request))

        # Envelope fields of the request itself.
        assert isinstance(restored, KnowledgeRequest)
        assert restored.operation == "put-kg-core"
        assert restored.user == "alice"
        assert restored.id == "doc-1"
        assert restored.flow == "default"
        assert restored.collection == "testcoll"

        # Nested graph-embeddings payload and its metadata.
        assert restored.graph_embeddings is not None
        embeddings = restored.graph_embeddings
        assert isinstance(embeddings, GraphEmbeddings)

        meta = embeddings.metadata
        assert isinstance(meta, Metadata)
        assert meta.id == "doc-1"
        assert meta.user == "alice"
        assert meta.collection == "testcoll"

        assert len(embeddings.entities) == 2

        expected_vectors = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]
        for position, vector in enumerate(expected_vectors):
            assert embeddings.entities[position].vector == vector

        expected_iris = ["http://example.org/alice", "http://example.org/bob"]
        for position, iri in enumerate(expected_iris):
            assert embeddings.entities[position].entity.iri == iri
|
||||
|
||||
|
||||
class TestKnowledgeRequestTranslatorTriples:
    """Triples branch of KnowledgeRequestTranslator."""

    def test_roundtrip_preserves_triples(self, translator, triples_request):
        """encode -> decode keeps the triples metadata and the single triple."""
        restored = translator.decode(translator.encode(triples_request))

        assert isinstance(restored, KnowledgeRequest)
        assert restored.triples is not None

        meta = restored.triples.metadata
        assert isinstance(meta, Metadata)
        assert meta.id == "doc-1"
        assert meta.user == "alice"
        assert meta.collection == "testcoll"

        assert len(restored.triples.triples) == 1

        triple = restored.triples.triples[0]
        assert triple.s.iri == "http://example.org/alice"
        assert triple.p.iri == "http://example.org/knows"
        assert triple.o.iri == "http://example.org/bob"
|
||||
Loading…
Add table
Add a link
Reference in a new issue