mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-05-01 19:32:38 +02:00
Fix Metadata/EntityEmbeddings schema migration tail and add regression tests (#777)
The Metadata dataclass dropped its `metadata: list[Triple]` field
and EntityEmbeddings/ChunkEmbeddings settled on a singular
`vector: list[float]` field, but several call sites kept passing
`Metadata(metadata=...)` and `EntityEmbeddings(vectors=...)`. The
bugs were latent until a websocket client first hit
`/api/v1/flow/default/import/entity-contexts`, at which point the
dispatcher raised a TypeError on construction.
Production fixes (5 call sites on the same migration tail):
* trustgraph-flow gateway dispatchers entity_contexts_import.py
and graph_embeddings_import.py — drop the stale
Metadata(metadata=...) kwarg; switch graph_embeddings_import
to the singular `vector` wire key.
* trustgraph-base messaging translators knowledge.py and
document_loading.py — fix decode side to read the singular
`"vector"` key, matching what their own encode sides have
always written.
* trustgraph-flow tables/knowledge.py — fix Cassandra row
deserialiser to construct EntityEmbeddings(vector=...)
instead of vectors=.
* trustgraph-flow gateway core_import/core_export — switch the
kg-core msgpack wire format to the singular `"v"`/`"vector"`
key and drop the dead `m["m"]` envelope field that referenced
the removed Metadata.metadata triples list (it was a
guaranteed KeyError on the export side).
Defense-in-depth regression coverage (32 new tests across 7 files):
* tests/contract/test_schema_field_contracts.py — pin the field
set of Metadata, EntityEmbeddings, ChunkEmbeddings,
EntityContext so any future schema rename fails CI loudly
with a clear diff.
* tests/unit/test_translators/test_knowledge_translator_roundtrip.py
and test_document_embeddings_translator_roundtrip.py —
encode→decode round-trip the affected translators end to end,
locking in the singular `"vector"` wire key.
* tests/unit/test_gateway/test_entity_contexts_import_dispatcher.py
and test_graph_embeddings_import_dispatcher.py — exercise the
websocket dispatchers' receive() path with realistic
payloads, the direct regression test for the original
production crash.
* tests/unit/test_gateway/test_core_import_export_roundtrip.py
— pack/unpack the kg-core msgpack format through the real
dispatcher classes (with KnowledgeRequestor mocked),
including a full export→import round-trip.
* tests/unit/test_tables/test_knowledge_table_store.py —
exercise the Cassandra row → schema conversion via __new__ to
bypass the live cluster connection.
Also fixes an unrelated leaked-coroutine RuntimeWarning in
test_gateway/test_service.py::test_run_method_calls_web_run_app: the
mocked aiohttp.web.run_app now closes the coroutine that Api.run() hands
it, mirroring what the real run_app would do, instead of leaving it for
the GC to complain about.
This commit is contained in:
parent
0994d4b05f
commit
c23e28aa66
17 changed files with 1415 additions and 17 deletions
73
tests/contract/test_schema_field_contracts.py
Normal file
73
tests/contract/test_schema_field_contracts.py
Normal file
|
|
@ -0,0 +1,73 @@
|
|||
"""
|
||||
Contract tests for schema dataclass field sets.
|
||||
|
||||
These pin the *field names* of small, widely-constructed schema dataclasses
|
||||
so that any rename, removal, or accidental addition fails CI loudly instead
|
||||
of waiting for a runtime TypeError on the next websocket message.
|
||||
|
||||
Background: in v2.2 the `Metadata` dataclass dropped a `metadata: list[Triple]`
|
||||
field but several call sites kept passing `Metadata(metadata=...)`. The bug
|
||||
was only discovered when a websocket import dispatcher received its first
|
||||
real message in production. A trivial structural assertion of the kind
|
||||
below would have caught it at unit-test time.
|
||||
|
||||
Add to this file whenever a schema rename burns you. The cost of a frozen
|
||||
field set is a one-line update when you intentionally evolve the schema; the
|
||||
benefit is that every call site is forced to come along for the ride.
|
||||
"""
|
||||
|
||||
import dataclasses
|
||||
import pytest
|
||||
|
||||
from trustgraph.schema import (
|
||||
Metadata,
|
||||
EntityContext,
|
||||
EntityEmbeddings,
|
||||
ChunkEmbeddings,
|
||||
)
|
||||
|
||||
|
||||
def _field_names(dc) -> set[str]:
    """Return the set of field names declared on a dataclass.

    Args:
        dc: Whatever ``dataclasses.fields`` accepts (a dataclass, or an
            instance of one).

    Returns:
        The ``name`` attribute of every entry yielded by
        ``dataclasses.fields(dc)``, collected into a set so that the
        comparison in the contract tests is order-independent.
    """
    return {f.name for f in dataclasses.fields(dc)}
|
||||
|
||||
|
||||
@pytest.mark.contract
class TestSchemaFieldContracts:
    """Pin the field set of dataclasses that get constructed all over the
    codebase.  If you intentionally change one of these, update the
    expected set in the same commit — that diff will surface every call
    site that needs to come along."""

    def test_metadata_fields(self):
        """Metadata exposes exactly id, root, user and collection."""
        # NOTE: there is no `metadata` field.  A previous regression
        # constructed Metadata(metadata=...) and crashed at runtime.
        assert _field_names(Metadata) == {
            "id",
            "root",
            "user",
            "collection",
        }

    def test_entity_embeddings_fields(self):
        """EntityEmbeddings exposes exactly entity, vector and chunk_id."""
        # NOTE: the embedding field is `vector` (singular, list[float]).
        # There is no `vectors` field.  Several call sites historically
        # passed `vectors=` and crashed at runtime.
        assert _field_names(EntityEmbeddings) == {
            "entity",
            "vector",
            "chunk_id",
        }

    def test_chunk_embeddings_fields(self):
        """ChunkEmbeddings exposes exactly chunk_id and vector."""
        # Same `vector` (singular) convention as EntityEmbeddings.
        assert _field_names(ChunkEmbeddings) == {
            "chunk_id",
            "vector",
        }

    def test_entity_context_fields(self):
        """EntityContext exposes exactly entity, context and chunk_id."""
        assert _field_names(EntityContext) == {
            "entity",
            "context",
            "chunk_id",
        }
|
||||
Loading…
Add table
Add a link
Reference in a new issue