Fix Metadata/EntityEmbeddings schema migration tail and add regression tests (#777)

The Metadata dataclass dropped its `metadata: list[Triple]` field
and EntityEmbeddings/ChunkEmbeddings settled on a singular
`vector: list[float]` field, but several call sites kept passing
`Metadata(metadata=...)` and `EntityEmbeddings(vectors=...)`. The
bugs were latent until a websocket client first hit
`/api/v1/flow/default/import/entity-contexts`, at which point the
dispatcher TypeError'd on construction.

Production fixes (5 call sites on the same migration tail):

  * trustgraph-flow gateway dispatchers entity_contexts_import.py
    and graph_embeddings_import.py — drop the stale
    Metadata(metadata=...)  kwarg; switch graph_embeddings_import
    to the singular `vector` wire key.
  * trustgraph-base messaging translators knowledge.py and
    document_loading.py — fix decode side to read the singular
    `"vector"` key, matching what their own encode sides have
    always written.
  * trustgraph-flow tables/knowledge.py — fix Cassandra row
    deserialiser to construct EntityEmbeddings(vector=...)
    instead of vectors=.
  * trustgraph-flow gateway core_import/core_export — switch the
    kg-core msgpack wire format to the singular `"v"`/`"vector"`
    key and drop the dead `m["m"]` envelope field that referenced
    the removed Metadata.metadata triples list (it was a
    guaranteed KeyError on the export side).

Defense-in-depth regression coverage (32 new tests across 7 files):

  * tests/contract/test_schema_field_contracts.py — pin the field
    set of Metadata, EntityEmbeddings, ChunkEmbeddings,
    EntityContext so any future schema rename fails CI loudly
    with a clear diff.
  * tests/unit/test_translators/test_knowledge_translator_roundtrip.py
    and test_document_embeddings_translator_roundtrip.py -
    encode→decode round-trip the affected translators end to end,
    locking in the singular `"vector"` wire key.
  * tests/unit/test_gateway/test_entity_contexts_import_dispatcher.py
    and test_graph_embeddings_import_dispatcher.py — exercise the
    websocket dispatchers' receive() path with realistic
    payloads, the direct regression test for the original
    production crash.
  * tests/unit/test_gateway/test_core_import_export_roundtrip.py
    — pack/unpack the kg-core msgpack format through the real
    dispatcher classes (with KnowledgeRequestor mocked),
    including a full export→import round-trip.
  * tests/unit/test_tables/test_knowledge_table_store.py —
    exercise the Cassandra row → schema conversion via __new__ to
    bypass the live cluster connection.

Also fixes an unrelated leaked-coroutine RuntimeWarning in
test_gateway/test_service.py::test_run_method_calls_web_run_app: the
mocked aiohttp.web.run_app now closes the coroutine that Api.run() hands
it, mirroring what the real run_app would do, instead of leaving it for
the GC to complain about.
This commit is contained in:
cybermaggedon 2026-04-10 20:43:45 +01:00 committed by GitHub
parent 0994d4b05f
commit c23e28aa66
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
17 changed files with 1415 additions and 17 deletions

View file

@ -40,15 +40,14 @@ class CoreExport:
"ge",
{
"m": {
"i": data["metadata"]["id"],
"m": data["metadata"]["metadata"],
"i": data["metadata"]["id"],
"u": data["metadata"]["user"],
"c": data["metadata"]["collection"],
},
"e": [
{
"e": ent["entity"],
"v": ent["vectors"],
"v": ent["vector"],
}
for ent in data["entities"]
]
@ -65,8 +64,7 @@ class CoreExport:
"t",
{
"m": {
"i": data["metadata"]["id"],
"m": data["metadata"]["metadata"],
"i": data["metadata"]["id"],
"u": data["metadata"]["user"],
"c": data["metadata"]["collection"],
},

View file

@ -48,7 +48,6 @@ class CoreImport:
"triples": {
"metadata": {
"id": id,
"metadata": msg["m"]["m"],
"user": user,
"collection": "default", # Not used?
},
@ -57,7 +56,7 @@ class CoreImport:
}
await kr.process(msg)
elif unpacked[0] == "ge":
msg = unpacked[1]
msg = {
@ -67,14 +66,13 @@ class CoreImport:
"graph-embeddings": {
"metadata": {
"id": id,
"metadata": msg["m"]["m"],
"user": user,
"collection": "default", # Not used?
},
"entities": [
{
"entity": ent["e"],
"vectors": ent["v"],
"vector": ent["v"],
}
for ent in msg["e"]
]

View file

@ -8,7 +8,7 @@ from ... schema import Metadata
from ... schema import EntityContexts, EntityContext
from ... base import Publisher
from . serialize import to_subgraph, to_value
from . serialize import to_value
# Module logger
logger = logging.getLogger(__name__)
@ -48,7 +48,6 @@ class EntityContextsImport:
elt = EntityContexts(
metadata=Metadata(
id=data["metadata"]["id"],
metadata=to_subgraph(data["metadata"]["metadata"]),
user=data["metadata"]["user"],
collection=data["metadata"]["collection"],
),

View file

@ -8,7 +8,7 @@ from ... schema import Metadata
from ... schema import GraphEmbeddings, EntityEmbeddings
from ... base import Publisher
from . serialize import to_subgraph, to_value
from . serialize import to_value
# Module logger
logger = logging.getLogger(__name__)
@ -48,14 +48,13 @@ class GraphEmbeddingsImport:
elt = GraphEmbeddings(
metadata=Metadata(
id=data["metadata"]["id"],
metadata=to_subgraph(data["metadata"]["metadata"]),
user=data["metadata"]["user"],
collection=data["metadata"]["collection"],
),
entities=[
EntityEmbeddings(
entity=to_value(ent["entity"]),
vectors=ent["vectors"],
vector=ent["vector"],
)
for ent in data["entities"]
]

View file

@ -443,7 +443,7 @@ class KnowledgeTableStore:
entities = [
EntityEmbeddings(
entity = tuple_to_term(ent[0][0], ent[0][1]),
vectors = ent[1]
vector = ent[1]
)
for ent in row[3]
]