mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-05-04 12:52:36 +02:00
Fix Metadata/EntityEmbeddings schema migration tail and add regression tests (#777)
The Metadata dataclass dropped its `metadata: list[Triple]` field
and EntityEmbeddings/ChunkEmbeddings settled on a singular
`vector: list[float]` field, but several call sites kept passing
`Metadata(metadata=...)` and `EntityEmbeddings(vectors=...)`. The
bugs were latent until a websocket client first hit
`/api/v1/flow/default/import/entity-contexts`, at which point the
dispatcher raised a TypeError while constructing the schema object.
Production fixes (5 call sites on the same migration tail):
* trustgraph-flow gateway dispatchers entity_contexts_import.py
and graph_embeddings_import.py — drop the stale
Metadata(metadata=...) kwarg; switch graph_embeddings_import
to the singular `vector` wire key.
* trustgraph-base messaging translators knowledge.py and
document_loading.py — fix decode side to read the singular
`"vector"` key, matching what their own encode sides have
always written.
* trustgraph-flow tables/knowledge.py — fix Cassandra row
deserialiser to construct EntityEmbeddings(vector=...)
instead of EntityEmbeddings(vectors=...).
* trustgraph-flow gateway core_import/core_export — switch the
kg-core msgpack wire format to the singular `"v"`/`"vector"`
key and drop the dead `m["m"]` envelope field that referenced
the removed Metadata.metadata triples list (it was a
guaranteed KeyError on the export side).
Defense-in-depth regression coverage (32 new tests across 7 files):
* tests/contract/test_schema_field_contracts.py — pin the field
set of Metadata, EntityEmbeddings, ChunkEmbeddings,
EntityContext so any future schema rename fails CI loudly
with a clear diff.
* tests/unit/test_translators/test_knowledge_translator_roundtrip.py
and test_document_embeddings_translator_roundtrip.py —
encode→decode round-trip the affected translators end to end,
locking in the singular `"vector"` wire key.
* tests/unit/test_gateway/test_entity_contexts_import_dispatcher.py
and test_graph_embeddings_import_dispatcher.py — exercise the
websocket dispatchers' receive() path with realistic
payloads, the direct regression test for the original
production crash.
* tests/unit/test_gateway/test_core_import_export_roundtrip.py
— pack/unpack the kg-core msgpack format through the real
dispatcher classes (with KnowledgeRequestor mocked),
including a full export→import round-trip.
* tests/unit/test_tables/test_knowledge_table_store.py —
exercise the Cassandra row → schema conversion via __new__ to
bypass the live cluster connection.
Also fixes an unrelated leaked-coroutine RuntimeWarning in
test_gateway/test_service.py::test_run_method_calls_web_run_app: the
mocked aiohttp.web.run_app now closes the coroutine that Api.run() hands
it, mirroring what the real run_app would do, instead of leaving it for
the GC to complain about.
This commit is contained in:
parent
0994d4b05f
commit
c23e28aa66
17 changed files with 1415 additions and 17 deletions
|
|
@@ -40,15 +40,14 @@ class CoreExport:
|
|||
"ge",
|
||||
{
|
||||
"m": {
|
||||
"i": data["metadata"]["id"],
|
||||
"m": data["metadata"]["metadata"],
|
||||
"i": data["metadata"]["id"],
|
||||
"u": data["metadata"]["user"],
|
||||
"c": data["metadata"]["collection"],
|
||||
},
|
||||
"e": [
|
||||
{
|
||||
"e": ent["entity"],
|
||||
"v": ent["vectors"],
|
||||
"v": ent["vector"],
|
||||
}
|
||||
for ent in data["entities"]
|
||||
]
|
||||
|
|
@@ -65,8 +64,7 @@ class CoreExport:
|
|||
"t",
|
||||
{
|
||||
"m": {
|
||||
"i": data["metadata"]["id"],
|
||||
"m": data["metadata"]["metadata"],
|
||||
"i": data["metadata"]["id"],
|
||||
"u": data["metadata"]["user"],
|
||||
"c": data["metadata"]["collection"],
|
||||
},
|
||||
|
|
|
|||
|
|
@@ -48,7 +48,6 @@ class CoreImport:
|
|||
"triples": {
|
||||
"metadata": {
|
||||
"id": id,
|
||||
"metadata": msg["m"]["m"],
|
||||
"user": user,
|
||||
"collection": "default", # Not used?
|
||||
},
|
||||
|
|
@@ -57,7 +56,7 @@ class CoreImport:
|
|||
}
|
||||
|
||||
await kr.process(msg)
|
||||
|
||||
|
||||
elif unpacked[0] == "ge":
|
||||
msg = unpacked[1]
|
||||
msg = {
|
||||
|
|
@@ -67,14 +66,13 @@ class CoreImport:
|
|||
"graph-embeddings": {
|
||||
"metadata": {
|
||||
"id": id,
|
||||
"metadata": msg["m"]["m"],
|
||||
"user": user,
|
||||
"collection": "default", # Not used?
|
||||
},
|
||||
"entities": [
|
||||
{
|
||||
"entity": ent["e"],
|
||||
"vectors": ent["v"],
|
||||
"vector": ent["v"],
|
||||
}
|
||||
for ent in msg["e"]
|
||||
]
|
||||
|
|
|
|||
|
|
@@ -8,7 +8,7 @@ from ... schema import Metadata
|
|||
from ... schema import EntityContexts, EntityContext
|
||||
from ... base import Publisher
|
||||
|
||||
from . serialize import to_subgraph, to_value
|
||||
from . serialize import to_value
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
|
@@ -48,7 +48,6 @@ class EntityContextsImport:
|
|||
elt = EntityContexts(
|
||||
metadata=Metadata(
|
||||
id=data["metadata"]["id"],
|
||||
metadata=to_subgraph(data["metadata"]["metadata"]),
|
||||
user=data["metadata"]["user"],
|
||||
collection=data["metadata"]["collection"],
|
||||
),
|
||||
|
|
|
|||
|
|
@@ -8,7 +8,7 @@ from ... schema import Metadata
|
|||
from ... schema import GraphEmbeddings, EntityEmbeddings
|
||||
from ... base import Publisher
|
||||
|
||||
from . serialize import to_subgraph, to_value
|
||||
from . serialize import to_value
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
|
@@ -48,14 +48,13 @@ class GraphEmbeddingsImport:
|
|||
elt = GraphEmbeddings(
|
||||
metadata=Metadata(
|
||||
id=data["metadata"]["id"],
|
||||
metadata=to_subgraph(data["metadata"]["metadata"]),
|
||||
user=data["metadata"]["user"],
|
||||
collection=data["metadata"]["collection"],
|
||||
),
|
||||
entities=[
|
||||
EntityEmbeddings(
|
||||
entity=to_value(ent["entity"]),
|
||||
vectors=ent["vectors"],
|
||||
vector=ent["vector"],
|
||||
)
|
||||
for ent in data["entities"]
|
||||
]
|
||||
|
|
|
|||
|
|
@@ -443,7 +443,7 @@ class KnowledgeTableStore:
|
|||
entities = [
|
||||
EntityEmbeddings(
|
||||
entity = tuple_to_term(ent[0][0], ent[0][1]),
|
||||
vectors = ent[1]
|
||||
vector = ent[1]
|
||||
)
|
||||
for ent in row[3]
|
||||
]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue