mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-05-26 07:45:13 +02:00
Remove redundant metadata (#685)
The metadata field (a list of triples) in the pipeline Metadata class was redundant. Document metadata triples already flow directly from the librarian to the triple store via emit_document_provenance(), so they do not need to pass through the extraction pipeline. Additionally, the chunker and the PDF decoder were overwriting metadata with [] anyway, so any metadata passed through the pipeline was being discarded.
Changes:
- Remove the metadata field from the Metadata dataclass (schema/core/metadata.py)
- Update all Metadata instantiations to remove the metadata=[] parameter
- Remove metadata handling from the translators (document_loading, knowledge)
- Remove metadata consumption from the extractors (ontology, agent)
- Update the gateway serializers and import handlers
- Update all unit, integration, and contract tests
This commit is contained in:
parent
1837d73f34
commit
aa4f5c6c00
37 changed files with 106 additions and 343 deletions
|
|
@ -178,7 +178,6 @@ class Processor(ChunkingService):
|
|||
await flow("triples").send(Triples(
|
||||
metadata=Metadata(
|
||||
id=chunk_uri,
|
||||
metadata=[],
|
||||
user=v.metadata.user,
|
||||
collection=v.metadata.collection,
|
||||
),
|
||||
|
|
@ -189,7 +188,6 @@ class Processor(ChunkingService):
|
|||
r = Chunk(
|
||||
metadata=Metadata(
|
||||
id=chunk_uri,
|
||||
metadata=[],
|
||||
user=v.metadata.user,
|
||||
collection=v.metadata.collection,
|
||||
),
|
||||
|
|
|
|||
|
|
@ -302,7 +302,6 @@ class Processor(FlowProcessor):
|
|||
await flow("triples").send(Triples(
|
||||
metadata=Metadata(
|
||||
id=pg_uri,
|
||||
metadata=[],
|
||||
user=v.metadata.user,
|
||||
collection=v.metadata.collection,
|
||||
),
|
||||
|
|
@ -314,7 +313,6 @@ class Processor(FlowProcessor):
|
|||
r = TextDocument(
|
||||
metadata=Metadata(
|
||||
id=pg_uri,
|
||||
metadata=[],
|
||||
user=v.metadata.user,
|
||||
collection=v.metadata.collection,
|
||||
),
|
||||
|
|
|
|||
|
|
@ -104,7 +104,6 @@ class Processor(FlowProcessor):
|
|||
tpls = Triples(
|
||||
metadata = Metadata(
|
||||
id = metadata.id,
|
||||
metadata = [],
|
||||
user = metadata.user,
|
||||
collection = metadata.collection,
|
||||
),
|
||||
|
|
@ -117,7 +116,6 @@ class Processor(FlowProcessor):
|
|||
ecs = EntityContexts(
|
||||
metadata = Metadata(
|
||||
id = metadata.id,
|
||||
metadata = [],
|
||||
user = metadata.user,
|
||||
collection = metadata.collection,
|
||||
),
|
||||
|
|
@ -216,10 +214,6 @@ class Processor(FlowProcessor):
|
|||
extraction_data, v.metadata
|
||||
)
|
||||
|
||||
# Put document metadata into triples
|
||||
for t in v.metadata.metadata:
|
||||
triples.append(t)
|
||||
|
||||
# Emit outputs
|
||||
if triples:
|
||||
await self.emit_triples(flow("triples"), v.metadata, triples)
|
||||
|
|
|
|||
|
|
@ -218,7 +218,6 @@ class Processor(FlowProcessor):
|
|||
flow("triples"),
|
||||
Metadata(
|
||||
id=v.metadata.id,
|
||||
metadata=[],
|
||||
user=v.metadata.user,
|
||||
collection=v.metadata.collection,
|
||||
),
|
||||
|
|
@ -232,7 +231,6 @@ class Processor(FlowProcessor):
|
|||
flow("entity-contexts"),
|
||||
Metadata(
|
||||
id=v.metadata.id,
|
||||
metadata=[],
|
||||
user=v.metadata.user,
|
||||
collection=v.metadata.collection,
|
||||
),
|
||||
|
|
|
|||
|
|
@ -306,10 +306,6 @@ class Processor(FlowProcessor):
|
|||
flow, chunk, ontology_subset, prompt_variables
|
||||
)
|
||||
|
||||
# Add metadata triples
|
||||
for t in v.metadata.metadata:
|
||||
triples.append(t)
|
||||
|
||||
# Generate ontology definition triples
|
||||
ontology_triples = self.build_ontology_triples(ontology_subset)
|
||||
|
||||
|
|
@ -558,7 +554,6 @@ class Processor(FlowProcessor):
|
|||
t = Triples(
|
||||
metadata=Metadata(
|
||||
id=metadata.id,
|
||||
metadata=[],
|
||||
user=metadata.user,
|
||||
collection=metadata.collection,
|
||||
),
|
||||
|
|
@ -571,7 +566,6 @@ class Processor(FlowProcessor):
|
|||
ec = EntityContexts(
|
||||
metadata=Metadata(
|
||||
id=metadata.id,
|
||||
metadata=[],
|
||||
user=metadata.user,
|
||||
collection=metadata.collection,
|
||||
),
|
||||
|
|
|
|||
|
|
@ -219,7 +219,6 @@ class Processor(FlowProcessor):
|
|||
flow("triples"),
|
||||
Metadata(
|
||||
id=v.metadata.id,
|
||||
metadata=[],
|
||||
user=v.metadata.user,
|
||||
collection=v.metadata.collection,
|
||||
),
|
||||
|
|
|
|||
|
|
@ -272,7 +272,6 @@ class Processor(FlowProcessor):
|
|||
extracted = ExtractedObject(
|
||||
metadata=Metadata(
|
||||
id=f"{v.metadata.id}:{schema_name}",
|
||||
metadata=[],
|
||||
user=v.metadata.user,
|
||||
collection=v.metadata.collection,
|
||||
),
|
||||
|
|
|
|||
|
|
@ -53,7 +53,6 @@ class RowsImport:
|
|||
elt = ExtractedObject(
|
||||
metadata=Metadata(
|
||||
id=data["metadata"]["id"],
|
||||
metadata=to_subgraph(data["metadata"].get("metadata", [])),
|
||||
user=data["metadata"]["user"],
|
||||
collection=data["metadata"]["collection"],
|
||||
),
|
||||
|
|
|
|||
|
|
@ -37,18 +37,17 @@ def serialize_triples(message):
|
|||
return {
|
||||
"metadata": {
|
||||
"id": message.metadata.id,
|
||||
"metadata": serialize_subgraph(message.metadata.metadata),
|
||||
"user": message.metadata.user,
|
||||
"collection": message.metadata.collection,
|
||||
},
|
||||
"triples": serialize_subgraph(message.triples),
|
||||
}
|
||||
|
||||
|
||||
|
||||
def serialize_graph_embeddings(message):
|
||||
return {
|
||||
"metadata": {
|
||||
"id": message.metadata.id,
|
||||
"metadata": serialize_subgraph(message.metadata.metadata),
|
||||
"user": message.metadata.user,
|
||||
"collection": message.metadata.collection,
|
||||
},
|
||||
|
|
@ -61,11 +60,11 @@ def serialize_graph_embeddings(message):
|
|||
],
|
||||
}
|
||||
|
||||
|
||||
def serialize_entity_contexts(message):
|
||||
return {
|
||||
"metadata": {
|
||||
"id": message.metadata.id,
|
||||
"metadata": serialize_subgraph(message.metadata.metadata),
|
||||
"user": message.metadata.user,
|
||||
"collection": message.metadata.collection,
|
||||
},
|
||||
|
|
@ -78,11 +77,11 @@ def serialize_entity_contexts(message):
|
|||
],
|
||||
}
|
||||
|
||||
|
||||
def serialize_document_embeddings(message):
|
||||
return {
|
||||
"metadata": {
|
||||
"id": message.metadata.id,
|
||||
"metadata": serialize_subgraph(message.metadata.metadata),
|
||||
"user": message.metadata.user,
|
||||
"collection": message.metadata.collection,
|
||||
},
|
||||
|
|
|
|||
|
|
@ -48,7 +48,6 @@ class TriplesImport:
|
|||
elt = Triples(
|
||||
metadata=Metadata(
|
||||
id=data["metadata"]["id"],
|
||||
metadata=to_subgraph(data["metadata"]["metadata"]),
|
||||
user=data["metadata"]["user"],
|
||||
collection=data["metadata"]["collection"],
|
||||
),
|
||||
|
|
|
|||
|
|
@ -334,7 +334,6 @@ class Processor(AsyncProcessor):
|
|||
triples_msg = Triples(
|
||||
metadata=Metadata(
|
||||
id=doc_uri,
|
||||
metadata=[],
|
||||
user=processing.user,
|
||||
collection=processing.collection,
|
||||
),
|
||||
|
|
@ -381,7 +380,6 @@ class Processor(AsyncProcessor):
|
|||
doc = TextDocument(
|
||||
metadata = Metadata(
|
||||
id = document.id,
|
||||
metadata = document.metadata,
|
||||
user = processing.user,
|
||||
collection = processing.collection
|
||||
),
|
||||
|
|
@ -392,7 +390,6 @@ class Processor(AsyncProcessor):
|
|||
doc = TextDocument(
|
||||
metadata = Metadata(
|
||||
id = document.id,
|
||||
metadata = document.metadata,
|
||||
user = processing.user,
|
||||
collection = processing.collection
|
||||
),
|
||||
|
|
@ -408,7 +405,6 @@ class Processor(AsyncProcessor):
|
|||
doc = Document(
|
||||
metadata = Metadata(
|
||||
id = document.id,
|
||||
metadata = document.metadata,
|
||||
user = processing.user,
|
||||
collection = processing.collection
|
||||
),
|
||||
|
|
@ -419,7 +415,6 @@ class Processor(AsyncProcessor):
|
|||
doc = Document(
|
||||
metadata = Metadata(
|
||||
id = document.id,
|
||||
metadata = document.metadata,
|
||||
user = processing.user,
|
||||
collection = processing.collection
|
||||
),
|
||||
|
|
|
|||
|
|
@ -243,7 +243,6 @@ class Processor(FlowProcessor):
|
|||
await flow("explainability").send(Triples(
|
||||
metadata=Metadata(
|
||||
id=explain_id,
|
||||
metadata=[],
|
||||
user=v.user,
|
||||
collection=v.collection, # Store in user's collection, not separate explainability collection
|
||||
),
|
||||
|
|
|
|||
|
|
@ -218,16 +218,6 @@ class KnowledgeTableStore:
|
|||
|
||||
when = int(time.time() * 1000)
|
||||
|
||||
if m.metadata.metadata:
|
||||
metadata = [
|
||||
(
|
||||
*term_to_tuple(v.s), *term_to_tuple(v.p), *term_to_tuple(v.o)
|
||||
)
|
||||
for v in m.metadata.metadata
|
||||
]
|
||||
else:
|
||||
metadata = []
|
||||
|
||||
triples = [
|
||||
(
|
||||
*term_to_tuple(v.s), *term_to_tuple(v.p), *term_to_tuple(v.o)
|
||||
|
|
@ -244,7 +234,7 @@ class KnowledgeTableStore:
|
|||
(
|
||||
uuid.uuid4(), m.metadata.user,
|
||||
m.metadata.id, when,
|
||||
metadata, triples,
|
||||
[], triples,
|
||||
)
|
||||
)
|
||||
|
||||
|
|
@ -259,16 +249,6 @@ class KnowledgeTableStore:
|
|||
|
||||
when = int(time.time() * 1000)
|
||||
|
||||
if m.metadata.metadata:
|
||||
metadata = [
|
||||
(
|
||||
*term_to_tuple(v.s), *term_to_tuple(v.p), *term_to_tuple(v.o)
|
||||
)
|
||||
for v in m.metadata.metadata
|
||||
]
|
||||
else:
|
||||
metadata = []
|
||||
|
||||
entities = [
|
||||
(
|
||||
term_to_tuple(v.entity),
|
||||
|
|
@ -286,7 +266,7 @@ class KnowledgeTableStore:
|
|||
(
|
||||
uuid.uuid4(), m.metadata.user,
|
||||
m.metadata.id, when,
|
||||
metadata, entities,
|
||||
[], entities,
|
||||
)
|
||||
)
|
||||
|
||||
|
|
@ -301,16 +281,6 @@ class KnowledgeTableStore:
|
|||
|
||||
when = int(time.time() * 1000)
|
||||
|
||||
if m.metadata.metadata:
|
||||
metadata = [
|
||||
(
|
||||
*term_to_tuple(v.s), *term_to_tuple(v.p), *term_to_tuple(v.o)
|
||||
)
|
||||
for v in m.metadata.metadata
|
||||
]
|
||||
else:
|
||||
metadata = []
|
||||
|
||||
chunks = [
|
||||
(
|
||||
v.chunk_id,
|
||||
|
|
@ -328,7 +298,7 @@ class KnowledgeTableStore:
|
|||
(
|
||||
uuid.uuid4(), m.metadata.user,
|
||||
m.metadata.id, when,
|
||||
metadata, chunks,
|
||||
[], chunks,
|
||||
)
|
||||
)
|
||||
|
||||
|
|
@ -423,18 +393,6 @@ class KnowledgeTableStore:
|
|||
|
||||
for row in resp:
|
||||
|
||||
if row[2]:
|
||||
metadata = [
|
||||
Triple(
|
||||
s = tuple_to_term(elt[0], elt[1]),
|
||||
p = tuple_to_term(elt[2], elt[3]),
|
||||
o = tuple_to_term(elt[4], elt[5]),
|
||||
)
|
||||
for elt in row[2]
|
||||
]
|
||||
else:
|
||||
metadata = []
|
||||
|
||||
if row[3]:
|
||||
triples = [
|
||||
Triple(
|
||||
|
|
@ -453,7 +411,6 @@ class KnowledgeTableStore:
|
|||
id = document_id,
|
||||
user = user,
|
||||
collection = "default", # FIXME: What to put here?
|
||||
metadata = metadata,
|
||||
),
|
||||
triples = triples
|
||||
)
|
||||
|
|
@ -482,18 +439,6 @@ class KnowledgeTableStore:
|
|||
|
||||
for row in resp:
|
||||
|
||||
if row[2]:
|
||||
metadata = [
|
||||
Triple(
|
||||
s = tuple_to_term(elt[0], elt[1]),
|
||||
p = tuple_to_term(elt[2], elt[3]),
|
||||
o = tuple_to_term(elt[4], elt[5]),
|
||||
)
|
||||
for elt in row[2]
|
||||
]
|
||||
else:
|
||||
metadata = []
|
||||
|
||||
if row[3]:
|
||||
entities = [
|
||||
EntityEmbeddings(
|
||||
|
|
@ -511,7 +456,6 @@ class KnowledgeTableStore:
|
|||
id = document_id,
|
||||
user = user,
|
||||
collection = "default", # FIXME: What to put here?
|
||||
metadata = metadata,
|
||||
),
|
||||
entities = entities
|
||||
)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue