Protect from null embeddings in cores (#626)

* Don't emit graph embeddings if there aren't any.

* Don't store graph embeddings in a knowledge store if there's an empty list.

* Translate between Cassandra's 'null', which is how it represents an
empty list, and the empty list that the surrounding code expects (and
that was stored in the first place).

* Avoid emitting empty embedding lists

* Avoid outputting empty triple lists

* Fix tests
This commit is contained in:
cybermaggedon 2026-02-09 14:07:07 +00:00 committed by GitHub
parent e214eb4e02
commit ca626c8471
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 88 additions and 91 deletions

View file

@ -73,12 +73,13 @@ class Processor(FlowProcessor):
)
)
r = GraphEmbeddings(
metadata=v.metadata,
entities=entities,
)
if entities:
r = GraphEmbeddings(
metadata=v.metadata,
entities=entities,
)
await flow("output").send(r)
await flow("output").send(r)
except Exception as e:
logger.error("Exception occurred", exc_info=True)

View file

@ -168,27 +168,29 @@ class Processor(FlowProcessor):
entities.append(ec)
await self.emit_triples(
flow("triples"),
Metadata(
id=v.metadata.id,
metadata=[],
user=v.metadata.user,
collection=v.metadata.collection,
),
triples
)
if triples:
await self.emit_triples(
flow("triples"),
Metadata(
id=v.metadata.id,
metadata=[],
user=v.metadata.user,
collection=v.metadata.collection,
),
triples
)
await self.emit_ecs(
flow("entity-contexts"),
Metadata(
id=v.metadata.id,
metadata=[],
user=v.metadata.user,
collection=v.metadata.collection,
),
entities
)
if entities:
await self.emit_ecs(
flow("entity-contexts"),
Metadata(
id=v.metadata.id,
metadata=[],
user=v.metadata.user,
collection=v.metadata.collection,
),
entities
)
except Exception as e:
logger.error(f"Definitions extraction exception: {e}", exc_info=True)

View file

@ -274,17 +274,6 @@ class Processor(FlowProcessor):
if not ontology_subsets:
logger.warning("No relevant ontology elements found for chunk")
# Emit empty outputs
await self.emit_triples(
flow("triples"),
v.metadata,
[]
)
await self.emit_entity_contexts(
flow("entity-contexts"),
v.metadata,
[]
)
return
# Merge subsets if multiple ontologies matched
@ -319,35 +308,26 @@ class Processor(FlowProcessor):
entity_contexts = self.build_entity_contexts(all_triples)
# Emit all triples (extracted + ontology definitions)
await self.emit_triples(
flow("triples"),
v.metadata,
all_triples
)
if all_triples:
await self.emit_triples(
flow("triples"),
v.metadata,
all_triples
)
# Emit entity contexts
await self.emit_entity_contexts(
flow("entity-contexts"),
v.metadata,
entity_contexts
)
if entity_contexts:
await self.emit_entity_contexts(
flow("entity-contexts"),
v.metadata,
entity_contexts
)
logger.info(f"Extracted {len(triples)} content triples + {len(ontology_triples)} ontology triples "
f"= {len(all_triples)} total triples and {len(entity_contexts)} entity contexts")
except Exception as e:
logger.error(f"OntoRAG extraction exception: {e}", exc_info=True)
# Emit empty outputs on error
await self.emit_triples(
flow("triples"),
v.metadata,
[]
)
await self.emit_entity_contexts(
flow("entity-contexts"),
v.metadata,
[]
)
async def extract_with_simplified_format(
self,

View file

@ -181,16 +181,17 @@ class Processor(FlowProcessor):
o=Value(value=v.metadata.id, is_uri=True)
))
await self.emit_triples(
flow("triples"),
Metadata(
id=v.metadata.id,
metadata=[],
user=v.metadata.user,
collection=v.metadata.collection,
),
triples
)
if triples:
await self.emit_triples(
flow("triples"),
Metadata(
id=v.metadata.id,
metadata=[],
user=v.metadata.user,
collection=v.metadata.collection,
),
triples
)
except Exception as e:
logger.error(f"Relationship extraction exception: {e}", exc_info=True)

View file

@ -64,12 +64,14 @@ class Processor(FlowProcessor):
async def on_triples(self, msg, consumer, flow):
v = msg.value()
await self.table_store.add_triples(v)
if v.triples:
await self.table_store.add_triples(v)
async def on_graph_embeddings(self, msg, consumer, flow):
v = msg.value()
await self.table_store.add_graph_embeddings(v)
if v.entities:
await self.table_store.add_graph_embeddings(v)
@staticmethod
def add_args(parser):

View file

@ -423,14 +423,17 @@ class KnowledgeTableStore:
else:
metadata = []
triples = [
Triple(
s = Value(value = elt[0], is_uri = elt[1]),
p = Value(value = elt[2], is_uri = elt[3]),
o = Value(value = elt[4], is_uri = elt[5]),
)
for elt in row[3]
]
if row[3]:
triples = [
Triple(
s = Value(value = elt[0], is_uri = elt[1]),
p = Value(value = elt[2], is_uri = elt[3]),
o = Value(value = elt[4], is_uri = elt[5]),
)
for elt in row[3]
]
else:
triples = []
await receiver(
Triples(
@ -479,13 +482,16 @@ class KnowledgeTableStore:
else:
metadata = []
entities = [
EntityEmbeddings(
entity = Value(value = ent[0][0], is_uri = ent[0][1]),
vectors = ent[1]
)
for ent in row[3]
]
if row[3]:
entities = [
EntityEmbeddings(
entity = Value(value = ent[0][0], is_uri = ent[0][1]),
vectors = ent[1]
)
for ent in row[3]
]
else:
entities = []
await receiver(
GraphEmbeddings(