Changed schema for Value -> Term, a major breaking change (#622)

* Changed schema for Value -> Term, a major breaking change

* Following the schema change, propagated Value -> Term through all processing

* Updated Cassandra for g, p, s, o index patterns (7 indexes)

* Reviewed and updated all tests

* Neo4j, Memgraph and FalkorDB remain broken; will look at them once things have settled down
This commit is contained in:
cybermaggedon 2026-01-27 13:48:08 +00:00 committed by GitHub
parent e061f2c633
commit cf0daedefa
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
86 changed files with 2458 additions and 1764 deletions

View file

@ -10,7 +10,7 @@ description = "TrustGraph provides a means to run a pipeline of flexible AI proc
readme = "README.md"
requires-python = ">=3.8"
dependencies = [
"trustgraph-base>=1.9,<1.10",
"trustgraph-base>=2.0,<2.1",
"aiohttp",
"anthropic",
"scylla-driver",

View file

@ -11,7 +11,24 @@ _active_clusters = []
logger = logging.getLogger(__name__)
# Sentinel value for wildcard graph queries
GRAPH_WILDCARD = "*"
# Default graph stored as empty string
DEFAULT_GRAPH = ""
class KnowledgeGraph:
"""
Cassandra-backed knowledge graph supporting quads (s, p, o, g).
Uses 7 tables to support all 16 query patterns efficiently:
- Family A (g-wildcard): SPOG, POSG, OSPG
- Family B (g-specified): GSPO, GPOS, GOSP
- Collection table: COLL (for iteration/deletion)
Plus a metadata table for tracking collections.
"""
def __init__(
self, hosts=None,
@ -24,12 +41,22 @@ class KnowledgeGraph:
self.keyspace = keyspace
self.username = username
# Optimized multi-table schema with collection deletion support
self.subject_table = "triples_s"
self.po_table = "triples_p"
self.object_table = "triples_o"
self.collection_table = "triples_collection" # For SPO queries and deletion
self.collection_metadata_table = "collection_metadata" # For tracking which collections exist
# 7-table schema for quads with full query pattern support
# Family A: g-wildcard queries (g in clustering columns)
self.spog_table = "quads_spog" # partition (collection, s), cluster (p, o, g)
self.posg_table = "quads_posg" # partition (collection, p), cluster (o, s, g)
self.ospg_table = "quads_ospg" # partition (collection, o), cluster (s, p, g)
# Family B: g-specified queries (g in partition key)
self.gspo_table = "quads_gspo" # partition (collection, g, s), cluster (p, o)
self.gpos_table = "quads_gpos" # partition (collection, g, p), cluster (o, s)
self.gosp_table = "quads_gosp" # partition (collection, g, o), cluster (s, p)
# Collection table for iteration and bulk deletion
self.coll_table = "quads_coll" # partition (collection), cluster (g, s, p, o)
# Collection metadata tracking
self.collection_metadata_table = "collection_metadata"
if username and password:
ssl_context = SSLContext(PROTOCOL_TLSv1_2)
@ -46,237 +73,376 @@ class KnowledgeGraph:
self.prepare_statements()
def clear(self):
self.session.execute(f"""
drop keyspace if exists {self.keyspace};
""");
""")
self.init()
def init(self):
self.session.execute(f"""
create keyspace if not exists {self.keyspace}
with replication = {{
'class' : 'SimpleStrategy',
'replication_factor' : 1
}};
""");
""")
self.session.set_keyspace(self.keyspace)
self.init_optimized_schema()
self.init_quad_schema()
def init_quad_schema(self):
"""Initialize 7-table schema for quads with full query pattern support"""
def init_optimized_schema(self):
"""Initialize optimized multi-table schema for performance"""
# Table 1: Subject-centric queries (get_s, get_sp, get_os)
# Compound partition key for optimal data distribution
# Family A: g-wildcard queries (g in clustering columns)
# SPOG: partition (collection, s), cluster (p, o, g)
# Supports: (?, s, ?, ?), (?, s, p, ?), (?, s, p, o)
self.session.execute(f"""
CREATE TABLE IF NOT EXISTS {self.subject_table} (
CREATE TABLE IF NOT EXISTS {self.spog_table} (
collection text,
s text,
p text,
o text,
PRIMARY KEY ((collection, s), p, o)
g text,
PRIMARY KEY ((collection, s), p, o, g)
);
""");
""")
# Table 2: Predicate-Object queries (get_p, get_po) - eliminates ALLOW FILTERING!
# Compound partition key for optimal data distribution
# POSG: partition (collection, p), cluster (o, s, g)
# Supports: (?, ?, p, ?), (?, ?, p, o)
self.session.execute(f"""
CREATE TABLE IF NOT EXISTS {self.po_table} (
CREATE TABLE IF NOT EXISTS {self.posg_table} (
collection text,
p text,
o text,
s text,
PRIMARY KEY ((collection, p), o, s)
g text,
PRIMARY KEY ((collection, p), o, s, g)
);
""");
""")
# Table 3: Object-centric queries (get_o)
# Compound partition key for optimal data distribution
# OSPG: partition (collection, o), cluster (s, p, g)
# Supports: (?, ?, ?, o), (?, s, ?, o)
self.session.execute(f"""
CREATE TABLE IF NOT EXISTS {self.object_table} (
CREATE TABLE IF NOT EXISTS {self.ospg_table} (
collection text,
o text,
s text,
p text,
PRIMARY KEY ((collection, o), s, p)
g text,
PRIMARY KEY ((collection, o), s, p, g)
);
""");
""")
# Table 4: Collection management and SPO queries (get_spo)
# Simple partition key enables efficient collection deletion
# Family B: g-specified queries (g in partition key)
# GSPO: partition (collection, g, s), cluster (p, o)
# Supports: (g, s, ?, ?), (g, s, p, ?), (g, s, p, o)
self.session.execute(f"""
CREATE TABLE IF NOT EXISTS {self.collection_table} (
CREATE TABLE IF NOT EXISTS {self.gspo_table} (
collection text,
g text,
s text,
p text,
o text,
PRIMARY KEY (collection, s, p, o)
PRIMARY KEY ((collection, g, s), p, o)
);
""");
""")
# Table 5: Collection metadata tracking
# Tracks which collections exist without polluting triple data
# GPOS: partition (collection, g, p), cluster (o, s)
# Supports: (g, ?, p, ?), (g, ?, p, o)
self.session.execute(f"""
CREATE TABLE IF NOT EXISTS {self.gpos_table} (
collection text,
g text,
p text,
o text,
s text,
PRIMARY KEY ((collection, g, p), o, s)
);
""")
# GOSP: partition (collection, g, o), cluster (s, p)
# Supports: (g, ?, ?, o), (g, s, ?, o)
self.session.execute(f"""
CREATE TABLE IF NOT EXISTS {self.gosp_table} (
collection text,
g text,
o text,
s text,
p text,
PRIMARY KEY ((collection, g, o), s, p)
);
""")
# Collection table for iteration and bulk deletion
# COLL: partition (collection), cluster (g, s, p, o)
self.session.execute(f"""
CREATE TABLE IF NOT EXISTS {self.coll_table} (
collection text,
g text,
s text,
p text,
o text,
PRIMARY KEY (collection, g, s, p, o)
);
""")
# Collection metadata tracking
self.session.execute(f"""
CREATE TABLE IF NOT EXISTS {self.collection_metadata_table} (
collection text,
created_at timestamp,
PRIMARY KEY (collection)
);
""");
""")
logger.info("Optimized multi-table schema initialized (5 tables)")
logger.info("Quad schema initialized (7 tables + metadata)")
def prepare_statements(self):
"""Prepare statements for optimal performance"""
# Insert statements for batch operations
self.insert_subject_stmt = self.session.prepare(
f"INSERT INTO {self.subject_table} (collection, s, p, o) VALUES (?, ?, ?, ?)"
"""Prepare statements for all 7 tables"""
# Insert statements
self.insert_spog_stmt = self.session.prepare(
f"INSERT INTO {self.spog_table} (collection, s, p, o, g) VALUES (?, ?, ?, ?, ?)"
)
self.insert_posg_stmt = self.session.prepare(
f"INSERT INTO {self.posg_table} (collection, p, o, s, g) VALUES (?, ?, ?, ?, ?)"
)
self.insert_ospg_stmt = self.session.prepare(
f"INSERT INTO {self.ospg_table} (collection, o, s, p, g) VALUES (?, ?, ?, ?, ?)"
)
self.insert_gspo_stmt = self.session.prepare(
f"INSERT INTO {self.gspo_table} (collection, g, s, p, o) VALUES (?, ?, ?, ?, ?)"
)
self.insert_gpos_stmt = self.session.prepare(
f"INSERT INTO {self.gpos_table} (collection, g, p, o, s) VALUES (?, ?, ?, ?, ?)"
)
self.insert_gosp_stmt = self.session.prepare(
f"INSERT INTO {self.gosp_table} (collection, g, o, s, p) VALUES (?, ?, ?, ?, ?)"
)
self.insert_coll_stmt = self.session.prepare(
f"INSERT INTO {self.coll_table} (collection, g, s, p, o) VALUES (?, ?, ?, ?, ?)"
)
self.insert_po_stmt = self.session.prepare(
f"INSERT INTO {self.po_table} (collection, p, o, s) VALUES (?, ?, ?, ?)"
# Delete statements (for single quad deletion)
self.delete_spog_stmt = self.session.prepare(
f"DELETE FROM {self.spog_table} WHERE collection = ? AND s = ? AND p = ? AND o = ? AND g = ?"
)
self.delete_posg_stmt = self.session.prepare(
f"DELETE FROM {self.posg_table} WHERE collection = ? AND p = ? AND o = ? AND s = ? AND g = ?"
)
self.delete_ospg_stmt = self.session.prepare(
f"DELETE FROM {self.ospg_table} WHERE collection = ? AND o = ? AND s = ? AND p = ? AND g = ?"
)
self.delete_gspo_stmt = self.session.prepare(
f"DELETE FROM {self.gspo_table} WHERE collection = ? AND g = ? AND s = ? AND p = ? AND o = ?"
)
self.delete_gpos_stmt = self.session.prepare(
f"DELETE FROM {self.gpos_table} WHERE collection = ? AND g = ? AND p = ? AND o = ? AND s = ?"
)
self.delete_gosp_stmt = self.session.prepare(
f"DELETE FROM {self.gosp_table} WHERE collection = ? AND g = ? AND o = ? AND s = ? AND p = ?"
)
self.delete_coll_stmt = self.session.prepare(
f"DELETE FROM {self.coll_table} WHERE collection = ? AND g = ? AND s = ? AND p = ? AND o = ?"
)
self.insert_object_stmt = self.session.prepare(
f"INSERT INTO {self.object_table} (collection, o, s, p) VALUES (?, ?, ?, ?)"
# Query statements - Family A (g-wildcard, g in clustering)
# SPOG table queries
self.get_s_wildcard_stmt = self.session.prepare(
f"SELECT p, o, g FROM {self.spog_table} WHERE collection = ? AND s = ? LIMIT ?"
)
self.get_sp_wildcard_stmt = self.session.prepare(
f"SELECT o, g FROM {self.spog_table} WHERE collection = ? AND s = ? AND p = ? LIMIT ?"
)
self.get_spo_wildcard_stmt = self.session.prepare(
f"SELECT g FROM {self.spog_table} WHERE collection = ? AND s = ? AND p = ? AND o = ? LIMIT ?"
)
self.insert_collection_stmt = self.session.prepare(
f"INSERT INTO {self.collection_table} (collection, s, p, o) VALUES (?, ?, ?, ?)"
# POSG table queries
self.get_p_wildcard_stmt = self.session.prepare(
f"SELECT o, s, g FROM {self.posg_table} WHERE collection = ? AND p = ? LIMIT ?"
)
self.get_po_wildcard_stmt = self.session.prepare(
f"SELECT s, g FROM {self.posg_table} WHERE collection = ? AND p = ? AND o = ? LIMIT ?"
)
# Query statements for optimized access
# OSPG table queries
self.get_o_wildcard_stmt = self.session.prepare(
f"SELECT s, p, g FROM {self.ospg_table} WHERE collection = ? AND o = ? LIMIT ?"
)
self.get_os_wildcard_stmt = self.session.prepare(
f"SELECT p, g FROM {self.ospg_table} WHERE collection = ? AND o = ? AND s = ? LIMIT ?"
)
# Query statements - Family B (g-specified, g in partition)
# GSPO table queries
self.get_gs_stmt = self.session.prepare(
f"SELECT p, o FROM {self.gspo_table} WHERE collection = ? AND g = ? AND s = ? LIMIT ?"
)
self.get_gsp_stmt = self.session.prepare(
f"SELECT o FROM {self.gspo_table} WHERE collection = ? AND g = ? AND s = ? AND p = ? LIMIT ?"
)
self.get_gspo_stmt = self.session.prepare(
f"SELECT s FROM {self.gspo_table} WHERE collection = ? AND g = ? AND s = ? AND p = ? AND o = ? LIMIT ?"
)
# GPOS table queries
self.get_gp_stmt = self.session.prepare(
f"SELECT o, s FROM {self.gpos_table} WHERE collection = ? AND g = ? AND p = ? LIMIT ?"
)
self.get_gpo_stmt = self.session.prepare(
f"SELECT s FROM {self.gpos_table} WHERE collection = ? AND g = ? AND p = ? AND o = ? LIMIT ?"
)
# GOSP table queries
self.get_go_stmt = self.session.prepare(
f"SELECT s, p FROM {self.gosp_table} WHERE collection = ? AND g = ? AND o = ? LIMIT ?"
)
self.get_gos_stmt = self.session.prepare(
f"SELECT p FROM {self.gosp_table} WHERE collection = ? AND g = ? AND o = ? AND s = ? LIMIT ?"
)
# Collection table query (for get_all and iteration)
self.get_all_stmt = self.session.prepare(
f"SELECT s, p, o FROM {self.subject_table} WHERE collection = ? LIMIT ? ALLOW FILTERING"
f"SELECT g, s, p, o FROM {self.coll_table} WHERE collection = ? LIMIT ?"
)
self.get_g_stmt = self.session.prepare(
f"SELECT s, p, o FROM {self.coll_table} WHERE collection = ? AND g = ? LIMIT ?"
)
self.get_s_stmt = self.session.prepare(
f"SELECT p, o FROM {self.subject_table} WHERE collection = ? AND s = ? LIMIT ?"
)
logger.info("Prepared statements initialized for quad schema (7 tables)")
self.get_p_stmt = self.session.prepare(
f"SELECT s, o FROM {self.po_table} WHERE collection = ? AND p = ? LIMIT ?"
)
def insert(self, collection, s, p, o, g=None):
"""Insert a quad into all 7 tables"""
# Default graph stored as empty string
if g is None:
g = DEFAULT_GRAPH
self.get_o_stmt = self.session.prepare(
f"SELECT s, p FROM {self.object_table} WHERE collection = ? AND o = ? LIMIT ?"
)
self.get_sp_stmt = self.session.prepare(
f"SELECT o FROM {self.subject_table} WHERE collection = ? AND s = ? AND p = ? LIMIT ?"
)
# The critical optimization: get_po without ALLOW FILTERING!
self.get_po_stmt = self.session.prepare(
f"SELECT s FROM {self.po_table} WHERE collection = ? AND p = ? AND o = ? LIMIT ?"
)
self.get_os_stmt = self.session.prepare(
f"SELECT p FROM {self.object_table} WHERE collection = ? AND o = ? AND s = ? LIMIT ?"
)
self.get_spo_stmt = self.session.prepare(
f"SELECT s as x FROM {self.collection_table} WHERE collection = ? AND s = ? AND p = ? AND o = ? LIMIT ?"
)
# Delete statements for collection deletion
self.delete_subject_stmt = self.session.prepare(
f"DELETE FROM {self.subject_table} WHERE collection = ? AND s = ? AND p = ? AND o = ?"
)
self.delete_po_stmt = self.session.prepare(
f"DELETE FROM {self.po_table} WHERE collection = ? AND p = ? AND o = ? AND s = ?"
)
self.delete_object_stmt = self.session.prepare(
f"DELETE FROM {self.object_table} WHERE collection = ? AND o = ? AND s = ? AND p = ?"
)
self.delete_collection_stmt = self.session.prepare(
f"DELETE FROM {self.collection_table} WHERE collection = ? AND s = ? AND p = ? AND o = ?"
)
logger.info("Prepared statements initialized for optimal performance (4 tables)")
def insert(self, collection, s, p, o):
# Batch write to all four tables for consistency
batch = BatchStatement()
# Insert into subject table
batch.add(self.insert_subject_stmt, (collection, s, p, o))
# Family A tables
batch.add(self.insert_spog_stmt, (collection, s, p, o, g))
batch.add(self.insert_posg_stmt, (collection, p, o, s, g))
batch.add(self.insert_ospg_stmt, (collection, o, s, p, g))
# Insert into predicate-object table (column order: collection, p, o, s)
batch.add(self.insert_po_stmt, (collection, p, o, s))
# Family B tables
batch.add(self.insert_gspo_stmt, (collection, g, s, p, o))
batch.add(self.insert_gpos_stmt, (collection, g, p, o, s))
batch.add(self.insert_gosp_stmt, (collection, g, o, s, p))
# Insert into object table (column order: collection, o, s, p)
batch.add(self.insert_object_stmt, (collection, o, s, p))
# Insert into collection table for SPO queries and deletion tracking
batch.add(self.insert_collection_stmt, (collection, s, p, o))
# Collection table
batch.add(self.insert_coll_stmt, (collection, g, s, p, o))
self.session.execute(batch)
def delete_quad(self, collection, s, p, o, g=None):
"""Delete a single quad from all 7 tables"""
if g is None:
g = DEFAULT_GRAPH
batch = BatchStatement()
batch.add(self.delete_spog_stmt, (collection, s, p, o, g))
batch.add(self.delete_posg_stmt, (collection, p, o, s, g))
batch.add(self.delete_ospg_stmt, (collection, o, s, p, g))
batch.add(self.delete_gspo_stmt, (collection, g, s, p, o))
batch.add(self.delete_gpos_stmt, (collection, g, p, o, s))
batch.add(self.delete_gosp_stmt, (collection, g, o, s, p))
batch.add(self.delete_coll_stmt, (collection, g, s, p, o))
self.session.execute(batch)
# ========================================================================
# Query methods
# g=None means default graph, g="*" means all graphs
# ========================================================================
def get_all(self, collection, limit=50):
# Use subject table for get_all queries
return self.session.execute(
self.get_all_stmt,
(collection, limit)
)
"""Get all quads in collection"""
return self.session.execute(self.get_all_stmt, (collection, limit))
def get_s(self, collection, s, limit=10):
# Optimized: Direct partition access with (collection, s)
return self.session.execute(
self.get_s_stmt,
(collection, s, limit)
)
def get_s(self, collection, s, g=None, limit=10):
"""Query by subject. g=None: default graph, g='*': all graphs"""
if g is None or g == DEFAULT_GRAPH:
# Default graph - use GSPO table
return self.session.execute(self.get_gs_stmt, (collection, DEFAULT_GRAPH, s, limit))
elif g == GRAPH_WILDCARD:
# All graphs - use SPOG table
return self.session.execute(self.get_s_wildcard_stmt, (collection, s, limit))
else:
# Specific graph - use GSPO table
return self.session.execute(self.get_gs_stmt, (collection, g, s, limit))
def get_p(self, collection, p, limit=10):
# Optimized: Use po_table for direct partition access
return self.session.execute(
self.get_p_stmt,
(collection, p, limit)
)
def get_p(self, collection, p, g=None, limit=10):
"""Query by predicate"""
if g is None or g == DEFAULT_GRAPH:
return self.session.execute(self.get_gp_stmt, (collection, DEFAULT_GRAPH, p, limit))
elif g == GRAPH_WILDCARD:
return self.session.execute(self.get_p_wildcard_stmt, (collection, p, limit))
else:
return self.session.execute(self.get_gp_stmt, (collection, g, p, limit))
def get_o(self, collection, o, limit=10):
# Optimized: Use object_table for direct partition access
return self.session.execute(
self.get_o_stmt,
(collection, o, limit)
)
def get_o(self, collection, o, g=None, limit=10):
"""Query by object"""
if g is None or g == DEFAULT_GRAPH:
return self.session.execute(self.get_go_stmt, (collection, DEFAULT_GRAPH, o, limit))
elif g == GRAPH_WILDCARD:
return self.session.execute(self.get_o_wildcard_stmt, (collection, o, limit))
else:
return self.session.execute(self.get_go_stmt, (collection, g, o, limit))
def get_sp(self, collection, s, p, limit=10):
# Optimized: Use subject_table with clustering key access
return self.session.execute(
self.get_sp_stmt,
(collection, s, p, limit)
)
def get_sp(self, collection, s, p, g=None, limit=10):
"""Query by subject and predicate"""
if g is None or g == DEFAULT_GRAPH:
return self.session.execute(self.get_gsp_stmt, (collection, DEFAULT_GRAPH, s, p, limit))
elif g == GRAPH_WILDCARD:
return self.session.execute(self.get_sp_wildcard_stmt, (collection, s, p, limit))
else:
return self.session.execute(self.get_gsp_stmt, (collection, g, s, p, limit))
def get_po(self, collection, p, o, limit=10):
# CRITICAL OPTIMIZATION: Use po_table - NO MORE ALLOW FILTERING!
return self.session.execute(
self.get_po_stmt,
(collection, p, o, limit)
)
def get_po(self, collection, p, o, g=None, limit=10):
"""Query by predicate and object"""
if g is None or g == DEFAULT_GRAPH:
return self.session.execute(self.get_gpo_stmt, (collection, DEFAULT_GRAPH, p, o, limit))
elif g == GRAPH_WILDCARD:
return self.session.execute(self.get_po_wildcard_stmt, (collection, p, o, limit))
else:
return self.session.execute(self.get_gpo_stmt, (collection, g, p, o, limit))
def get_os(self, collection, o, s, limit=10):
# Optimized: Use subject_table with clustering access (no more ALLOW FILTERING)
return self.session.execute(
self.get_os_stmt,
(collection, s, o, limit)
)
def get_os(self, collection, o, s, g=None, limit=10):
"""Query by object and subject"""
if g is None or g == DEFAULT_GRAPH:
return self.session.execute(self.get_gos_stmt, (collection, DEFAULT_GRAPH, o, s, limit))
elif g == GRAPH_WILDCARD:
return self.session.execute(self.get_os_wildcard_stmt, (collection, o, s, limit))
else:
return self.session.execute(self.get_gos_stmt, (collection, g, o, s, limit))
def get_spo(self, collection, s, p, o, limit=10):
# Optimized: Use collection_table for exact key lookup
return self.session.execute(
self.get_spo_stmt,
(collection, s, p, o, limit)
)
def get_spo(self, collection, s, p, o, g=None, limit=10):
"""Query by subject, predicate, object (find which graphs)"""
if g is None or g == DEFAULT_GRAPH:
return self.session.execute(self.get_gspo_stmt, (collection, DEFAULT_GRAPH, s, p, o, limit))
elif g == GRAPH_WILDCARD:
return self.session.execute(self.get_spo_wildcard_stmt, (collection, s, p, o, limit))
else:
return self.session.execute(self.get_gspo_stmt, (collection, g, s, p, o, limit))
def get_g(self, collection, g, limit=50):
"""Get all quads in a specific graph"""
if g is None:
g = DEFAULT_GRAPH
return self.session.execute(self.get_g_stmt, (collection, g, limit))
# ========================================================================
# Collection management
# ========================================================================
def collection_exists(self, collection):
"""Check if collection exists by querying collection_metadata table"""
"""Check if collection exists"""
try:
result = self.session.execute(
f"SELECT collection FROM {self.collection_metadata_table} WHERE collection = %s LIMIT 1",
@ -301,63 +467,52 @@ class KnowledgeGraph:
raise e
def delete_collection(self, collection):
"""Delete all triples for a specific collection
Uses collection_table to enumerate all triples, then deletes from all 4 tables
using full partition keys for optimal performance with compound keys.
"""
# Step 1: Read all triples from collection_table (single partition read)
"""Delete all quads for a collection from all 7 tables"""
# Read all quads from collection table
rows = self.session.execute(
f"SELECT s, p, o FROM {self.collection_table} WHERE collection = %s",
f"SELECT g, s, p, o FROM {self.coll_table} WHERE collection = %s",
(collection,)
)
# Step 2: Delete each triple from all 4 tables using full partition keys
# Batch deletions for efficiency
batch = BatchStatement()
count = 0
for row in rows:
s, p, o = row.s, row.p, row.o
g, s, p, o = row.g, row.s, row.p, row.o
# Delete from subject table (partition key: collection, s)
batch.add(self.delete_subject_stmt, (collection, s, p, o))
# Delete from predicate-object table (partition key: collection, p)
batch.add(self.delete_po_stmt, (collection, p, o, s))
# Delete from object table (partition key: collection, o)
batch.add(self.delete_object_stmt, (collection, o, s, p))
# Delete from collection table (partition key: collection only)
batch.add(self.delete_collection_stmt, (collection, s, p, o))
# Delete from all 7 tables
batch.add(self.delete_spog_stmt, (collection, s, p, o, g))
batch.add(self.delete_posg_stmt, (collection, p, o, s, g))
batch.add(self.delete_ospg_stmt, (collection, o, s, p, g))
batch.add(self.delete_gspo_stmt, (collection, g, s, p, o))
batch.add(self.delete_gpos_stmt, (collection, g, p, o, s))
batch.add(self.delete_gosp_stmt, (collection, g, o, s, p))
batch.add(self.delete_coll_stmt, (collection, g, s, p, o))
count += 1
# Execute batch every 25 triples to avoid oversized batches
# (Each triple adds ~4 statements, so 25 triples = ~100 statements)
if count % 25 == 0:
# Execute batch every 15 quads (7 deletes each = 105 statements)
if count % 15 == 0:
self.session.execute(batch)
batch = BatchStatement()
# Execute remaining deletions
if count % 25 != 0:
# Execute remaining
if count % 15 != 0:
self.session.execute(batch)
# Step 3: Delete collection metadata
# Delete collection metadata
self.session.execute(
f"DELETE FROM {self.collection_metadata_table} WHERE collection = %s",
(collection,)
)
logger.info(f"Deleted {count} triples from collection {collection}")
logger.info(f"Deleted {count} quads from collection {collection}")
def close(self):
"""Close the Cassandra session and cluster connections properly"""
"""Close connections"""
if hasattr(self, 'session') and self.session:
self.session.shutdown()
if hasattr(self, 'cluster') and self.cluster:
self.cluster.shutdown()
# Remove from global tracking
if self.cluster in _active_clusters:
_active_clusters.remove(self.cluster)

View file

@ -3,7 +3,7 @@ import json
import urllib.parse
import logging
from ....schema import Chunk, Triple, Triples, Metadata, Value
from ....schema import Chunk, Triple, Triples, Metadata, Term, IRI, LITERAL
from ....schema import EntityContext, EntityContexts
from ....rdf import TRUSTGRAPH_ENTITIES, RDF_LABEL, SUBJECT_OF, DEFINITION
@ -253,32 +253,32 @@ class Processor(FlowProcessor):
for defn in definitions:
entity_uri = self.to_uri(defn["entity"])
# Add entity label
triples.append(Triple(
s = Value(value=entity_uri, is_uri=True),
p = Value(value=RDF_LABEL, is_uri=True),
o = Value(value=defn["entity"], is_uri=False),
s = Term(type=IRI, iri=entity_uri),
p = Term(type=IRI, iri=RDF_LABEL),
o = Term(type=LITERAL, value=defn["entity"]),
))
# Add definition
triples.append(Triple(
s = Value(value=entity_uri, is_uri=True),
p = Value(value=DEFINITION, is_uri=True),
o = Value(value=defn["definition"], is_uri=False),
s = Term(type=IRI, iri=entity_uri),
p = Term(type=IRI, iri=DEFINITION),
o = Term(type=LITERAL, value=defn["definition"]),
))
# Add subject-of relationship to document
if metadata.id:
triples.append(Triple(
s = Value(value=entity_uri, is_uri=True),
p = Value(value=SUBJECT_OF, is_uri=True),
o = Value(value=metadata.id, is_uri=True),
s = Term(type=IRI, iri=entity_uri),
p = Term(type=IRI, iri=SUBJECT_OF),
o = Term(type=IRI, iri=metadata.id),
))
# Create entity context for embeddings
entity_contexts.append(EntityContext(
entity=Value(value=entity_uri, is_uri=True),
entity=Term(type=IRI, iri=entity_uri),
context=defn["definition"]
))
@ -288,61 +288,61 @@ class Processor(FlowProcessor):
subject_uri = self.to_uri(rel["subject"])
predicate_uri = self.to_uri(rel["predicate"])
subject_value = Value(value=subject_uri, is_uri=True)
predicate_value = Value(value=predicate_uri, is_uri=True)
subject_value = Term(type=IRI, iri=subject_uri)
predicate_value = Term(type=IRI, iri=predicate_uri)
if rel.get("object-entity", True):
object_uri = self.to_uri(rel["object"])
object_value = Value(value=object_uri, is_uri=True)
object_value = Term(type=IRI, iri=object_uri)
else:
object_value = Value(value=rel["object"], is_uri=False)
object_value = Term(type=LITERAL, value=rel["object"])
# Add subject and predicate labels
triples.append(Triple(
s = subject_value,
p = Value(value=RDF_LABEL, is_uri=True),
o = Value(value=rel["subject"], is_uri=False),
p = Term(type=IRI, iri=RDF_LABEL),
o = Term(type=LITERAL, value=rel["subject"]),
))
triples.append(Triple(
s = predicate_value,
p = Value(value=RDF_LABEL, is_uri=True),
o = Value(value=rel["predicate"], is_uri=False),
p = Term(type=IRI, iri=RDF_LABEL),
o = Term(type=LITERAL, value=rel["predicate"]),
))
# Handle object (entity vs literal)
if rel.get("object-entity", True):
triples.append(Triple(
s = object_value,
p = Value(value=RDF_LABEL, is_uri=True),
o = Value(value=rel["object"], is_uri=True),
p = Term(type=IRI, iri=RDF_LABEL),
o = Term(type=LITERAL, value=rel["object"]),
))
# Add the main relationship triple
triples.append(Triple(
s = subject_value,
p = predicate_value,
o = object_value
))
# Add subject-of relationships to document
if metadata.id:
triples.append(Triple(
s = subject_value,
p = Value(value=SUBJECT_OF, is_uri=True),
o = Value(value=metadata.id, is_uri=True),
p = Term(type=IRI, iri=SUBJECT_OF),
o = Term(type=IRI, iri=metadata.id),
))
triples.append(Triple(
s = predicate_value,
p = Value(value=SUBJECT_OF, is_uri=True),
o = Value(value=metadata.id, is_uri=True),
p = Term(type=IRI, iri=SUBJECT_OF),
o = Term(type=IRI, iri=metadata.id),
))
if rel.get("object-entity", True):
triples.append(Triple(
s = object_value,
p = Value(value=SUBJECT_OF, is_uri=True),
o = Value(value=metadata.id, is_uri=True),
p = Term(type=IRI, iri=SUBJECT_OF),
o = Term(type=IRI, iri=metadata.id),
))
return triples, entity_contexts

View file

@ -9,7 +9,7 @@ import json
import urllib.parse
import logging
from .... schema import Chunk, Triple, Triples, Metadata, Value
from .... schema import Chunk, Triple, Triples, Metadata, Term, IRI, LITERAL
# Module logger
logger = logging.getLogger(__name__)
@ -20,9 +20,9 @@ from .... rdf import TRUSTGRAPH_ENTITIES, DEFINITION, RDF_LABEL, SUBJECT_OF
from .... base import FlowProcessor, ConsumerSpec, ProducerSpec
from .... base import PromptClientSpec
DEFINITION_VALUE = Value(value=DEFINITION, is_uri=True)
RDF_LABEL_VALUE = Value(value=RDF_LABEL, is_uri=True)
SUBJECT_OF_VALUE = Value(value=SUBJECT_OF, is_uri=True)
DEFINITION_VALUE = Term(type=IRI, iri=DEFINITION)
RDF_LABEL_VALUE = Term(type=IRI, iri=RDF_LABEL)
SUBJECT_OF_VALUE = Term(type=IRI, iri=SUBJECT_OF)
default_ident = "kg-extract-definitions"
default_concurrency = 1
@ -142,13 +142,13 @@ class Processor(FlowProcessor):
s_uri = self.to_uri(s)
s_value = Value(value=str(s_uri), is_uri=True)
o_value = Value(value=str(o), is_uri=False)
s_value = Term(type=IRI, iri=str(s_uri))
o_value = Term(type=LITERAL, value=str(o))
triples.append(Triple(
s=s_value,
p=RDF_LABEL_VALUE,
o=Value(value=s, is_uri=False),
o=Term(type=LITERAL, value=s),
))
triples.append(Triple(
@ -158,7 +158,7 @@ class Processor(FlowProcessor):
triples.append(Triple(
s=s_value,
p=SUBJECT_OF_VALUE,
o=Value(value=v.metadata.id, is_uri=True)
o=Term(type=IRI, iri=v.metadata.id)
))
ec = EntityContext(

View file

@ -8,7 +8,7 @@ import logging
import asyncio
from typing import List, Dict, Any, Optional
from .... schema import Chunk, Triple, Triples, Metadata, Value
from .... schema import Chunk, Triple, Triples, Metadata, Term, IRI, LITERAL
from .... schema import EntityContext, EntityContexts
from .... schema import PromptRequest, PromptResponse
from .... rdf import TRUSTGRAPH_ENTITIES, RDF_TYPE, RDF_LABEL, DEFINITION
@ -39,6 +39,14 @@ URI_PREFIXES = {
}
def make_term(v, is_uri):
"""Helper to create Term from value and is_uri flag."""
if is_uri:
return Term(type=IRI, iri=v)
else:
return Term(type=LITERAL, value=v)
class Processor(FlowProcessor):
"""Main OntoRAG extraction processor."""
@ -446,9 +454,9 @@ class Processor(FlowProcessor):
is_object_uri = False
# Create Triple object with expanded URIs
s_value = Value(value=subject_uri, is_uri=True)
p_value = Value(value=predicate_uri, is_uri=True)
o_value = Value(value=object_uri, is_uri=is_object_uri)
s_value = make_term(subject_uri, is_uri=True)
p_value = make_term(predicate_uri, is_uri=True)
o_value = make_term(object_uri, is_uri=is_object_uri)
validated_triples.append(Triple(
s=s_value,
@ -609,9 +617,9 @@ class Processor(FlowProcessor):
# rdf:type owl:Class
ontology_triples.append(Triple(
s=Value(value=class_uri, is_uri=True),
p=Value(value="http://www.w3.org/1999/02/22-rdf-syntax-ns#type", is_uri=True),
o=Value(value="http://www.w3.org/2002/07/owl#Class", is_uri=True)
s=make_term(class_uri, is_uri=True),
p=make_term("http://www.w3.org/1999/02/22-rdf-syntax-ns#type", is_uri=True),
o=make_term("http://www.w3.org/2002/07/owl#Class", is_uri=True)
))
# rdfs:label (stored as 'labels' in OntologyClass.__dict__)
@ -620,18 +628,18 @@ class Processor(FlowProcessor):
if isinstance(labels, list) and labels:
label_val = labels[0].get('value', class_id) if isinstance(labels[0], dict) else str(labels[0])
ontology_triples.append(Triple(
s=Value(value=class_uri, is_uri=True),
p=Value(value=RDF_LABEL, is_uri=True),
o=Value(value=label_val, is_uri=False)
s=make_term(class_uri, is_uri=True),
p=make_term(RDF_LABEL, is_uri=True),
o=make_term(label_val, is_uri=False)
))
# rdfs:comment (stored as 'comment' in OntologyClass.__dict__)
if isinstance(class_def, dict) and 'comment' in class_def and class_def['comment']:
comment = class_def['comment']
ontology_triples.append(Triple(
s=Value(value=class_uri, is_uri=True),
p=Value(value="http://www.w3.org/2000/01/rdf-schema#comment", is_uri=True),
o=Value(value=comment, is_uri=False)
s=make_term(class_uri, is_uri=True),
p=make_term("http://www.w3.org/2000/01/rdf-schema#comment", is_uri=True),
o=make_term(comment, is_uri=False)
))
# rdfs:subClassOf (stored as 'subclass_of' in OntologyClass.__dict__)
@ -648,9 +656,9 @@ class Processor(FlowProcessor):
parent_uri = f"https://trustgraph.ai/ontology/{ontology_subset.ontology_id}#{parent}"
ontology_triples.append(Triple(
s=Value(value=class_uri, is_uri=True),
p=Value(value="http://www.w3.org/2000/01/rdf-schema#subClassOf", is_uri=True),
o=Value(value=parent_uri, is_uri=True)
s=make_term(class_uri, is_uri=True),
p=make_term("http://www.w3.org/2000/01/rdf-schema#subClassOf", is_uri=True),
o=make_term(parent_uri, is_uri=True)
))
# Generate triples for object properties
@ -663,9 +671,9 @@ class Processor(FlowProcessor):
# rdf:type owl:ObjectProperty
ontology_triples.append(Triple(
s=Value(value=prop_uri, is_uri=True),
p=Value(value="http://www.w3.org/1999/02/22-rdf-syntax-ns#type", is_uri=True),
o=Value(value="http://www.w3.org/2002/07/owl#ObjectProperty", is_uri=True)
s=make_term(prop_uri, is_uri=True),
p=make_term("http://www.w3.org/1999/02/22-rdf-syntax-ns#type", is_uri=True),
o=make_term("http://www.w3.org/2002/07/owl#ObjectProperty", is_uri=True)
))
# rdfs:label (stored as 'labels' in OntologyProperty.__dict__)
@ -674,18 +682,18 @@ class Processor(FlowProcessor):
if isinstance(labels, list) and labels:
label_val = labels[0].get('value', prop_id) if isinstance(labels[0], dict) else str(labels[0])
ontology_triples.append(Triple(
s=Value(value=prop_uri, is_uri=True),
p=Value(value=RDF_LABEL, is_uri=True),
o=Value(value=label_val, is_uri=False)
s=make_term(prop_uri, is_uri=True),
p=make_term(RDF_LABEL, is_uri=True),
o=make_term(label_val, is_uri=False)
))
# rdfs:comment (stored as 'comment' in OntologyProperty.__dict__)
if isinstance(prop_def, dict) and 'comment' in prop_def and prop_def['comment']:
comment = prop_def['comment']
ontology_triples.append(Triple(
s=Value(value=prop_uri, is_uri=True),
p=Value(value="http://www.w3.org/2000/01/rdf-schema#comment", is_uri=True),
o=Value(value=comment, is_uri=False)
s=make_term(prop_uri, is_uri=True),
p=make_term("http://www.w3.org/2000/01/rdf-schema#comment", is_uri=True),
o=make_term(comment, is_uri=False)
))
# rdfs:domain (stored as 'domain' in OntologyProperty.__dict__)
@ -702,9 +710,9 @@ class Processor(FlowProcessor):
domain_uri = f"https://trustgraph.ai/ontology/{ontology_subset.ontology_id}#{domain}"
ontology_triples.append(Triple(
s=Value(value=prop_uri, is_uri=True),
p=Value(value="http://www.w3.org/2000/01/rdf-schema#domain", is_uri=True),
o=Value(value=domain_uri, is_uri=True)
s=make_term(prop_uri, is_uri=True),
p=make_term("http://www.w3.org/2000/01/rdf-schema#domain", is_uri=True),
o=make_term(domain_uri, is_uri=True)
))
# rdfs:range (stored as 'range' in OntologyProperty.__dict__)
@ -721,9 +729,9 @@ class Processor(FlowProcessor):
range_uri = f"https://trustgraph.ai/ontology/{ontology_subset.ontology_id}#{range_val}"
ontology_triples.append(Triple(
s=Value(value=prop_uri, is_uri=True),
p=Value(value="http://www.w3.org/2000/01/rdf-schema#range", is_uri=True),
o=Value(value=range_uri, is_uri=True)
s=make_term(prop_uri, is_uri=True),
p=make_term("http://www.w3.org/2000/01/rdf-schema#range", is_uri=True),
o=make_term(range_uri, is_uri=True)
))
# Generate triples for datatype properties
@ -736,9 +744,9 @@ class Processor(FlowProcessor):
# rdf:type owl:DatatypeProperty
ontology_triples.append(Triple(
s=Value(value=prop_uri, is_uri=True),
p=Value(value="http://www.w3.org/1999/02/22-rdf-syntax-ns#type", is_uri=True),
o=Value(value="http://www.w3.org/2002/07/owl#DatatypeProperty", is_uri=True)
s=make_term(prop_uri, is_uri=True),
p=make_term("http://www.w3.org/1999/02/22-rdf-syntax-ns#type", is_uri=True),
o=make_term("http://www.w3.org/2002/07/owl#DatatypeProperty", is_uri=True)
))
# rdfs:label (stored as 'labels' in OntologyProperty.__dict__)
@ -747,18 +755,18 @@ class Processor(FlowProcessor):
if isinstance(labels, list) and labels:
label_val = labels[0].get('value', prop_id) if isinstance(labels[0], dict) else str(labels[0])
ontology_triples.append(Triple(
s=Value(value=prop_uri, is_uri=True),
p=Value(value=RDF_LABEL, is_uri=True),
o=Value(value=label_val, is_uri=False)
s=make_term(prop_uri, is_uri=True),
p=make_term(RDF_LABEL, is_uri=True),
o=make_term(label_val, is_uri=False)
))
# rdfs:comment (stored as 'comment' in OntologyProperty.__dict__)
if isinstance(prop_def, dict) and 'comment' in prop_def and prop_def['comment']:
comment = prop_def['comment']
ontology_triples.append(Triple(
s=Value(value=prop_uri, is_uri=True),
p=Value(value="http://www.w3.org/2000/01/rdf-schema#comment", is_uri=True),
o=Value(value=comment, is_uri=False)
s=make_term(prop_uri, is_uri=True),
p=make_term("http://www.w3.org/2000/01/rdf-schema#comment", is_uri=True),
o=make_term(comment, is_uri=False)
))
# rdfs:domain (stored as 'domain' in OntologyProperty.__dict__)
@ -775,9 +783,9 @@ class Processor(FlowProcessor):
domain_uri = f"https://trustgraph.ai/ontology/{ontology_subset.ontology_id}#{domain}"
ontology_triples.append(Triple(
s=Value(value=prop_uri, is_uri=True),
p=Value(value="http://www.w3.org/2000/01/rdf-schema#domain", is_uri=True),
o=Value(value=domain_uri, is_uri=True)
s=make_term(prop_uri, is_uri=True),
p=make_term("http://www.w3.org/2000/01/rdf-schema#domain", is_uri=True),
o=make_term(domain_uri, is_uri=True)
))
# rdfs:range (datatype)
@ -790,9 +798,9 @@ class Processor(FlowProcessor):
range_uri = range_val
ontology_triples.append(Triple(
s=Value(value=prop_uri, is_uri=True),
p=Value(value="http://www.w3.org/2000/01/rdf-schema#range", is_uri=True),
o=Value(value=range_uri, is_uri=True)
s=make_term(prop_uri, is_uri=True),
p=make_term("http://www.w3.org/2000/01/rdf-schema#range", is_uri=True),
o=make_term(range_uri, is_uri=True)
))
logger.info(f"Generated {len(ontology_triples)} triples describing ontology elements")
@ -814,9 +822,9 @@ class Processor(FlowProcessor):
entity_data = {} # subject_uri -> {labels: [], definitions: []}
for triple in triples:
subject_uri = triple.s.value
predicate_uri = triple.p.value
object_val = triple.o.value
subject_uri = triple.s.iri if triple.s.type == IRI else triple.s.value
predicate_uri = triple.p.iri if triple.p.type == IRI else triple.p.value
object_val = triple.o.value if triple.o.type == LITERAL else triple.o.iri
# Initialize entity data if not exists
if subject_uri not in entity_data:
@ -824,12 +832,12 @@ class Processor(FlowProcessor):
# Collect labels (rdfs:label)
if predicate_uri == RDF_LABEL:
if not triple.o.is_uri: # Labels are literals
if triple.o.type == LITERAL: # Labels are literals
entity_data[subject_uri]['labels'].append(object_val)
# Collect definitions (skos:definition, schema:description)
elif predicate_uri == DEFINITION or predicate_uri == "https://schema.org/description":
if not triple.o.is_uri:
if triple.o.type == LITERAL:
entity_data[subject_uri]['definitions'].append(object_val)
# Build EntityContext objects
@ -848,7 +856,7 @@ class Processor(FlowProcessor):
if context_parts:
context_text = ". ".join(context_parts)
entity_contexts.append(EntityContext(
entity=Value(value=subject_uri, is_uri=True),
entity=make_term(subject_uri, is_uri=True),
context=context_text
))

View file

@ -8,7 +8,7 @@ with full URIs and correct is_uri flags.
import logging
from typing import List, Optional
from .... schema import Triple, Value
from .... schema import Triple, Term, IRI, LITERAL
from .... rdf import RDF_TYPE, RDF_LABEL
from .simplified_parser import Entity, Relationship, Attribute, ExtractionResult
@ -87,17 +87,17 @@ class TripleConverter:
# Generate type triple: entity rdf:type ClassURI
type_triple = Triple(
s=Value(value=entity_uri, is_uri=True),
p=Value(value=RDF_TYPE, is_uri=True),
o=Value(value=class_uri, is_uri=True)
s=Term(type=IRI, iri=entity_uri),
p=Term(type=IRI, iri=RDF_TYPE),
o=Term(type=IRI, iri=class_uri)
)
triples.append(type_triple)
# Generate label triple: entity rdfs:label "entity name"
label_triple = Triple(
s=Value(value=entity_uri, is_uri=True),
p=Value(value=RDF_LABEL, is_uri=True),
o=Value(value=entity.entity, is_uri=False) # Literal!
s=Term(type=IRI, iri=entity_uri),
p=Term(type=IRI, iri=RDF_LABEL),
o=Term(type=LITERAL, value=entity.entity) # Literal!
)
triples.append(label_triple)
@ -131,9 +131,9 @@ class TripleConverter:
# Generate triple: subject property object
return Triple(
s=Value(value=subject_uri, is_uri=True),
p=Value(value=property_uri, is_uri=True),
o=Value(value=object_uri, is_uri=True)
s=Term(type=IRI, iri=subject_uri),
p=Term(type=IRI, iri=property_uri),
o=Term(type=IRI, iri=object_uri)
)
def convert_attribute(self, attribute: Attribute) -> Optional[Triple]:
@ -159,9 +159,9 @@ class TripleConverter:
# Generate triple: entity property "literal value"
return Triple(
s=Value(value=entity_uri, is_uri=True),
p=Value(value=property_uri, is_uri=True),
o=Value(value=attribute.value, is_uri=False) # Literal!
s=Term(type=IRI, iri=entity_uri),
p=Term(type=IRI, iri=property_uri),
o=Term(type=LITERAL, value=attribute.value) # Literal!
)
def _get_class_uri(self, class_id: str) -> Optional[str]:

View file

@ -13,15 +13,15 @@ import urllib.parse
logger = logging.getLogger(__name__)
from .... schema import Chunk, Triple, Triples
from .... schema import Metadata, Value
from .... schema import Metadata, Term, IRI, LITERAL
from .... schema import PromptRequest, PromptResponse
from .... rdf import RDF_LABEL, TRUSTGRAPH_ENTITIES, SUBJECT_OF
from .... base import FlowProcessor, ConsumerSpec, ProducerSpec
from .... base import PromptClientSpec
RDF_LABEL_VALUE = Value(value=RDF_LABEL, is_uri=True)
SUBJECT_OF_VALUE = Value(value=SUBJECT_OF, is_uri=True)
RDF_LABEL_VALUE = Term(type=IRI, iri=RDF_LABEL)
SUBJECT_OF_VALUE = Term(type=IRI, iri=SUBJECT_OF)
default_ident = "kg-extract-relationships"
default_concurrency = 1
@ -127,16 +127,16 @@ class Processor(FlowProcessor):
if o is None: continue
s_uri = self.to_uri(s)
s_value = Value(value=str(s_uri), is_uri=True)
s_value = Term(type=IRI, iri=str(s_uri))
p_uri = self.to_uri(p)
p_value = Value(value=str(p_uri), is_uri=True)
p_value = Term(type=IRI, iri=str(p_uri))
if rel["object-entity"]:
if rel["object-entity"]:
o_uri = self.to_uri(o)
o_value = Value(value=str(o_uri), is_uri=True)
o_value = Term(type=IRI, iri=str(o_uri))
else:
o_value = Value(value=str(o), is_uri=False)
o_value = Term(type=LITERAL, value=str(o))
triples.append(Triple(
s=s_value,
@ -148,14 +148,14 @@ class Processor(FlowProcessor):
triples.append(Triple(
s=s_value,
p=RDF_LABEL_VALUE,
o=Value(value=str(s), is_uri=False)
o=Term(type=LITERAL, value=str(s))
))
# Label for p
triples.append(Triple(
s=p_value,
p=RDF_LABEL_VALUE,
o=Value(value=str(p), is_uri=False)
o=Term(type=LITERAL, value=str(p))
))
if rel["object-entity"]:
@ -163,14 +163,14 @@ class Processor(FlowProcessor):
triples.append(Triple(
s=o_value,
p=RDF_LABEL_VALUE,
o=Value(value=str(o), is_uri=False)
o=Term(type=LITERAL, value=str(o))
))
# 'Subject of' for s
triples.append(Triple(
s=s_value,
p=SUBJECT_OF_VALUE,
o=Value(value=v.metadata.id, is_uri=True)
o=Term(type=IRI, iri=v.metadata.id)
))
if rel["object-entity"]:
@ -178,7 +178,7 @@ class Processor(FlowProcessor):
triples.append(Triple(
s=o_value,
p=SUBJECT_OF_VALUE,
o=Value(value=v.metadata.id, is_uri=True)
o=Term(type=IRI, iri=v.metadata.id)
))
await self.emit_triples(

View file

@ -11,7 +11,7 @@ import logging
# Module logger
logger = logging.getLogger(__name__)
from .... schema import Chunk, Triple, Triples, Metadata, Value
from .... schema import Chunk, Triple, Triples, Metadata, Term, IRI, LITERAL
from .... schema import chunk_ingest_queue, triples_store_queue
from .... schema import prompt_request_queue
from .... schema import prompt_response_queue
@ -20,7 +20,7 @@ from .... clients.prompt_client import PromptClient
from .... rdf import TRUSTGRAPH_ENTITIES, DEFINITION
from .... base import ConsumerProducer
DEFINITION_VALUE = Value(value=DEFINITION, is_uri=True)
DEFINITION_VALUE = Term(type=IRI, iri=DEFINITION)
module = "kg-extract-topics"
@ -106,8 +106,8 @@ class Processor(ConsumerProducer):
s_uri = self.to_uri(s)
s_value = Value(value=str(s_uri), is_uri=True)
o_value = Value(value=str(o), is_uri=False)
s_value = Term(type=IRI, iri=str(s_uri))
o_value = Term(type=LITERAL, value=str(o))
await self.emit_edge(
v.metadata, s_value, DEFINITION_VALUE, o_value

View file

@ -1,46 +1,37 @@
import base64
from ... schema import Value, Triple, DocumentMetadata, ProcessingMetadata
from ... schema import Term, Triple, DocumentMetadata, ProcessingMetadata
from ... messaging.translators.primitives import TermTranslator, TripleTranslator
# Singleton translator instances
_term_translator = TermTranslator()
_triple_translator = TripleTranslator()
# DEPRECATED: These functions have been moved to trustgraph.... messaging.translators
# Use the new messaging translation system instead for consistency and reusability.
# Examples:
# from trustgraph.... messaging.translators.primitives import ValueTranslator
# value_translator = ValueTranslator()
# pulsar_value = value_translator.to_pulsar({"v": "example", "e": True})
def to_value(x):
return Value(value=x["v"], is_uri=x["e"])
"""Convert dict to Term. Delegates to TermTranslator."""
return _term_translator.to_pulsar(x)
def to_subgraph(x):
return [
Triple(
s=to_value(t["s"]),
p=to_value(t["p"]),
o=to_value(t["o"])
)
for t in x
]
"""Convert list of dicts to list of Triples. Delegates to TripleTranslator."""
return [_triple_translator.to_pulsar(t) for t in x]
def serialize_value(v):
return {
"v": v.value,
"e": v.is_uri,
}
"""Convert Term to dict. Delegates to TermTranslator."""
return _term_translator.from_pulsar(v)
def serialize_triple(t):
return {
"s": serialize_value(t.s),
"p": serialize_value(t.p),
"o": serialize_value(t.o)
}
"""Convert Triple to dict. Delegates to TripleTranslator."""
return _triple_translator.from_pulsar(t)
def serialize_subgraph(sg):
return [
serialize_triple(t)
for t in sg
]
"""Convert list of Triples to list of dicts."""
return [serialize_triple(t) for t in sg]
def serialize_triples(message):
return {

View file

@ -8,7 +8,7 @@ import logging
from .... direct.milvus_doc_embeddings import DocVectors
from .... schema import DocumentEmbeddingsResponse
from .... schema import Error, Value
from .... schema import Error
from .... base import DocumentEmbeddingsQueryService
# Module logger

View file

@ -11,7 +11,7 @@ from qdrant_client.models import PointStruct
from qdrant_client.models import Distance, VectorParams
from .... schema import DocumentEmbeddingsResponse
from .... schema import Error, Value
from .... schema import Error
from .... base import DocumentEmbeddingsQueryService
# Module logger

View file

@ -8,7 +8,7 @@ import logging
from .... direct.milvus_graph_embeddings import EntityVectors
from .... schema import GraphEmbeddingsResponse
from .... schema import Error, Value
from .... schema import Error, Term, IRI, LITERAL
from .... base import GraphEmbeddingsQueryService
# Module logger
@ -33,9 +33,9 @@ class Processor(GraphEmbeddingsQueryService):
def create_value(self, ent):
if ent.startswith("http://") or ent.startswith("https://"):
return Value(value=ent, is_uri=True)
return Term(type=IRI, iri=ent)
else:
return Value(value=ent, is_uri=False)
return Term(type=LITERAL, value=ent)
async def query_graph_embeddings(self, msg):

View file

@ -12,7 +12,7 @@ from pinecone import Pinecone, ServerlessSpec
from pinecone.grpc import PineconeGRPC, GRPCClientConfig
from .... schema import GraphEmbeddingsResponse
from .... schema import Error, Value
from .... schema import Error, Term, IRI, LITERAL
from .... base import GraphEmbeddingsQueryService
# Module logger
@ -51,9 +51,9 @@ class Processor(GraphEmbeddingsQueryService):
def create_value(self, ent):
if ent.startswith("http://") or ent.startswith("https://"):
return Value(value=ent, is_uri=True)
return Term(type=IRI, iri=ent)
else:
return Value(value=ent, is_uri=False)
return Term(type=LITERAL, value=ent)
async def query_graph_embeddings(self, msg):

View file

@ -11,7 +11,7 @@ from qdrant_client.models import PointStruct
from qdrant_client.models import Distance, VectorParams
from .... schema import GraphEmbeddingsResponse
from .... schema import Error, Value
from .... schema import Error, Term, IRI, LITERAL
from .... base import GraphEmbeddingsQueryService
# Module logger
@ -67,9 +67,9 @@ class Processor(GraphEmbeddingsQueryService):
def create_value(self, ent):
if ent.startswith("http://") or ent.startswith("https://"):
return Value(value=ent, is_uri=True)
return Term(type=IRI, iri=ent)
else:
return Value(value=ent, is_uri=False)
return Term(type=LITERAL, value=ent)
async def query_graph_embeddings(self, msg):

View file

@ -1,14 +1,14 @@
"""
Triples query service. Input is a (s, p, o) triple, some values may be
null. Output is a list of triples.
Triples query service. Input is a (s, p, o, g) quad pattern, some values may be
null. Output is a list of quads.
"""
import logging
from .... direct.cassandra_kg import KnowledgeGraph
from .... direct.cassandra_kg import KnowledgeGraph, GRAPH_WILDCARD, DEFAULT_GRAPH
from .... schema import TriplesQueryRequest, TriplesQueryResponse, Error
from .... schema import Value, Triple
from .... schema import Term, Triple, IRI, LITERAL
from .... base import TriplesQueryService
from .... base.cassandra_config import add_cassandra_args, resolve_cassandra_config
@ -18,6 +18,27 @@ logger = logging.getLogger(__name__)
default_ident = "triples-query"
def get_term_value(term):
    """Return the plain string carried by a Term, or None.

    IRI terms yield their ``iri`` field, literal terms their ``value``;
    any other term kind (e.g. blank nodes) falls back to ``id`` or,
    failing that, ``value``. A ``None`` term maps to ``None``.
    """
    if term is None:
        return None
    term_type = term.type
    if term_type == IRI:
        return term.iri
    if term_type == LITERAL:
        return term.value
    # Blank nodes / other kinds: prefer the node id, else the value.
    return term.id or term.value
def create_term(value):
    """Build a Term from a raw string.

    Strings beginning with ``http://`` or ``https://`` are treated as
    IRIs; everything else becomes a literal term.
    """
    looks_like_iri = value.startswith(("http://", "https://"))
    if looks_like_iri:
        return Term(type=IRI, iri=value)
    return Term(type=LITERAL, value=value)
class Processor(TriplesQueryService):
def __init__(self, **params):
@ -46,12 +67,6 @@ class Processor(TriplesQueryService):
self.cassandra_password = password
self.table = None
def create_value(self, ent):
if ent.startswith("http://") or ent.startswith("https://"):
return Value(value=ent, is_uri=True)
else:
return Value(value=ent, is_uri=False)
async def query_triples(self, query):
try:
@ -72,77 +87,103 @@ class Processor(TriplesQueryService):
)
self.table = user
triples = []
# Extract values from query
s_val = get_term_value(query.s)
p_val = get_term_value(query.p)
o_val = get_term_value(query.o)
g_val = query.g # Already a string or None
if query.s is not None:
if query.p is not None:
if query.o is not None:
quads = []
# Route to appropriate query method based on which fields are specified
if s_val is not None:
if p_val is not None:
if o_val is not None:
# SPO specified - find matching graphs
resp = self.tg.get_spo(
query.collection, query.s.value, query.p.value, query.o.value,
query.collection, s_val, p_val, o_val, g=g_val,
limit=query.limit
)
triples.append((query.s.value, query.p.value, query.o.value))
for t in resp:
g = t.g if hasattr(t, 'g') else DEFAULT_GRAPH
quads.append((s_val, p_val, o_val, g))
else:
# SP specified
resp = self.tg.get_sp(
query.collection, query.s.value, query.p.value,
query.collection, s_val, p_val, g=g_val,
limit=query.limit
)
for t in resp:
triples.append((query.s.value, query.p.value, t.o))
g = t.g if hasattr(t, 'g') else DEFAULT_GRAPH
quads.append((s_val, p_val, t.o, g))
else:
if query.o is not None:
if o_val is not None:
# SO specified
resp = self.tg.get_os(
query.collection, query.o.value, query.s.value,
query.collection, o_val, s_val, g=g_val,
limit=query.limit
)
for t in resp:
triples.append((query.s.value, t.p, query.o.value))
g = t.g if hasattr(t, 'g') else DEFAULT_GRAPH
quads.append((s_val, t.p, o_val, g))
else:
# S only
resp = self.tg.get_s(
query.collection, query.s.value,
query.collection, s_val, g=g_val,
limit=query.limit
)
for t in resp:
triples.append((query.s.value, t.p, t.o))
g = t.g if hasattr(t, 'g') else DEFAULT_GRAPH
quads.append((s_val, t.p, t.o, g))
else:
if query.p is not None:
if query.o is not None:
if p_val is not None:
if o_val is not None:
# PO specified
resp = self.tg.get_po(
query.collection, query.p.value, query.o.value,
query.collection, p_val, o_val, g=g_val,
limit=query.limit
)
for t in resp:
triples.append((t.s, query.p.value, query.o.value))
g = t.g if hasattr(t, 'g') else DEFAULT_GRAPH
quads.append((t.s, p_val, o_val, g))
else:
# P only
resp = self.tg.get_p(
query.collection, query.p.value,
query.collection, p_val, g=g_val,
limit=query.limit
)
for t in resp:
triples.append((t.s, query.p.value, t.o))
g = t.g if hasattr(t, 'g') else DEFAULT_GRAPH
quads.append((t.s, p_val, t.o, g))
else:
if query.o is not None:
if o_val is not None:
# O only
resp = self.tg.get_o(
query.collection, query.o.value,
query.collection, o_val, g=g_val,
limit=query.limit
)
for t in resp:
triples.append((t.s, t.p, query.o.value))
g = t.g if hasattr(t, 'g') else DEFAULT_GRAPH
quads.append((t.s, t.p, o_val, g))
else:
# Nothing specified - get all
resp = self.tg.get_all(
query.collection,
limit=query.limit
)
for t in resp:
triples.append((t.s, t.p, t.o))
g = t.g if hasattr(t, 'g') else DEFAULT_GRAPH
quads.append((t.s, t.p, t.o, g))
# Convert to Triple objects (with g field)
triples = [
Triple(
s=self.create_value(t[0]),
p=self.create_value(t[1]),
o=self.create_value(t[2])
s=create_term(q[0]),
p=create_term(q[1]),
o=create_term(q[2]),
g=q[3] if q[3] != DEFAULT_GRAPH else None
)
for t in triples
for q in quads
]
return triples
@ -162,4 +203,3 @@ class Processor(TriplesQueryService):
def run():
Processor.launch(default_ident, __doc__)

View file

@ -10,12 +10,24 @@ import logging
from falkordb import FalkorDB
from .... schema import TriplesQueryRequest, TriplesQueryResponse, Error
from .... schema import Value, Triple
from .... schema import Term, Triple, IRI, LITERAL
from .... base import TriplesQueryService
# Module logger
logger = logging.getLogger(__name__)
def get_term_value(term):
    """Return the string payload of a Term (or None for a None term).

    IRIs contribute ``iri``, literals contribute ``value``; other term
    kinds (e.g. blank nodes) use ``id`` with ``value`` as a fallback.
    """
    if term is None:
        return None
    kind = term.type
    if kind == IRI:
        return term.iri
    if kind == LITERAL:
        return term.value
    # Non-IRI, non-literal terms: id if set, otherwise value.
    return term.id or term.value
default_ident = "triples-query"
default_graph_url = 'falkor://falkordb:6379'
@ -42,9 +54,9 @@ class Processor(TriplesQueryService):
def create_value(self, ent):
if ent.startswith("http://") or ent.startswith("https://"):
return Value(value=ent, is_uri=True)
return Term(type=IRI, iri=ent)
else:
return Value(value=ent, is_uri=False)
return Term(type=LITERAL, value=ent)
async def query_triples(self, query):
@ -63,28 +75,28 @@ class Processor(TriplesQueryService):
"RETURN $src as src "
"LIMIT " + str(query.limit),
params={
"src": query.s.value,
"rel": query.p.value,
"value": query.o.value,
"src": get_term_value(query.s),
"rel": get_term_value(query.p),
"value": get_term_value(query.o),
},
).result_set
for rec in records:
triples.append((query.s.value, query.p.value, query.o.value))
triples.append((get_term_value(query.s), get_term_value(query.p), get_term_value(query.o)))
records = self.io.query(
"MATCH (src:Node {uri: $src})-[rel:Rel {uri: $rel}]->(dest:Node {uri: $uri}) "
"RETURN $src as src "
"LIMIT " + str(query.limit),
params={
"src": query.s.value,
"rel": query.p.value,
"uri": query.o.value,
"src": get_term_value(query.s),
"rel": get_term_value(query.p),
"uri": get_term_value(query.o),
},
).result_set
for rec in records:
triples.append((query.s.value, query.p.value, query.o.value))
triples.append((get_term_value(query.s), get_term_value(query.p), get_term_value(query.o)))
else:
@ -95,26 +107,26 @@ class Processor(TriplesQueryService):
"RETURN dest.value as dest "
"LIMIT " + str(query.limit),
params={
"src": query.s.value,
"rel": query.p.value,
"src": get_term_value(query.s),
"rel": get_term_value(query.p),
},
).result_set
for rec in records:
triples.append((query.s.value, query.p.value, rec[0]))
triples.append((get_term_value(query.s), get_term_value(query.p), rec[0]))
records = self.io.query(
"MATCH (src:Node {uri: $src})-[rel:Rel {uri: $rel}]->(dest:Node) "
"RETURN dest.uri as dest "
"LIMIT " + str(query.limit),
params={
"src": query.s.value,
"rel": query.p.value,
"src": get_term_value(query.s),
"rel": get_term_value(query.p),
},
).result_set
for rec in records:
triples.append((query.s.value, query.p.value, rec[0]))
triples.append((get_term_value(query.s), get_term_value(query.p), rec[0]))
else:
@ -127,26 +139,26 @@ class Processor(TriplesQueryService):
"RETURN rel.uri as rel "
"LIMIT " + str(query.limit),
params={
"src": query.s.value,
"value": query.o.value,
"src": get_term_value(query.s),
"value": get_term_value(query.o),
},
).result_set
for rec in records:
triples.append((query.s.value, rec[0], query.o.value))
triples.append((get_term_value(query.s), rec[0], get_term_value(query.o)))
records = self.io.query(
"MATCH (src:Node {uri: $src})-[rel:Rel]->(dest:Node {uri: $uri}) "
"RETURN rel.uri as rel "
"LIMIT " + str(query.limit),
params={
"src": query.s.value,
"uri": query.o.value,
"src": get_term_value(query.s),
"uri": get_term_value(query.o),
},
).result_set
for rec in records:
triples.append((query.s.value, rec[0], query.o.value))
triples.append((get_term_value(query.s), rec[0], get_term_value(query.o)))
else:
@ -157,24 +169,24 @@ class Processor(TriplesQueryService):
"RETURN rel.uri as rel, dest.value as dest "
"LIMIT " + str(query.limit),
params={
"src": query.s.value,
"src": get_term_value(query.s),
},
).result_set
for rec in records:
triples.append((query.s.value, rec[0], rec[1]))
triples.append((get_term_value(query.s), rec[0], rec[1]))
records = self.io.query(
"MATCH (src:Node {uri: $src})-[rel:Rel]->(dest:Node) "
"RETURN rel.uri as rel, dest.uri as dest "
"LIMIT " + str(query.limit),
params={
"src": query.s.value,
"src": get_term_value(query.s),
},
).result_set
for rec in records:
triples.append((query.s.value, rec[0], rec[1]))
triples.append((get_term_value(query.s), rec[0], rec[1]))
else:
@ -190,26 +202,26 @@ class Processor(TriplesQueryService):
"RETURN src.uri as src "
"LIMIT " + str(query.limit),
params={
"uri": query.p.value,
"value": query.o.value,
"uri": get_term_value(query.p),
"value": get_term_value(query.o),
},
).result_set
for rec in records:
triples.append((rec[0], query.p.value, query.o.value))
triples.append((rec[0], get_term_value(query.p), get_term_value(query.o)))
records = self.io.query(
"MATCH (src:Node)-[rel:Rel {uri: $uri}]->(dest:Node {uri: $dest}) "
"RETURN src.uri as src "
"LIMIT " + str(query.limit),
params={
"uri": query.p.value,
"dest": query.o.value,
"uri": get_term_value(query.p),
"dest": get_term_value(query.o),
},
).result_set
for rec in records:
triples.append((rec[0], query.p.value, query.o.value))
triples.append((rec[0], get_term_value(query.p), get_term_value(query.o)))
else:
@ -220,24 +232,24 @@ class Processor(TriplesQueryService):
"RETURN src.uri as src, dest.value as dest "
"LIMIT " + str(query.limit),
params={
"uri": query.p.value,
"uri": get_term_value(query.p),
},
).result_set
for rec in records:
triples.append((rec[0], query.p.value, rec[1]))
triples.append((rec[0], get_term_value(query.p), rec[1]))
records = self.io.query(
"MATCH (src:Node)-[rel:Rel {uri: $uri}]->(dest:Node) "
"RETURN src.uri as src, dest.uri as dest "
"LIMIT " + str(query.limit),
params={
"uri": query.p.value,
"uri": get_term_value(query.p),
},
).result_set
for rec in records:
triples.append((rec[0], query.p.value, rec[1]))
triples.append((rec[0], get_term_value(query.p), rec[1]))
else:
@ -250,24 +262,24 @@ class Processor(TriplesQueryService):
"RETURN src.uri as src, rel.uri as rel "
"LIMIT " + str(query.limit),
params={
"value": query.o.value,
"value": get_term_value(query.o),
},
).result_set
for rec in records:
triples.append((rec[0], rec[1], query.o.value))
triples.append((rec[0], rec[1], get_term_value(query.o)))
records = self.io.query(
"MATCH (src:Node)-[rel:Rel]->(dest:Node {uri: $uri}) "
"RETURN src.uri as src, rel.uri as rel "
"LIMIT " + str(query.limit),
params={
"uri": query.o.value,
"uri": get_term_value(query.o),
},
).result_set
for rec in records:
triples.append((rec[0], rec[1], query.o.value))
triples.append((rec[0], rec[1], get_term_value(query.o)))
else:

View file

@ -10,12 +10,24 @@ import logging
from neo4j import GraphDatabase
from .... schema import TriplesQueryRequest, TriplesQueryResponse, Error
from .... schema import Value, Triple
from .... schema import Term, Triple, IRI, LITERAL
from .... base import TriplesQueryService
# Module logger
logger = logging.getLogger(__name__)
def get_term_value(term):
    """Extract the string a Term represents; ``None`` passes through.

    An IRI term yields ``term.iri``, a literal yields ``term.value``,
    and any other kind yields ``term.id`` falling back to ``term.value``.
    """
    if term is None:
        return None
    t = term.type
    if t == IRI:
        return term.iri
    if t == LITERAL:
        return term.value
    # e.g. blank nodes: identifier first, then the value field.
    return term.id or term.value
default_ident = "triples-query"
default_graph_host = 'bolt://memgraph:7687'
@ -47,9 +59,9 @@ class Processor(TriplesQueryService):
def create_value(self, ent):
if ent.startswith("http://") or ent.startswith("https://"):
return Value(value=ent, is_uri=True)
return Term(type=IRI, iri=ent)
else:
return Value(value=ent, is_uri=False)
return Term(type=LITERAL, value=ent)
async def query_triples(self, query):
@ -73,13 +85,13 @@ class Processor(TriplesQueryService):
"(dest:Literal {value: $value, user: $user, collection: $collection}) "
"RETURN $src as src "
"LIMIT " + str(query.limit),
src=query.s.value, rel=query.p.value, value=query.o.value,
src=get_term_value(query.s), rel=get_term_value(query.p), value=get_term_value(query.o),
user=user, collection=collection,
database_=self.db,
)
for rec in records:
triples.append((query.s.value, query.p.value, query.o.value))
triples.append((get_term_value(query.s), get_term_value(query.p), get_term_value(query.o)))
records, summary, keys = self.io.execute_query(
"MATCH (src:Node {uri: $src, user: $user, collection: $collection})-"
@ -87,13 +99,13 @@ class Processor(TriplesQueryService):
"(dest:Node {uri: $uri, user: $user, collection: $collection}) "
"RETURN $src as src "
"LIMIT " + str(query.limit),
src=query.s.value, rel=query.p.value, uri=query.o.value,
src=get_term_value(query.s), rel=get_term_value(query.p), uri=get_term_value(query.o),
user=user, collection=collection,
database_=self.db,
)
for rec in records:
triples.append((query.s.value, query.p.value, query.o.value))
triples.append((get_term_value(query.s), get_term_value(query.p), get_term_value(query.o)))
else:
@ -105,14 +117,14 @@ class Processor(TriplesQueryService):
"(dest:Literal {user: $user, collection: $collection}) "
"RETURN dest.value as dest "
"LIMIT " + str(query.limit),
src=query.s.value, rel=query.p.value,
src=get_term_value(query.s), rel=get_term_value(query.p),
user=user, collection=collection,
database_=self.db,
)
for rec in records:
data = rec.data()
triples.append((query.s.value, query.p.value, data["dest"]))
triples.append((get_term_value(query.s), get_term_value(query.p), data["dest"]))
records, summary, keys = self.io.execute_query(
"MATCH (src:Node {uri: $src, user: $user, collection: $collection})-"
@ -120,14 +132,14 @@ class Processor(TriplesQueryService):
"(dest:Node {user: $user, collection: $collection}) "
"RETURN dest.uri as dest "
"LIMIT " + str(query.limit),
src=query.s.value, rel=query.p.value,
src=get_term_value(query.s), rel=get_term_value(query.p),
user=user, collection=collection,
database_=self.db,
)
for rec in records:
data = rec.data()
triples.append((query.s.value, query.p.value, data["dest"]))
triples.append((get_term_value(query.s), get_term_value(query.p), data["dest"]))
else:
@ -141,14 +153,14 @@ class Processor(TriplesQueryService):
"(dest:Literal {value: $value, user: $user, collection: $collection}) "
"RETURN rel.uri as rel "
"LIMIT " + str(query.limit),
src=query.s.value, value=query.o.value,
src=get_term_value(query.s), value=get_term_value(query.o),
user=user, collection=collection,
database_=self.db,
)
for rec in records:
data = rec.data()
triples.append((query.s.value, data["rel"], query.o.value))
triples.append((get_term_value(query.s), data["rel"], get_term_value(query.o)))
records, summary, keys = self.io.execute_query(
"MATCH (src:Node {uri: $src, user: $user, collection: $collection})-"
@ -156,14 +168,14 @@ class Processor(TriplesQueryService):
"(dest:Node {uri: $uri, user: $user, collection: $collection}) "
"RETURN rel.uri as rel "
"LIMIT " + str(query.limit),
src=query.s.value, uri=query.o.value,
src=get_term_value(query.s), uri=get_term_value(query.o),
user=user, collection=collection,
database_=self.db,
)
for rec in records:
data = rec.data()
triples.append((query.s.value, data["rel"], query.o.value))
triples.append((get_term_value(query.s), data["rel"], get_term_value(query.o)))
else:
@ -175,14 +187,14 @@ class Processor(TriplesQueryService):
"(dest:Literal {user: $user, collection: $collection}) "
"RETURN rel.uri as rel, dest.value as dest "
"LIMIT " + str(query.limit),
src=query.s.value,
src=get_term_value(query.s),
user=user, collection=collection,
database_=self.db,
)
for rec in records:
data = rec.data()
triples.append((query.s.value, data["rel"], data["dest"]))
triples.append((get_term_value(query.s), data["rel"], data["dest"]))
records, summary, keys = self.io.execute_query(
"MATCH (src:Node {uri: $src, user: $user, collection: $collection})-"
@ -190,14 +202,14 @@ class Processor(TriplesQueryService):
"(dest:Node {user: $user, collection: $collection}) "
"RETURN rel.uri as rel, dest.uri as dest "
"LIMIT " + str(query.limit),
src=query.s.value,
src=get_term_value(query.s),
user=user, collection=collection,
database_=self.db,
)
for rec in records:
data = rec.data()
triples.append((query.s.value, data["rel"], data["dest"]))
triples.append((get_term_value(query.s), data["rel"], data["dest"]))
else:
@ -214,14 +226,14 @@ class Processor(TriplesQueryService):
"(dest:Literal {value: $value, user: $user, collection: $collection}) "
"RETURN src.uri as src "
"LIMIT " + str(query.limit),
uri=query.p.value, value=query.o.value,
uri=get_term_value(query.p), value=get_term_value(query.o),
user=user, collection=collection,
database_=self.db,
)
for rec in records:
data = rec.data()
triples.append((data["src"], query.p.value, query.o.value))
triples.append((data["src"], get_term_value(query.p), get_term_value(query.o)))
records, summary, keys = self.io.execute_query(
"MATCH (src:Node {user: $user, collection: $collection})-"
@ -229,14 +241,14 @@ class Processor(TriplesQueryService):
"(dest:Node {uri: $dest, user: $user, collection: $collection}) "
"RETURN src.uri as src "
"LIMIT " + str(query.limit),
uri=query.p.value, dest=query.o.value,
uri=get_term_value(query.p), dest=get_term_value(query.o),
user=user, collection=collection,
database_=self.db,
)
for rec in records:
data = rec.data()
triples.append((data["src"], query.p.value, query.o.value))
triples.append((data["src"], get_term_value(query.p), get_term_value(query.o)))
else:
@ -248,14 +260,14 @@ class Processor(TriplesQueryService):
"(dest:Literal {user: $user, collection: $collection}) "
"RETURN src.uri as src, dest.value as dest "
"LIMIT " + str(query.limit),
uri=query.p.value,
uri=get_term_value(query.p),
user=user, collection=collection,
database_=self.db,
)
for rec in records:
data = rec.data()
triples.append((data["src"], query.p.value, data["dest"]))
triples.append((data["src"], get_term_value(query.p), data["dest"]))
records, summary, keys = self.io.execute_query(
"MATCH (src:Node {user: $user, collection: $collection})-"
@ -263,14 +275,14 @@ class Processor(TriplesQueryService):
"(dest:Node {user: $user, collection: $collection}) "
"RETURN src.uri as src, dest.uri as dest "
"LIMIT " + str(query.limit),
uri=query.p.value,
uri=get_term_value(query.p),
user=user, collection=collection,
database_=self.db,
)
for rec in records:
data = rec.data()
triples.append((data["src"], query.p.value, data["dest"]))
triples.append((data["src"], get_term_value(query.p), data["dest"]))
else:
@ -284,14 +296,14 @@ class Processor(TriplesQueryService):
"(dest:Literal {value: $value, user: $user, collection: $collection}) "
"RETURN src.uri as src, rel.uri as rel "
"LIMIT " + str(query.limit),
value=query.o.value,
value=get_term_value(query.o),
user=user, collection=collection,
database_=self.db,
)
for rec in records:
data = rec.data()
triples.append((data["src"], data["rel"], query.o.value))
triples.append((data["src"], data["rel"], get_term_value(query.o)))
records, summary, keys = self.io.execute_query(
"MATCH (src:Node {user: $user, collection: $collection})-"
@ -299,14 +311,14 @@ class Processor(TriplesQueryService):
"(dest:Node {uri: $uri, user: $user, collection: $collection}) "
"RETURN src.uri as src, rel.uri as rel "
"LIMIT " + str(query.limit),
uri=query.o.value,
uri=get_term_value(query.o),
user=user, collection=collection,
database_=self.db,
)
for rec in records:
data = rec.data()
triples.append((data["src"], data["rel"], query.o.value))
triples.append((data["src"], data["rel"], get_term_value(query.o)))
else:

View file

@ -10,12 +10,24 @@ import logging
from neo4j import GraphDatabase
from .... schema import TriplesQueryRequest, TriplesQueryResponse, Error
from .... schema import Value, Triple
from .... schema import Term, Triple, IRI, LITERAL
from .... base import TriplesQueryService
# Module logger
logger = logging.getLogger(__name__)
def get_term_value(term):
"""Extract the string value from a Term"""
if term is None:
return None
if term.type == IRI:
return term.iri
elif term.type == LITERAL:
return term.value
else:
return term.id or term.value
default_ident = "triples-query"
default_graph_host = 'bolt://neo4j:7687'
@ -47,9 +59,9 @@ class Processor(TriplesQueryService):
def create_value(self, ent):
if ent.startswith("http://") or ent.startswith("https://"):
return Value(value=ent, is_uri=True)
return Term(type=IRI, iri=ent)
else:
return Value(value=ent, is_uri=False)
return Term(type=LITERAL, value=ent)
async def query_triples(self, query):
@ -73,13 +85,13 @@ class Processor(TriplesQueryService):
"(dest:Literal {value: $value, user: $user, collection: $collection}) "
"RETURN $src as src "
"LIMIT " + str(query.limit),
src=query.s.value, rel=query.p.value, value=query.o.value,
src=get_term_value(query.s), rel=get_term_value(query.p), value=get_term_value(query.o),
user=user, collection=collection,
database_=self.db,
)
for rec in records:
triples.append((query.s.value, query.p.value, query.o.value))
triples.append((get_term_value(query.s), get_term_value(query.p), get_term_value(query.o)))
records, summary, keys = self.io.execute_query(
"MATCH (src:Node {uri: $src, user: $user, collection: $collection})-"
@ -87,13 +99,13 @@ class Processor(TriplesQueryService):
"(dest:Node {uri: $uri, user: $user, collection: $collection}) "
"RETURN $src as src "
"LIMIT " + str(query.limit),
src=query.s.value, rel=query.p.value, uri=query.o.value,
src=get_term_value(query.s), rel=get_term_value(query.p), uri=get_term_value(query.o),
user=user, collection=collection,
database_=self.db,
)
for rec in records:
triples.append((query.s.value, query.p.value, query.o.value))
triples.append((get_term_value(query.s), get_term_value(query.p), get_term_value(query.o)))
else:
@ -105,14 +117,14 @@ class Processor(TriplesQueryService):
"(dest:Literal {user: $user, collection: $collection}) "
"RETURN dest.value as dest "
"LIMIT " + str(query.limit),
src=query.s.value, rel=query.p.value,
src=get_term_value(query.s), rel=get_term_value(query.p),
user=user, collection=collection,
database_=self.db,
)
for rec in records:
data = rec.data()
triples.append((query.s.value, query.p.value, data["dest"]))
triples.append((get_term_value(query.s), get_term_value(query.p), data["dest"]))
records, summary, keys = self.io.execute_query(
"MATCH (src:Node {uri: $src, user: $user, collection: $collection})-"
@ -120,14 +132,14 @@ class Processor(TriplesQueryService):
"(dest:Node {user: $user, collection: $collection}) "
"RETURN dest.uri as dest "
"LIMIT " + str(query.limit),
src=query.s.value, rel=query.p.value,
src=get_term_value(query.s), rel=get_term_value(query.p),
user=user, collection=collection,
database_=self.db,
)
for rec in records:
data = rec.data()
triples.append((query.s.value, query.p.value, data["dest"]))
triples.append((get_term_value(query.s), get_term_value(query.p), data["dest"]))
else:
@ -141,14 +153,14 @@ class Processor(TriplesQueryService):
"(dest:Literal {value: $value, user: $user, collection: $collection}) "
"RETURN rel.uri as rel "
"LIMIT " + str(query.limit),
src=query.s.value, value=query.o.value,
src=get_term_value(query.s), value=get_term_value(query.o),
user=user, collection=collection,
database_=self.db,
)
for rec in records:
data = rec.data()
triples.append((query.s.value, data["rel"], query.o.value))
triples.append((get_term_value(query.s), data["rel"], get_term_value(query.o)))
records, summary, keys = self.io.execute_query(
"MATCH (src:Node {uri: $src, user: $user, collection: $collection})-"
@ -156,14 +168,14 @@ class Processor(TriplesQueryService):
"(dest:Node {uri: $uri, user: $user, collection: $collection}) "
"RETURN rel.uri as rel "
"LIMIT " + str(query.limit),
src=query.s.value, uri=query.o.value,
src=get_term_value(query.s), uri=get_term_value(query.o),
user=user, collection=collection,
database_=self.db,
)
for rec in records:
data = rec.data()
triples.append((query.s.value, data["rel"], query.o.value))
triples.append((get_term_value(query.s), data["rel"], get_term_value(query.o)))
else:
@ -175,14 +187,14 @@ class Processor(TriplesQueryService):
"(dest:Literal {user: $user, collection: $collection}) "
"RETURN rel.uri as rel, dest.value as dest "
"LIMIT " + str(query.limit),
src=query.s.value,
src=get_term_value(query.s),
user=user, collection=collection,
database_=self.db,
)
for rec in records:
data = rec.data()
triples.append((query.s.value, data["rel"], data["dest"]))
triples.append((get_term_value(query.s), data["rel"], data["dest"]))
records, summary, keys = self.io.execute_query(
"MATCH (src:Node {uri: $src, user: $user, collection: $collection})-"
@ -190,14 +202,14 @@ class Processor(TriplesQueryService):
"(dest:Node {user: $user, collection: $collection}) "
"RETURN rel.uri as rel, dest.uri as dest "
"LIMIT " + str(query.limit),
src=query.s.value,
src=get_term_value(query.s),
user=user, collection=collection,
database_=self.db,
)
for rec in records:
data = rec.data()
triples.append((query.s.value, data["rel"], data["dest"]))
triples.append((get_term_value(query.s), data["rel"], data["dest"]))
else:
@ -214,14 +226,14 @@ class Processor(TriplesQueryService):
"(dest:Literal {value: $value, user: $user, collection: $collection}) "
"RETURN src.uri as src "
"LIMIT " + str(query.limit),
uri=query.p.value, value=query.o.value,
uri=get_term_value(query.p), value=get_term_value(query.o),
user=user, collection=collection,
database_=self.db,
)
for rec in records:
data = rec.data()
triples.append((data["src"], query.p.value, query.o.value))
triples.append((data["src"], get_term_value(query.p), get_term_value(query.o)))
records, summary, keys = self.io.execute_query(
"MATCH (src:Node {user: $user, collection: $collection})-"
@ -229,14 +241,14 @@ class Processor(TriplesQueryService):
"(dest:Node {uri: $dest, user: $user, collection: $collection}) "
"RETURN src.uri as src "
"LIMIT " + str(query.limit),
uri=query.p.value, dest=query.o.value,
uri=get_term_value(query.p), dest=get_term_value(query.o),
user=user, collection=collection,
database_=self.db,
)
for rec in records:
data = rec.data()
triples.append((data["src"], query.p.value, query.o.value))
triples.append((data["src"], get_term_value(query.p), get_term_value(query.o)))
else:
@ -248,14 +260,14 @@ class Processor(TriplesQueryService):
"(dest:Literal {user: $user, collection: $collection}) "
"RETURN src.uri as src, dest.value as dest "
"LIMIT " + str(query.limit),
uri=query.p.value,
uri=get_term_value(query.p),
user=user, collection=collection,
database_=self.db,
)
for rec in records:
data = rec.data()
triples.append((data["src"], query.p.value, data["dest"]))
triples.append((data["src"], get_term_value(query.p), data["dest"]))
records, summary, keys = self.io.execute_query(
"MATCH (src:Node {user: $user, collection: $collection})-"
@ -263,14 +275,14 @@ class Processor(TriplesQueryService):
"(dest:Node {user: $user, collection: $collection}) "
"RETURN src.uri as src, dest.uri as dest "
"LIMIT " + str(query.limit),
uri=query.p.value,
uri=get_term_value(query.p),
user=user, collection=collection,
database_=self.db,
)
for rec in records:
data = rec.data()
triples.append((data["src"], query.p.value, data["dest"]))
triples.append((data["src"], get_term_value(query.p), data["dest"]))
else:
@ -284,14 +296,14 @@ class Processor(TriplesQueryService):
"(dest:Literal {value: $value, user: $user, collection: $collection}) "
"RETURN src.uri as src, rel.uri as rel "
"LIMIT " + str(query.limit),
value=query.o.value,
value=get_term_value(query.o),
user=user, collection=collection,
database_=self.db,
)
for rec in records:
data = rec.data()
triples.append((data["src"], data["rel"], query.o.value))
triples.append((data["src"], data["rel"], get_term_value(query.o)))
records, summary, keys = self.io.execute_query(
"MATCH (src:Node {user: $user, collection: $collection})-"
@ -299,14 +311,14 @@ class Processor(TriplesQueryService):
"(dest:Node {uri: $uri, user: $user, collection: $collection}) "
"RETURN src.uri as src, rel.uri as rel "
"LIMIT " + str(query.limit),
uri=query.o.value,
uri=get_term_value(query.o),
user=user, collection=collection,
database_=self.db,
)
for rec in records:
data = rec.data()
triples.append((data["src"], data["rel"], query.o.value))
triples.append((data["src"], data["rel"], get_term_value(query.o)))
else:

View file

@ -9,10 +9,24 @@ from .... direct.milvus_graph_embeddings import EntityVectors
from .... base import GraphEmbeddingsStoreService, CollectionConfigHandler
from .... base import AsyncProcessor, Consumer, Producer
from .... base import ConsumerMetrics, ProducerMetrics
from .... schema import IRI, LITERAL
# Module logger
logger = logging.getLogger(__name__)
def get_term_value(term):
"""Extract the string value from a Term"""
if term is None:
return None
if term.type == IRI:
return term.iri
elif term.type == LITERAL:
return term.value
else:
# For blank nodes or other types, use id or value
return term.id or term.value
default_ident = "ge-write"
default_store_uri = 'http://localhost:19530'
@ -36,11 +50,12 @@ class Processor(CollectionConfigHandler, GraphEmbeddingsStoreService):
async def store_graph_embeddings(self, message):
for entity in message.entities:
entity_value = get_term_value(entity.entity)
if entity.entity.value != "" and entity.entity.value is not None:
if entity_value != "" and entity_value is not None:
for vec in entity.vectors:
self.vecstore.insert(
vec, entity.entity.value,
vec, entity_value,
message.metadata.user,
message.metadata.collection
)

View file

@ -14,10 +14,24 @@ import logging
from .... base import GraphEmbeddingsStoreService, CollectionConfigHandler
from .... base import AsyncProcessor, Consumer, Producer
from .... base import ConsumerMetrics, ProducerMetrics
from .... schema import IRI, LITERAL
# Module logger
logger = logging.getLogger(__name__)
def get_term_value(term):
"""Extract the string value from a Term"""
if term is None:
return None
if term.type == IRI:
return term.iri
elif term.type == LITERAL:
return term.value
else:
# For blank nodes or other types, use id or value
return term.id or term.value
default_ident = "ge-write"
default_api_key = os.getenv("PINECONE_API_KEY", "not-specified")
default_cloud = "aws"
@ -100,8 +114,9 @@ class Processor(CollectionConfigHandler, GraphEmbeddingsStoreService):
return
for entity in message.entities:
entity_value = get_term_value(entity.entity)
if entity.entity.value == "" or entity.entity.value is None:
if entity_value == "" or entity_value is None:
continue
for vec in entity.vectors:
@ -126,7 +141,7 @@ class Processor(CollectionConfigHandler, GraphEmbeddingsStoreService):
{
"id": vector_id,
"values": vec,
"metadata": { "entity": entity.entity.value },
"metadata": { "entity": entity_value },
}
]

View file

@ -12,10 +12,25 @@ import logging
from .... base import GraphEmbeddingsStoreService, CollectionConfigHandler
from .... base import AsyncProcessor, Consumer, Producer
from .... base import ConsumerMetrics, ProducerMetrics
from .... schema import IRI, LITERAL
# Module logger
logger = logging.getLogger(__name__)
def get_term_value(term):
"""Extract the string value from a Term"""
if term is None:
return None
if term.type == IRI:
return term.iri
elif term.type == LITERAL:
return term.value
else:
# For blank nodes or other types, use id or value
return term.id or term.value
default_ident = "ge-write"
default_store_uri = 'http://localhost:6333'
@ -51,8 +66,10 @@ class Processor(CollectionConfigHandler, GraphEmbeddingsStoreService):
return
for entity in message.entities:
entity_value = get_term_value(entity.entity)
if entity.entity.value == "" or entity.entity.value is None: return
if entity_value == "" or entity_value is None:
continue
for vec in entity.vectors:
@ -80,7 +97,7 @@ class Processor(CollectionConfigHandler, GraphEmbeddingsStoreService):
id=str(uuid.uuid4()),
vector=vec,
payload={
"entity": entity.entity.value,
"entity": entity_value,
}
)
]

View file

@ -10,11 +10,12 @@ import argparse
import time
import logging
from .... direct.cassandra_kg import KnowledgeGraph
from .... direct.cassandra_kg import KnowledgeGraph, DEFAULT_GRAPH
from .... base import TriplesStoreService, CollectionConfigHandler
from .... base import AsyncProcessor, Consumer, Producer
from .... base import ConsumerMetrics, ProducerMetrics
from .... base.cassandra_config import add_cassandra_args, resolve_cassandra_config
from .... schema import IRI, LITERAL
# Module logger
logger = logging.getLogger(__name__)
@ -22,6 +23,19 @@ logger = logging.getLogger(__name__)
default_ident = "triples-write"
def get_term_value(term):
"""Extract the string value from a Term"""
if term is None:
return None
if term.type == IRI:
return term.iri
elif term.type == LITERAL:
return term.value
else:
# For blank nodes or other types, use id or value
return term.id or term.value
class Processor(CollectionConfigHandler, TriplesStoreService):
def __init__(self, **params):
@ -84,11 +98,19 @@ class Processor(CollectionConfigHandler, TriplesStoreService):
self.table = user
for t in message.triples:
# Extract values from Term objects
s_val = get_term_value(t.s)
p_val = get_term_value(t.p)
o_val = get_term_value(t.o)
# t.g is None for default graph, or a graph IRI
g_val = t.g if t.g is not None else DEFAULT_GRAPH
self.tg.insert(
message.metadata.collection,
t.s.value,
t.p.value,
t.o.value
s_val,
p_val,
o_val,
g=g_val
)
async def create_collection(self, user: str, collection: str, metadata: dict):

View file

@ -15,12 +15,27 @@ from falkordb import FalkorDB
from .... base import TriplesStoreService, CollectionConfigHandler
from .... base import AsyncProcessor, Consumer, Producer
from .... base import ConsumerMetrics, ProducerMetrics
from .... schema import IRI, LITERAL
# Module logger
logger = logging.getLogger(__name__)
default_ident = "triples-write"
def get_term_value(term):
"""Extract the string value from a Term"""
if term is None:
return None
if term.type == IRI:
return term.iri
elif term.type == LITERAL:
return term.value
else:
# For blank nodes or other types, use id or value
return term.id or term.value
default_graph_url = 'falkor://falkordb:6379'
default_database = 'falkordb'
@ -164,14 +179,18 @@ class Processor(CollectionConfigHandler, TriplesStoreService):
for t in message.triples:
self.create_node(t.s.value, user, collection)
s_val = get_term_value(t.s)
p_val = get_term_value(t.p)
o_val = get_term_value(t.o)
if t.o.is_uri:
self.create_node(t.o.value, user, collection)
self.relate_node(t.s.value, t.p.value, t.o.value, user, collection)
self.create_node(s_val, user, collection)
if t.o.type == IRI:
self.create_node(o_val, user, collection)
self.relate_node(s_val, p_val, o_val, user, collection)
else:
self.create_literal(t.o.value, user, collection)
self.relate_literal(t.s.value, t.p.value, t.o.value, user, collection)
self.create_literal(o_val, user, collection)
self.relate_literal(s_val, p_val, o_val, user, collection)
@staticmethod
def add_args(parser):

View file

@ -15,12 +15,27 @@ from neo4j import GraphDatabase
from .... base import TriplesStoreService, CollectionConfigHandler
from .... base import AsyncProcessor, Consumer, Producer
from .... base import ConsumerMetrics, ProducerMetrics
from .... schema import IRI, LITERAL
# Module logger
logger = logging.getLogger(__name__)
default_ident = "triples-write"
def get_term_value(term):
"""Extract the string value from a Term"""
if term is None:
return None
if term.type == IRI:
return term.iri
elif term.type == LITERAL:
return term.value
else:
# For blank nodes or other types, use id or value
return term.id or term.value
default_graph_host = 'bolt://memgraph:7687'
default_username = 'memgraph'
default_password = 'password'
@ -204,40 +219,44 @@ class Processor(CollectionConfigHandler, TriplesStoreService):
def create_triple(self, tx, t, user, collection):
s_val = get_term_value(t.s)
p_val = get_term_value(t.p)
o_val = get_term_value(t.o)
# Create new s node with given uri, if not exists
result = tx.run(
"MERGE (n:Node {uri: $uri, user: $user, collection: $collection})",
uri=t.s.value, user=user, collection=collection
uri=s_val, user=user, collection=collection
)
if t.o.is_uri:
if t.o.type == IRI:
# Create new o node with given uri, if not exists
result = tx.run(
"MERGE (n:Node {uri: $uri, user: $user, collection: $collection})",
uri=t.o.value, user=user, collection=collection
uri=o_val, user=user, collection=collection
)
result = tx.run(
"MATCH (src:Node {uri: $src, user: $user, collection: $collection}) "
"MATCH (dest:Node {uri: $dest, user: $user, collection: $collection}) "
"MERGE (src)-[:Rel {uri: $uri, user: $user, collection: $collection}]->(dest)",
src=t.s.value, dest=t.o.value, uri=t.p.value, user=user, collection=collection,
src=s_val, dest=o_val, uri=p_val, user=user, collection=collection,
)
else:
# Create new o literal with given uri, if not exists
result = tx.run(
"MERGE (n:Literal {value: $value, user: $user, collection: $collection})",
value=t.o.value, user=user, collection=collection
value=o_val, user=user, collection=collection
)
result = tx.run(
"MATCH (src:Node {uri: $src, user: $user, collection: $collection}) "
"MATCH (dest:Literal {value: $dest, user: $user, collection: $collection}) "
"MERGE (src)-[:Rel {uri: $uri, user: $user, collection: $collection}]->(dest)",
src=t.s.value, dest=t.o.value, uri=t.p.value, user=user, collection=collection,
src=s_val, dest=o_val, uri=p_val, user=user, collection=collection,
)
async def store_triples(self, message):
@ -257,14 +276,18 @@ class Processor(CollectionConfigHandler, TriplesStoreService):
for t in message.triples:
self.create_node(t.s.value, user, collection)
s_val = get_term_value(t.s)
p_val = get_term_value(t.p)
o_val = get_term_value(t.o)
if t.o.is_uri:
self.create_node(t.o.value, user, collection)
self.relate_node(t.s.value, t.p.value, t.o.value, user, collection)
self.create_node(s_val, user, collection)
if t.o.type == IRI:
self.create_node(o_val, user, collection)
self.relate_node(s_val, p_val, o_val, user, collection)
else:
self.create_literal(t.o.value, user, collection)
self.relate_literal(t.s.value, t.p.value, t.o.value, user, collection)
self.create_literal(o_val, user, collection)
self.relate_literal(s_val, p_val, o_val, user, collection)
# Alternative implementation using transactions
# with self.io.session(database=self.db) as session:

View file

@ -14,12 +14,27 @@ from neo4j import GraphDatabase
from .... base import TriplesStoreService, CollectionConfigHandler
from .... base import AsyncProcessor, Consumer, Producer
from .... base import ConsumerMetrics, ProducerMetrics
from .... schema import IRI, LITERAL
# Module logger
logger = logging.getLogger(__name__)
default_ident = "triples-write"
def get_term_value(term):
"""Extract the string value from a Term"""
if term is None:
return None
if term.type == IRI:
return term.iri
elif term.type == LITERAL:
return term.value
else:
# For blank nodes or other types, use id or value
return term.id or term.value
default_graph_host = 'bolt://neo4j:7687'
default_username = 'neo4j'
default_password = 'password'
@ -212,14 +227,18 @@ class Processor(CollectionConfigHandler, TriplesStoreService):
for t in message.triples:
self.create_node(t.s.value, user, collection)
s_val = get_term_value(t.s)
p_val = get_term_value(t.p)
o_val = get_term_value(t.o)
if t.o.is_uri:
self.create_node(t.o.value, user, collection)
self.relate_node(t.s.value, t.p.value, t.o.value, user, collection)
self.create_node(s_val, user, collection)
if t.o.type == IRI:
self.create_node(o_val, user, collection)
self.relate_node(s_val, p_val, o_val, user, collection)
else:
self.create_literal(t.o.value, user, collection)
self.relate_literal(t.s.value, t.p.value, t.o.value, user, collection)
self.create_literal(o_val, user, collection)
self.relate_literal(s_val, p_val, o_val, user, collection)
@staticmethod
def add_args(parser):

View file

@ -1,6 +1,6 @@
from .. schema import KnowledgeResponse, Triple, Triples, EntityEmbeddings
from .. schema import Metadata, Value, GraphEmbeddings
from .. schema import Metadata, GraphEmbeddings
from cassandra.cluster import Cluster
from cassandra.auth import PlainTextAuthProvider

View file

@ -1,8 +1,24 @@
from .. schema import KnowledgeResponse, Triple, Triples, EntityEmbeddings
from .. schema import Metadata, Value, GraphEmbeddings
from .. schema import Metadata, Term, IRI, LITERAL, GraphEmbeddings
from cassandra.cluster import Cluster
def term_to_tuple(term):
"""Convert Term to (value, is_uri) tuple for database storage."""
if term.type == IRI:
return (term.iri, True)
else: # LITERAL
return (term.value, False)
def tuple_to_term(value, is_uri):
"""Convert (value, is_uri) tuple from database to Term."""
if is_uri:
return Term(type=IRI, iri=value)
else:
return Term(type=LITERAL, value=value)
from cassandra.auth import PlainTextAuthProvider
from ssl import SSLContext, PROTOCOL_TLSv1_2
@ -205,8 +221,7 @@ class KnowledgeTableStore:
if m.metadata.metadata:
metadata = [
(
v.s.value, v.s.is_uri, v.p.value, v.p.is_uri,
v.o.value, v.o.is_uri
*term_to_tuple(v.s), *term_to_tuple(v.p), *term_to_tuple(v.o)
)
for v in m.metadata.metadata
]
@ -215,8 +230,7 @@ class KnowledgeTableStore:
triples = [
(
v.s.value, v.s.is_uri, v.p.value, v.p.is_uri,
v.o.value, v.o.is_uri
*term_to_tuple(v.s), *term_to_tuple(v.p), *term_to_tuple(v.o)
)
for v in m.triples
]
@ -248,8 +262,7 @@ class KnowledgeTableStore:
if m.metadata.metadata:
metadata = [
(
v.s.value, v.s.is_uri, v.p.value, v.p.is_uri,
v.o.value, v.o.is_uri
*term_to_tuple(v.s), *term_to_tuple(v.p), *term_to_tuple(v.o)
)
for v in m.metadata.metadata
]
@ -258,7 +271,7 @@ class KnowledgeTableStore:
entities = [
(
(v.entity.value, v.entity.is_uri),
term_to_tuple(v.entity),
v.vectors
)
for v in m.entities
@ -291,8 +304,7 @@ class KnowledgeTableStore:
if m.metadata.metadata:
metadata = [
(
v.s.value, v.s.is_uri, v.p.value, v.p.is_uri,
v.o.value, v.o.is_uri
*term_to_tuple(v.s), *term_to_tuple(v.p), *term_to_tuple(v.o)
)
for v in m.metadata.metadata
]
@ -414,9 +426,9 @@ class KnowledgeTableStore:
if row[2]:
metadata = [
Triple(
s = Value(value = elt[0], is_uri = elt[1]),
p = Value(value = elt[2], is_uri = elt[3]),
o = Value(value = elt[4], is_uri = elt[5]),
s = tuple_to_term(elt[0], elt[1]),
p = tuple_to_term(elt[2], elt[3]),
o = tuple_to_term(elt[4], elt[5]),
)
for elt in row[2]
]
@ -425,9 +437,9 @@ class KnowledgeTableStore:
triples = [
Triple(
s = Value(value = elt[0], is_uri = elt[1]),
p = Value(value = elt[2], is_uri = elt[3]),
o = Value(value = elt[4], is_uri = elt[5]),
s = tuple_to_term(elt[0], elt[1]),
p = tuple_to_term(elt[2], elt[3]),
o = tuple_to_term(elt[4], elt[5]),
)
for elt in row[3]
]
@ -470,9 +482,9 @@ class KnowledgeTableStore:
if row[2]:
metadata = [
Triple(
s = Value(value = elt[0], is_uri = elt[1]),
p = Value(value = elt[2], is_uri = elt[3]),
o = Value(value = elt[4], is_uri = elt[5]),
s = tuple_to_term(elt[0], elt[1]),
p = tuple_to_term(elt[2], elt[3]),
o = tuple_to_term(elt[4], elt[5]),
)
for elt in row[2]
]
@ -481,7 +493,7 @@ class KnowledgeTableStore:
entities = [
EntityEmbeddings(
entity = Value(value = ent[0][0], is_uri = ent[0][1]),
entity = tuple_to_term(ent[0][0], ent[0][1]),
vectors = ent[1]
)
for ent in row[3]

View file

@ -1,8 +1,24 @@
from .. schema import LibrarianRequest, LibrarianResponse
from .. schema import DocumentMetadata, ProcessingMetadata
from .. schema import Error, Triple, Value
from .. schema import Error, Triple, Term, IRI, LITERAL
from .. knowledge import hash
def term_to_tuple(term):
"""Convert Term to (value, is_uri) tuple for database storage."""
if term.type == IRI:
return (term.iri, True)
else: # LITERAL
return (term.value, False)
def tuple_to_term(value, is_uri):
"""Convert (value, is_uri) tuple from database to Term."""
if is_uri:
return Term(type=IRI, iri=value)
else:
return Term(type=LITERAL, value=value)
from .. exceptions import RequestError
from cassandra.cluster import Cluster
@ -215,8 +231,7 @@ class LibraryTableStore:
metadata = [
(
v.s.value, v.s.is_uri, v.p.value, v.p.is_uri,
v.o.value, v.o.is_uri
*term_to_tuple(v.s), *term_to_tuple(v.p), *term_to_tuple(v.o)
)
for v in document.metadata
]
@ -249,8 +264,7 @@ class LibraryTableStore:
metadata = [
(
v.s.value, v.s.is_uri, v.p.value, v.p.is_uri,
v.o.value, v.o.is_uri
*term_to_tuple(v.s), *term_to_tuple(v.p), *term_to_tuple(v.o)
)
for v in document.metadata
]
@ -331,9 +345,9 @@ class LibraryTableStore:
comments = row[4],
metadata = [
Triple(
s=Value(value=m[0], is_uri=m[1]),
p=Value(value=m[2], is_uri=m[3]),
o=Value(value=m[4], is_uri=m[5])
s=tuple_to_term(m[0], m[1]),
p=tuple_to_term(m[2], m[3]),
o=tuple_to_term(m[4], m[5])
)
for m in row[5]
],
@ -376,9 +390,9 @@ class LibraryTableStore:
comments = row[3],
metadata = [
Triple(
s=Value(value=m[0], is_uri=m[1]),
p=Value(value=m[2], is_uri=m[3]),
o=Value(value=m[4], is_uri=m[5])
s=tuple_to_term(m[0], m[1]),
p=tuple_to_term(m[2], m[3]),
o=tuple_to_term(m[4], m[5])
)
for m in row[4]
],