mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-30 19:06:21 +02:00
Changed schema for Value -> Term, majorly breaking change (#622)
* Changed schema for Value -> Term, majorly breaking change * Following the schema change, Value -> Term into all processing * Updated Cassandra for g, p, s, o index patterns (7 indexes) * Reviewed and updated all tests * Neo4j, Memgraph and FalkorDB remain broken, will look at once settled down
This commit is contained in:
parent
e061f2c633
commit
cf0daedefa
86 changed files with 2458 additions and 1764 deletions
|
|
@ -10,7 +10,7 @@ description = "TrustGraph provides a means to run a pipeline of flexible AI proc
|
|||
readme = "README.md"
|
||||
requires-python = ">=3.8"
|
||||
dependencies = [
|
||||
"trustgraph-base>=1.9,<1.10",
|
||||
"trustgraph-base>=2.0,<2.1",
|
||||
"aiohttp",
|
||||
"anthropic",
|
||||
"scylla-driver",
|
||||
|
|
|
|||
|
|
@ -11,7 +11,24 @@ _active_clusters = []
|
|||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Sentinel value for wildcard graph queries
|
||||
GRAPH_WILDCARD = "*"
|
||||
|
||||
# Default graph stored as empty string
|
||||
DEFAULT_GRAPH = ""
|
||||
|
||||
|
||||
class KnowledgeGraph:
|
||||
"""
|
||||
Cassandra-backed knowledge graph supporting quads (s, p, o, g).
|
||||
|
||||
Uses 7 tables to support all 16 query patterns efficiently:
|
||||
- Family A (g-wildcard): SPOG, POSG, OSPG
|
||||
- Family B (g-specified): GSPO, GPOS, GOSP
|
||||
- Collection table: COLL (for iteration/deletion)
|
||||
|
||||
Plus a metadata table for tracking collections.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self, hosts=None,
|
||||
|
|
@ -24,12 +41,22 @@ class KnowledgeGraph:
|
|||
self.keyspace = keyspace
|
||||
self.username = username
|
||||
|
||||
# Optimized multi-table schema with collection deletion support
|
||||
self.subject_table = "triples_s"
|
||||
self.po_table = "triples_p"
|
||||
self.object_table = "triples_o"
|
||||
self.collection_table = "triples_collection" # For SPO queries and deletion
|
||||
self.collection_metadata_table = "collection_metadata" # For tracking which collections exist
|
||||
# 7-table schema for quads with full query pattern support
|
||||
# Family A: g-wildcard queries (g in clustering columns)
|
||||
self.spog_table = "quads_spog" # partition (collection, s), cluster (p, o, g)
|
||||
self.posg_table = "quads_posg" # partition (collection, p), cluster (o, s, g)
|
||||
self.ospg_table = "quads_ospg" # partition (collection, o), cluster (s, p, g)
|
||||
|
||||
# Family B: g-specified queries (g in partition key)
|
||||
self.gspo_table = "quads_gspo" # partition (collection, g, s), cluster (p, o)
|
||||
self.gpos_table = "quads_gpos" # partition (collection, g, p), cluster (o, s)
|
||||
self.gosp_table = "quads_gosp" # partition (collection, g, o), cluster (s, p)
|
||||
|
||||
# Collection table for iteration and bulk deletion
|
||||
self.coll_table = "quads_coll" # partition (collection), cluster (g, s, p, o)
|
||||
|
||||
# Collection metadata tracking
|
||||
self.collection_metadata_table = "collection_metadata"
|
||||
|
||||
if username and password:
|
||||
ssl_context = SSLContext(PROTOCOL_TLSv1_2)
|
||||
|
|
@ -46,237 +73,376 @@ class KnowledgeGraph:
|
|||
self.prepare_statements()
|
||||
|
||||
def clear(self):
|
||||
|
||||
self.session.execute(f"""
|
||||
drop keyspace if exists {self.keyspace};
|
||||
""");
|
||||
|
||||
""")
|
||||
self.init()
|
||||
|
||||
def init(self):
|
||||
|
||||
self.session.execute(f"""
|
||||
create keyspace if not exists {self.keyspace}
|
||||
with replication = {{
|
||||
'class' : 'SimpleStrategy',
|
||||
'replication_factor' : 1
|
||||
}};
|
||||
""");
|
||||
""")
|
||||
|
||||
self.session.set_keyspace(self.keyspace)
|
||||
self.init_optimized_schema()
|
||||
self.init_quad_schema()
|
||||
|
||||
def init_quad_schema(self):
|
||||
"""Initialize 7-table schema for quads with full query pattern support"""
|
||||
|
||||
def init_optimized_schema(self):
|
||||
"""Initialize optimized multi-table schema for performance"""
|
||||
# Table 1: Subject-centric queries (get_s, get_sp, get_os)
|
||||
# Compound partition key for optimal data distribution
|
||||
# Family A: g-wildcard queries (g in clustering columns)
|
||||
|
||||
# SPOG: partition (collection, s), cluster (p, o, g)
|
||||
# Supports: (?, s, ?, ?), (?, s, p, ?), (?, s, p, o)
|
||||
self.session.execute(f"""
|
||||
CREATE TABLE IF NOT EXISTS {self.subject_table} (
|
||||
CREATE TABLE IF NOT EXISTS {self.spog_table} (
|
||||
collection text,
|
||||
s text,
|
||||
p text,
|
||||
o text,
|
||||
PRIMARY KEY ((collection, s), p, o)
|
||||
g text,
|
||||
PRIMARY KEY ((collection, s), p, o, g)
|
||||
);
|
||||
""");
|
||||
""")
|
||||
|
||||
# Table 2: Predicate-Object queries (get_p, get_po) - eliminates ALLOW FILTERING!
|
||||
# Compound partition key for optimal data distribution
|
||||
# POSG: partition (collection, p), cluster (o, s, g)
|
||||
# Supports: (?, ?, p, ?), (?, ?, p, o)
|
||||
self.session.execute(f"""
|
||||
CREATE TABLE IF NOT EXISTS {self.po_table} (
|
||||
CREATE TABLE IF NOT EXISTS {self.posg_table} (
|
||||
collection text,
|
||||
p text,
|
||||
o text,
|
||||
s text,
|
||||
PRIMARY KEY ((collection, p), o, s)
|
||||
g text,
|
||||
PRIMARY KEY ((collection, p), o, s, g)
|
||||
);
|
||||
""");
|
||||
""")
|
||||
|
||||
# Table 3: Object-centric queries (get_o)
|
||||
# Compound partition key for optimal data distribution
|
||||
# OSPG: partition (collection, o), cluster (s, p, g)
|
||||
# Supports: (?, ?, ?, o), (?, s, ?, o)
|
||||
self.session.execute(f"""
|
||||
CREATE TABLE IF NOT EXISTS {self.object_table} (
|
||||
CREATE TABLE IF NOT EXISTS {self.ospg_table} (
|
||||
collection text,
|
||||
o text,
|
||||
s text,
|
||||
p text,
|
||||
PRIMARY KEY ((collection, o), s, p)
|
||||
g text,
|
||||
PRIMARY KEY ((collection, o), s, p, g)
|
||||
);
|
||||
""");
|
||||
""")
|
||||
|
||||
# Table 4: Collection management and SPO queries (get_spo)
|
||||
# Simple partition key enables efficient collection deletion
|
||||
# Family B: g-specified queries (g in partition key)
|
||||
|
||||
# GSPO: partition (collection, g, s), cluster (p, o)
|
||||
# Supports: (g, s, ?, ?), (g, s, p, ?), (g, s, p, o)
|
||||
self.session.execute(f"""
|
||||
CREATE TABLE IF NOT EXISTS {self.collection_table} (
|
||||
CREATE TABLE IF NOT EXISTS {self.gspo_table} (
|
||||
collection text,
|
||||
g text,
|
||||
s text,
|
||||
p text,
|
||||
o text,
|
||||
PRIMARY KEY (collection, s, p, o)
|
||||
PRIMARY KEY ((collection, g, s), p, o)
|
||||
);
|
||||
""");
|
||||
""")
|
||||
|
||||
# Table 5: Collection metadata tracking
|
||||
# Tracks which collections exist without polluting triple data
|
||||
# GPOS: partition (collection, g, p), cluster (o, s)
|
||||
# Supports: (g, ?, p, ?), (g, ?, p, o)
|
||||
self.session.execute(f"""
|
||||
CREATE TABLE IF NOT EXISTS {self.gpos_table} (
|
||||
collection text,
|
||||
g text,
|
||||
p text,
|
||||
o text,
|
||||
s text,
|
||||
PRIMARY KEY ((collection, g, p), o, s)
|
||||
);
|
||||
""")
|
||||
|
||||
# GOSP: partition (collection, g, o), cluster (s, p)
|
||||
# Supports: (g, ?, ?, o), (g, s, ?, o)
|
||||
self.session.execute(f"""
|
||||
CREATE TABLE IF NOT EXISTS {self.gosp_table} (
|
||||
collection text,
|
||||
g text,
|
||||
o text,
|
||||
s text,
|
||||
p text,
|
||||
PRIMARY KEY ((collection, g, o), s, p)
|
||||
);
|
||||
""")
|
||||
|
||||
# Collection table for iteration and bulk deletion
|
||||
# COLL: partition (collection), cluster (g, s, p, o)
|
||||
self.session.execute(f"""
|
||||
CREATE TABLE IF NOT EXISTS {self.coll_table} (
|
||||
collection text,
|
||||
g text,
|
||||
s text,
|
||||
p text,
|
||||
o text,
|
||||
PRIMARY KEY (collection, g, s, p, o)
|
||||
);
|
||||
""")
|
||||
|
||||
# Collection metadata tracking
|
||||
self.session.execute(f"""
|
||||
CREATE TABLE IF NOT EXISTS {self.collection_metadata_table} (
|
||||
collection text,
|
||||
created_at timestamp,
|
||||
PRIMARY KEY (collection)
|
||||
);
|
||||
""");
|
||||
""")
|
||||
|
||||
logger.info("Optimized multi-table schema initialized (5 tables)")
|
||||
logger.info("Quad schema initialized (7 tables + metadata)")
|
||||
|
||||
def prepare_statements(self):
|
||||
"""Prepare statements for optimal performance"""
|
||||
# Insert statements for batch operations
|
||||
self.insert_subject_stmt = self.session.prepare(
|
||||
f"INSERT INTO {self.subject_table} (collection, s, p, o) VALUES (?, ?, ?, ?)"
|
||||
"""Prepare statements for all 7 tables"""
|
||||
|
||||
# Insert statements
|
||||
self.insert_spog_stmt = self.session.prepare(
|
||||
f"INSERT INTO {self.spog_table} (collection, s, p, o, g) VALUES (?, ?, ?, ?, ?)"
|
||||
)
|
||||
self.insert_posg_stmt = self.session.prepare(
|
||||
f"INSERT INTO {self.posg_table} (collection, p, o, s, g) VALUES (?, ?, ?, ?, ?)"
|
||||
)
|
||||
self.insert_ospg_stmt = self.session.prepare(
|
||||
f"INSERT INTO {self.ospg_table} (collection, o, s, p, g) VALUES (?, ?, ?, ?, ?)"
|
||||
)
|
||||
self.insert_gspo_stmt = self.session.prepare(
|
||||
f"INSERT INTO {self.gspo_table} (collection, g, s, p, o) VALUES (?, ?, ?, ?, ?)"
|
||||
)
|
||||
self.insert_gpos_stmt = self.session.prepare(
|
||||
f"INSERT INTO {self.gpos_table} (collection, g, p, o, s) VALUES (?, ?, ?, ?, ?)"
|
||||
)
|
||||
self.insert_gosp_stmt = self.session.prepare(
|
||||
f"INSERT INTO {self.gosp_table} (collection, g, o, s, p) VALUES (?, ?, ?, ?, ?)"
|
||||
)
|
||||
self.insert_coll_stmt = self.session.prepare(
|
||||
f"INSERT INTO {self.coll_table} (collection, g, s, p, o) VALUES (?, ?, ?, ?, ?)"
|
||||
)
|
||||
|
||||
self.insert_po_stmt = self.session.prepare(
|
||||
f"INSERT INTO {self.po_table} (collection, p, o, s) VALUES (?, ?, ?, ?)"
|
||||
# Delete statements (for single quad deletion)
|
||||
self.delete_spog_stmt = self.session.prepare(
|
||||
f"DELETE FROM {self.spog_table} WHERE collection = ? AND s = ? AND p = ? AND o = ? AND g = ?"
|
||||
)
|
||||
self.delete_posg_stmt = self.session.prepare(
|
||||
f"DELETE FROM {self.posg_table} WHERE collection = ? AND p = ? AND o = ? AND s = ? AND g = ?"
|
||||
)
|
||||
self.delete_ospg_stmt = self.session.prepare(
|
||||
f"DELETE FROM {self.ospg_table} WHERE collection = ? AND o = ? AND s = ? AND p = ? AND g = ?"
|
||||
)
|
||||
self.delete_gspo_stmt = self.session.prepare(
|
||||
f"DELETE FROM {self.gspo_table} WHERE collection = ? AND g = ? AND s = ? AND p = ? AND o = ?"
|
||||
)
|
||||
self.delete_gpos_stmt = self.session.prepare(
|
||||
f"DELETE FROM {self.gpos_table} WHERE collection = ? AND g = ? AND p = ? AND o = ? AND s = ?"
|
||||
)
|
||||
self.delete_gosp_stmt = self.session.prepare(
|
||||
f"DELETE FROM {self.gosp_table} WHERE collection = ? AND g = ? AND o = ? AND s = ? AND p = ?"
|
||||
)
|
||||
self.delete_coll_stmt = self.session.prepare(
|
||||
f"DELETE FROM {self.coll_table} WHERE collection = ? AND g = ? AND s = ? AND p = ? AND o = ?"
|
||||
)
|
||||
|
||||
self.insert_object_stmt = self.session.prepare(
|
||||
f"INSERT INTO {self.object_table} (collection, o, s, p) VALUES (?, ?, ?, ?)"
|
||||
# Query statements - Family A (g-wildcard, g in clustering)
|
||||
|
||||
# SPOG table queries
|
||||
self.get_s_wildcard_stmt = self.session.prepare(
|
||||
f"SELECT p, o, g FROM {self.spog_table} WHERE collection = ? AND s = ? LIMIT ?"
|
||||
)
|
||||
self.get_sp_wildcard_stmt = self.session.prepare(
|
||||
f"SELECT o, g FROM {self.spog_table} WHERE collection = ? AND s = ? AND p = ? LIMIT ?"
|
||||
)
|
||||
self.get_spo_wildcard_stmt = self.session.prepare(
|
||||
f"SELECT g FROM {self.spog_table} WHERE collection = ? AND s = ? AND p = ? AND o = ? LIMIT ?"
|
||||
)
|
||||
|
||||
self.insert_collection_stmt = self.session.prepare(
|
||||
f"INSERT INTO {self.collection_table} (collection, s, p, o) VALUES (?, ?, ?, ?)"
|
||||
# POSG table queries
|
||||
self.get_p_wildcard_stmt = self.session.prepare(
|
||||
f"SELECT o, s, g FROM {self.posg_table} WHERE collection = ? AND p = ? LIMIT ?"
|
||||
)
|
||||
self.get_po_wildcard_stmt = self.session.prepare(
|
||||
f"SELECT s, g FROM {self.posg_table} WHERE collection = ? AND p = ? AND o = ? LIMIT ?"
|
||||
)
|
||||
|
||||
# Query statements for optimized access
|
||||
# OSPG table queries
|
||||
self.get_o_wildcard_stmt = self.session.prepare(
|
||||
f"SELECT s, p, g FROM {self.ospg_table} WHERE collection = ? AND o = ? LIMIT ?"
|
||||
)
|
||||
self.get_os_wildcard_stmt = self.session.prepare(
|
||||
f"SELECT p, g FROM {self.ospg_table} WHERE collection = ? AND o = ? AND s = ? LIMIT ?"
|
||||
)
|
||||
|
||||
# Query statements - Family B (g-specified, g in partition)
|
||||
|
||||
# GSPO table queries
|
||||
self.get_gs_stmt = self.session.prepare(
|
||||
f"SELECT p, o FROM {self.gspo_table} WHERE collection = ? AND g = ? AND s = ? LIMIT ?"
|
||||
)
|
||||
self.get_gsp_stmt = self.session.prepare(
|
||||
f"SELECT o FROM {self.gspo_table} WHERE collection = ? AND g = ? AND s = ? AND p = ? LIMIT ?"
|
||||
)
|
||||
self.get_gspo_stmt = self.session.prepare(
|
||||
f"SELECT s FROM {self.gspo_table} WHERE collection = ? AND g = ? AND s = ? AND p = ? AND o = ? LIMIT ?"
|
||||
)
|
||||
|
||||
# GPOS table queries
|
||||
self.get_gp_stmt = self.session.prepare(
|
||||
f"SELECT o, s FROM {self.gpos_table} WHERE collection = ? AND g = ? AND p = ? LIMIT ?"
|
||||
)
|
||||
self.get_gpo_stmt = self.session.prepare(
|
||||
f"SELECT s FROM {self.gpos_table} WHERE collection = ? AND g = ? AND p = ? AND o = ? LIMIT ?"
|
||||
)
|
||||
|
||||
# GOSP table queries
|
||||
self.get_go_stmt = self.session.prepare(
|
||||
f"SELECT s, p FROM {self.gosp_table} WHERE collection = ? AND g = ? AND o = ? LIMIT ?"
|
||||
)
|
||||
self.get_gos_stmt = self.session.prepare(
|
||||
f"SELECT p FROM {self.gosp_table} WHERE collection = ? AND g = ? AND o = ? AND s = ? LIMIT ?"
|
||||
)
|
||||
|
||||
# Collection table query (for get_all and iteration)
|
||||
self.get_all_stmt = self.session.prepare(
|
||||
f"SELECT s, p, o FROM {self.subject_table} WHERE collection = ? LIMIT ? ALLOW FILTERING"
|
||||
f"SELECT g, s, p, o FROM {self.coll_table} WHERE collection = ? LIMIT ?"
|
||||
)
|
||||
self.get_g_stmt = self.session.prepare(
|
||||
f"SELECT s, p, o FROM {self.coll_table} WHERE collection = ? AND g = ? LIMIT ?"
|
||||
)
|
||||
|
||||
self.get_s_stmt = self.session.prepare(
|
||||
f"SELECT p, o FROM {self.subject_table} WHERE collection = ? AND s = ? LIMIT ?"
|
||||
)
|
||||
logger.info("Prepared statements initialized for quad schema (7 tables)")
|
||||
|
||||
self.get_p_stmt = self.session.prepare(
|
||||
f"SELECT s, o FROM {self.po_table} WHERE collection = ? AND p = ? LIMIT ?"
|
||||
)
|
||||
def insert(self, collection, s, p, o, g=None):
|
||||
"""Insert a quad into all 7 tables"""
|
||||
# Default graph stored as empty string
|
||||
if g is None:
|
||||
g = DEFAULT_GRAPH
|
||||
|
||||
self.get_o_stmt = self.session.prepare(
|
||||
f"SELECT s, p FROM {self.object_table} WHERE collection = ? AND o = ? LIMIT ?"
|
||||
)
|
||||
|
||||
self.get_sp_stmt = self.session.prepare(
|
||||
f"SELECT o FROM {self.subject_table} WHERE collection = ? AND s = ? AND p = ? LIMIT ?"
|
||||
)
|
||||
|
||||
# The critical optimization: get_po without ALLOW FILTERING!
|
||||
self.get_po_stmt = self.session.prepare(
|
||||
f"SELECT s FROM {self.po_table} WHERE collection = ? AND p = ? AND o = ? LIMIT ?"
|
||||
)
|
||||
|
||||
self.get_os_stmt = self.session.prepare(
|
||||
f"SELECT p FROM {self.object_table} WHERE collection = ? AND o = ? AND s = ? LIMIT ?"
|
||||
)
|
||||
|
||||
self.get_spo_stmt = self.session.prepare(
|
||||
f"SELECT s as x FROM {self.collection_table} WHERE collection = ? AND s = ? AND p = ? AND o = ? LIMIT ?"
|
||||
)
|
||||
|
||||
# Delete statements for collection deletion
|
||||
self.delete_subject_stmt = self.session.prepare(
|
||||
f"DELETE FROM {self.subject_table} WHERE collection = ? AND s = ? AND p = ? AND o = ?"
|
||||
)
|
||||
|
||||
self.delete_po_stmt = self.session.prepare(
|
||||
f"DELETE FROM {self.po_table} WHERE collection = ? AND p = ? AND o = ? AND s = ?"
|
||||
)
|
||||
|
||||
self.delete_object_stmt = self.session.prepare(
|
||||
f"DELETE FROM {self.object_table} WHERE collection = ? AND o = ? AND s = ? AND p = ?"
|
||||
)
|
||||
|
||||
self.delete_collection_stmt = self.session.prepare(
|
||||
f"DELETE FROM {self.collection_table} WHERE collection = ? AND s = ? AND p = ? AND o = ?"
|
||||
)
|
||||
|
||||
logger.info("Prepared statements initialized for optimal performance (4 tables)")
|
||||
|
||||
def insert(self, collection, s, p, o):
|
||||
# Batch write to all four tables for consistency
|
||||
batch = BatchStatement()
|
||||
|
||||
# Insert into subject table
|
||||
batch.add(self.insert_subject_stmt, (collection, s, p, o))
|
||||
# Family A tables
|
||||
batch.add(self.insert_spog_stmt, (collection, s, p, o, g))
|
||||
batch.add(self.insert_posg_stmt, (collection, p, o, s, g))
|
||||
batch.add(self.insert_ospg_stmt, (collection, o, s, p, g))
|
||||
|
||||
# Insert into predicate-object table (column order: collection, p, o, s)
|
||||
batch.add(self.insert_po_stmt, (collection, p, o, s))
|
||||
# Family B tables
|
||||
batch.add(self.insert_gspo_stmt, (collection, g, s, p, o))
|
||||
batch.add(self.insert_gpos_stmt, (collection, g, p, o, s))
|
||||
batch.add(self.insert_gosp_stmt, (collection, g, o, s, p))
|
||||
|
||||
# Insert into object table (column order: collection, o, s, p)
|
||||
batch.add(self.insert_object_stmt, (collection, o, s, p))
|
||||
|
||||
# Insert into collection table for SPO queries and deletion tracking
|
||||
batch.add(self.insert_collection_stmt, (collection, s, p, o))
|
||||
# Collection table
|
||||
batch.add(self.insert_coll_stmt, (collection, g, s, p, o))
|
||||
|
||||
self.session.execute(batch)
|
||||
|
||||
def delete_quad(self, collection, s, p, o, g=None):
|
||||
"""Delete a single quad from all 7 tables"""
|
||||
if g is None:
|
||||
g = DEFAULT_GRAPH
|
||||
|
||||
batch = BatchStatement()
|
||||
|
||||
batch.add(self.delete_spog_stmt, (collection, s, p, o, g))
|
||||
batch.add(self.delete_posg_stmt, (collection, p, o, s, g))
|
||||
batch.add(self.delete_ospg_stmt, (collection, o, s, p, g))
|
||||
batch.add(self.delete_gspo_stmt, (collection, g, s, p, o))
|
||||
batch.add(self.delete_gpos_stmt, (collection, g, p, o, s))
|
||||
batch.add(self.delete_gosp_stmt, (collection, g, o, s, p))
|
||||
batch.add(self.delete_coll_stmt, (collection, g, s, p, o))
|
||||
|
||||
self.session.execute(batch)
|
||||
|
||||
# ========================================================================
|
||||
# Query methods
|
||||
# g=None means default graph, g="*" means all graphs
|
||||
# ========================================================================
|
||||
|
||||
def get_all(self, collection, limit=50):
|
||||
# Use subject table for get_all queries
|
||||
return self.session.execute(
|
||||
self.get_all_stmt,
|
||||
(collection, limit)
|
||||
)
|
||||
"""Get all quads in collection"""
|
||||
return self.session.execute(self.get_all_stmt, (collection, limit))
|
||||
|
||||
def get_s(self, collection, s, limit=10):
|
||||
# Optimized: Direct partition access with (collection, s)
|
||||
return self.session.execute(
|
||||
self.get_s_stmt,
|
||||
(collection, s, limit)
|
||||
)
|
||||
def get_s(self, collection, s, g=None, limit=10):
|
||||
"""Query by subject. g=None: default graph, g='*': all graphs"""
|
||||
if g is None or g == DEFAULT_GRAPH:
|
||||
# Default graph - use GSPO table
|
||||
return self.session.execute(self.get_gs_stmt, (collection, DEFAULT_GRAPH, s, limit))
|
||||
elif g == GRAPH_WILDCARD:
|
||||
# All graphs - use SPOG table
|
||||
return self.session.execute(self.get_s_wildcard_stmt, (collection, s, limit))
|
||||
else:
|
||||
# Specific graph - use GSPO table
|
||||
return self.session.execute(self.get_gs_stmt, (collection, g, s, limit))
|
||||
|
||||
def get_p(self, collection, p, limit=10):
|
||||
# Optimized: Use po_table for direct partition access
|
||||
return self.session.execute(
|
||||
self.get_p_stmt,
|
||||
(collection, p, limit)
|
||||
)
|
||||
def get_p(self, collection, p, g=None, limit=10):
|
||||
"""Query by predicate"""
|
||||
if g is None or g == DEFAULT_GRAPH:
|
||||
return self.session.execute(self.get_gp_stmt, (collection, DEFAULT_GRAPH, p, limit))
|
||||
elif g == GRAPH_WILDCARD:
|
||||
return self.session.execute(self.get_p_wildcard_stmt, (collection, p, limit))
|
||||
else:
|
||||
return self.session.execute(self.get_gp_stmt, (collection, g, p, limit))
|
||||
|
||||
def get_o(self, collection, o, limit=10):
|
||||
# Optimized: Use object_table for direct partition access
|
||||
return self.session.execute(
|
||||
self.get_o_stmt,
|
||||
(collection, o, limit)
|
||||
)
|
||||
def get_o(self, collection, o, g=None, limit=10):
|
||||
"""Query by object"""
|
||||
if g is None or g == DEFAULT_GRAPH:
|
||||
return self.session.execute(self.get_go_stmt, (collection, DEFAULT_GRAPH, o, limit))
|
||||
elif g == GRAPH_WILDCARD:
|
||||
return self.session.execute(self.get_o_wildcard_stmt, (collection, o, limit))
|
||||
else:
|
||||
return self.session.execute(self.get_go_stmt, (collection, g, o, limit))
|
||||
|
||||
def get_sp(self, collection, s, p, limit=10):
|
||||
# Optimized: Use subject_table with clustering key access
|
||||
return self.session.execute(
|
||||
self.get_sp_stmt,
|
||||
(collection, s, p, limit)
|
||||
)
|
||||
def get_sp(self, collection, s, p, g=None, limit=10):
|
||||
"""Query by subject and predicate"""
|
||||
if g is None or g == DEFAULT_GRAPH:
|
||||
return self.session.execute(self.get_gsp_stmt, (collection, DEFAULT_GRAPH, s, p, limit))
|
||||
elif g == GRAPH_WILDCARD:
|
||||
return self.session.execute(self.get_sp_wildcard_stmt, (collection, s, p, limit))
|
||||
else:
|
||||
return self.session.execute(self.get_gsp_stmt, (collection, g, s, p, limit))
|
||||
|
||||
def get_po(self, collection, p, o, limit=10):
|
||||
# CRITICAL OPTIMIZATION: Use po_table - NO MORE ALLOW FILTERING!
|
||||
return self.session.execute(
|
||||
self.get_po_stmt,
|
||||
(collection, p, o, limit)
|
||||
)
|
||||
def get_po(self, collection, p, o, g=None, limit=10):
|
||||
"""Query by predicate and object"""
|
||||
if g is None or g == DEFAULT_GRAPH:
|
||||
return self.session.execute(self.get_gpo_stmt, (collection, DEFAULT_GRAPH, p, o, limit))
|
||||
elif g == GRAPH_WILDCARD:
|
||||
return self.session.execute(self.get_po_wildcard_stmt, (collection, p, o, limit))
|
||||
else:
|
||||
return self.session.execute(self.get_gpo_stmt, (collection, g, p, o, limit))
|
||||
|
||||
def get_os(self, collection, o, s, limit=10):
|
||||
# Optimized: Use subject_table with clustering access (no more ALLOW FILTERING)
|
||||
return self.session.execute(
|
||||
self.get_os_stmt,
|
||||
(collection, s, o, limit)
|
||||
)
|
||||
def get_os(self, collection, o, s, g=None, limit=10):
|
||||
"""Query by object and subject"""
|
||||
if g is None or g == DEFAULT_GRAPH:
|
||||
return self.session.execute(self.get_gos_stmt, (collection, DEFAULT_GRAPH, o, s, limit))
|
||||
elif g == GRAPH_WILDCARD:
|
||||
return self.session.execute(self.get_os_wildcard_stmt, (collection, o, s, limit))
|
||||
else:
|
||||
return self.session.execute(self.get_gos_stmt, (collection, g, o, s, limit))
|
||||
|
||||
def get_spo(self, collection, s, p, o, limit=10):
|
||||
# Optimized: Use collection_table for exact key lookup
|
||||
return self.session.execute(
|
||||
self.get_spo_stmt,
|
||||
(collection, s, p, o, limit)
|
||||
)
|
||||
def get_spo(self, collection, s, p, o, g=None, limit=10):
|
||||
"""Query by subject, predicate, object (find which graphs)"""
|
||||
if g is None or g == DEFAULT_GRAPH:
|
||||
return self.session.execute(self.get_gspo_stmt, (collection, DEFAULT_GRAPH, s, p, o, limit))
|
||||
elif g == GRAPH_WILDCARD:
|
||||
return self.session.execute(self.get_spo_wildcard_stmt, (collection, s, p, o, limit))
|
||||
else:
|
||||
return self.session.execute(self.get_gspo_stmt, (collection, g, s, p, o, limit))
|
||||
|
||||
def get_g(self, collection, g, limit=50):
|
||||
"""Get all quads in a specific graph"""
|
||||
if g is None:
|
||||
g = DEFAULT_GRAPH
|
||||
return self.session.execute(self.get_g_stmt, (collection, g, limit))
|
||||
|
||||
# ========================================================================
|
||||
# Collection management
|
||||
# ========================================================================
|
||||
|
||||
def collection_exists(self, collection):
|
||||
"""Check if collection exists by querying collection_metadata table"""
|
||||
"""Check if collection exists"""
|
||||
try:
|
||||
result = self.session.execute(
|
||||
f"SELECT collection FROM {self.collection_metadata_table} WHERE collection = %s LIMIT 1",
|
||||
|
|
@ -301,63 +467,52 @@ class KnowledgeGraph:
|
|||
raise e
|
||||
|
||||
def delete_collection(self, collection):
|
||||
"""Delete all triples for a specific collection
|
||||
|
||||
Uses collection_table to enumerate all triples, then deletes from all 4 tables
|
||||
using full partition keys for optimal performance with compound keys.
|
||||
"""
|
||||
# Step 1: Read all triples from collection_table (single partition read)
|
||||
"""Delete all quads for a collection from all 7 tables"""
|
||||
# Read all quads from collection table
|
||||
rows = self.session.execute(
|
||||
f"SELECT s, p, o FROM {self.collection_table} WHERE collection = %s",
|
||||
f"SELECT g, s, p, o FROM {self.coll_table} WHERE collection = %s",
|
||||
(collection,)
|
||||
)
|
||||
|
||||
# Step 2: Delete each triple from all 4 tables using full partition keys
|
||||
# Batch deletions for efficiency
|
||||
batch = BatchStatement()
|
||||
count = 0
|
||||
|
||||
for row in rows:
|
||||
s, p, o = row.s, row.p, row.o
|
||||
g, s, p, o = row.g, row.s, row.p, row.o
|
||||
|
||||
# Delete from subject table (partition key: collection, s)
|
||||
batch.add(self.delete_subject_stmt, (collection, s, p, o))
|
||||
|
||||
# Delete from predicate-object table (partition key: collection, p)
|
||||
batch.add(self.delete_po_stmt, (collection, p, o, s))
|
||||
|
||||
# Delete from object table (partition key: collection, o)
|
||||
batch.add(self.delete_object_stmt, (collection, o, s, p))
|
||||
|
||||
# Delete from collection table (partition key: collection only)
|
||||
batch.add(self.delete_collection_stmt, (collection, s, p, o))
|
||||
# Delete from all 7 tables
|
||||
batch.add(self.delete_spog_stmt, (collection, s, p, o, g))
|
||||
batch.add(self.delete_posg_stmt, (collection, p, o, s, g))
|
||||
batch.add(self.delete_ospg_stmt, (collection, o, s, p, g))
|
||||
batch.add(self.delete_gspo_stmt, (collection, g, s, p, o))
|
||||
batch.add(self.delete_gpos_stmt, (collection, g, p, o, s))
|
||||
batch.add(self.delete_gosp_stmt, (collection, g, o, s, p))
|
||||
batch.add(self.delete_coll_stmt, (collection, g, s, p, o))
|
||||
|
||||
count += 1
|
||||
|
||||
# Execute batch every 25 triples to avoid oversized batches
|
||||
# (Each triple adds ~4 statements, so 25 triples = ~100 statements)
|
||||
if count % 25 == 0:
|
||||
# Execute batch every 15 quads (7 deletes each = 105 statements)
|
||||
if count % 15 == 0:
|
||||
self.session.execute(batch)
|
||||
batch = BatchStatement()
|
||||
|
||||
# Execute remaining deletions
|
||||
if count % 25 != 0:
|
||||
# Execute remaining
|
||||
if count % 15 != 0:
|
||||
self.session.execute(batch)
|
||||
|
||||
# Step 3: Delete collection metadata
|
||||
# Delete collection metadata
|
||||
self.session.execute(
|
||||
f"DELETE FROM {self.collection_metadata_table} WHERE collection = %s",
|
||||
(collection,)
|
||||
)
|
||||
|
||||
logger.info(f"Deleted {count} triples from collection {collection}")
|
||||
logger.info(f"Deleted {count} quads from collection {collection}")
|
||||
|
||||
def close(self):
|
||||
"""Close the Cassandra session and cluster connections properly"""
|
||||
"""Close connections"""
|
||||
if hasattr(self, 'session') and self.session:
|
||||
self.session.shutdown()
|
||||
if hasattr(self, 'cluster') and self.cluster:
|
||||
self.cluster.shutdown()
|
||||
# Remove from global tracking
|
||||
if self.cluster in _active_clusters:
|
||||
_active_clusters.remove(self.cluster)
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ import json
|
|||
import urllib.parse
|
||||
import logging
|
||||
|
||||
from ....schema import Chunk, Triple, Triples, Metadata, Value
|
||||
from ....schema import Chunk, Triple, Triples, Metadata, Term, IRI, LITERAL
|
||||
from ....schema import EntityContext, EntityContexts
|
||||
|
||||
from ....rdf import TRUSTGRAPH_ENTITIES, RDF_LABEL, SUBJECT_OF, DEFINITION
|
||||
|
|
@ -253,32 +253,32 @@ class Processor(FlowProcessor):
|
|||
for defn in definitions:
|
||||
|
||||
entity_uri = self.to_uri(defn["entity"])
|
||||
|
||||
|
||||
# Add entity label
|
||||
triples.append(Triple(
|
||||
s = Value(value=entity_uri, is_uri=True),
|
||||
p = Value(value=RDF_LABEL, is_uri=True),
|
||||
o = Value(value=defn["entity"], is_uri=False),
|
||||
s = Term(type=IRI, iri=entity_uri),
|
||||
p = Term(type=IRI, iri=RDF_LABEL),
|
||||
o = Term(type=LITERAL, value=defn["entity"]),
|
||||
))
|
||||
|
||||
|
||||
# Add definition
|
||||
triples.append(Triple(
|
||||
s = Value(value=entity_uri, is_uri=True),
|
||||
p = Value(value=DEFINITION, is_uri=True),
|
||||
o = Value(value=defn["definition"], is_uri=False),
|
||||
s = Term(type=IRI, iri=entity_uri),
|
||||
p = Term(type=IRI, iri=DEFINITION),
|
||||
o = Term(type=LITERAL, value=defn["definition"]),
|
||||
))
|
||||
|
||||
|
||||
# Add subject-of relationship to document
|
||||
if metadata.id:
|
||||
triples.append(Triple(
|
||||
s = Value(value=entity_uri, is_uri=True),
|
||||
p = Value(value=SUBJECT_OF, is_uri=True),
|
||||
o = Value(value=metadata.id, is_uri=True),
|
||||
s = Term(type=IRI, iri=entity_uri),
|
||||
p = Term(type=IRI, iri=SUBJECT_OF),
|
||||
o = Term(type=IRI, iri=metadata.id),
|
||||
))
|
||||
|
||||
|
||||
# Create entity context for embeddings
|
||||
entity_contexts.append(EntityContext(
|
||||
entity=Value(value=entity_uri, is_uri=True),
|
||||
entity=Term(type=IRI, iri=entity_uri),
|
||||
context=defn["definition"]
|
||||
))
|
||||
|
||||
|
|
@ -288,61 +288,61 @@ class Processor(FlowProcessor):
|
|||
subject_uri = self.to_uri(rel["subject"])
|
||||
predicate_uri = self.to_uri(rel["predicate"])
|
||||
|
||||
subject_value = Value(value=subject_uri, is_uri=True)
|
||||
predicate_value = Value(value=predicate_uri, is_uri=True)
|
||||
subject_value = Term(type=IRI, iri=subject_uri)
|
||||
predicate_value = Term(type=IRI, iri=predicate_uri)
|
||||
if rel.get("object-entity", True):
|
||||
object_uri = self.to_uri(rel["object"])
|
||||
object_value = Value(value=object_uri, is_uri=True)
|
||||
object_value = Term(type=IRI, iri=object_uri)
|
||||
else:
|
||||
object_value = Value(value=rel["object"], is_uri=False)
|
||||
|
||||
object_value = Term(type=LITERAL, value=rel["object"])
|
||||
|
||||
# Add subject and predicate labels
|
||||
triples.append(Triple(
|
||||
s = subject_value,
|
||||
p = Value(value=RDF_LABEL, is_uri=True),
|
||||
o = Value(value=rel["subject"], is_uri=False),
|
||||
p = Term(type=IRI, iri=RDF_LABEL),
|
||||
o = Term(type=LITERAL, value=rel["subject"]),
|
||||
))
|
||||
|
||||
|
||||
triples.append(Triple(
|
||||
s = predicate_value,
|
||||
p = Value(value=RDF_LABEL, is_uri=True),
|
||||
o = Value(value=rel["predicate"], is_uri=False),
|
||||
p = Term(type=IRI, iri=RDF_LABEL),
|
||||
o = Term(type=LITERAL, value=rel["predicate"]),
|
||||
))
|
||||
|
||||
|
||||
# Handle object (entity vs literal)
|
||||
if rel.get("object-entity", True):
|
||||
triples.append(Triple(
|
||||
s = object_value,
|
||||
p = Value(value=RDF_LABEL, is_uri=True),
|
||||
o = Value(value=rel["object"], is_uri=True),
|
||||
p = Term(type=IRI, iri=RDF_LABEL),
|
||||
o = Term(type=LITERAL, value=rel["object"]),
|
||||
))
|
||||
|
||||
|
||||
# Add the main relationship triple
|
||||
triples.append(Triple(
|
||||
s = subject_value,
|
||||
p = predicate_value,
|
||||
o = object_value
|
||||
))
|
||||
|
||||
|
||||
# Add subject-of relationships to document
|
||||
if metadata.id:
|
||||
triples.append(Triple(
|
||||
s = subject_value,
|
||||
p = Value(value=SUBJECT_OF, is_uri=True),
|
||||
o = Value(value=metadata.id, is_uri=True),
|
||||
p = Term(type=IRI, iri=SUBJECT_OF),
|
||||
o = Term(type=IRI, iri=metadata.id),
|
||||
))
|
||||
|
||||
|
||||
triples.append(Triple(
|
||||
s = predicate_value,
|
||||
p = Value(value=SUBJECT_OF, is_uri=True),
|
||||
o = Value(value=metadata.id, is_uri=True),
|
||||
p = Term(type=IRI, iri=SUBJECT_OF),
|
||||
o = Term(type=IRI, iri=metadata.id),
|
||||
))
|
||||
|
||||
|
||||
if rel.get("object-entity", True):
|
||||
triples.append(Triple(
|
||||
s = object_value,
|
||||
p = Value(value=SUBJECT_OF, is_uri=True),
|
||||
o = Value(value=metadata.id, is_uri=True),
|
||||
p = Term(type=IRI, iri=SUBJECT_OF),
|
||||
o = Term(type=IRI, iri=metadata.id),
|
||||
))
|
||||
|
||||
return triples, entity_contexts
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ import json
|
|||
import urllib.parse
|
||||
import logging
|
||||
|
||||
from .... schema import Chunk, Triple, Triples, Metadata, Value
|
||||
from .... schema import Chunk, Triple, Triples, Metadata, Term, IRI, LITERAL
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
|
@ -20,9 +20,9 @@ from .... rdf import TRUSTGRAPH_ENTITIES, DEFINITION, RDF_LABEL, SUBJECT_OF
|
|||
from .... base import FlowProcessor, ConsumerSpec, ProducerSpec
|
||||
from .... base import PromptClientSpec
|
||||
|
||||
DEFINITION_VALUE = Value(value=DEFINITION, is_uri=True)
|
||||
RDF_LABEL_VALUE = Value(value=RDF_LABEL, is_uri=True)
|
||||
SUBJECT_OF_VALUE = Value(value=SUBJECT_OF, is_uri=True)
|
||||
DEFINITION_VALUE = Term(type=IRI, iri=DEFINITION)
|
||||
RDF_LABEL_VALUE = Term(type=IRI, iri=RDF_LABEL)
|
||||
SUBJECT_OF_VALUE = Term(type=IRI, iri=SUBJECT_OF)
|
||||
|
||||
default_ident = "kg-extract-definitions"
|
||||
default_concurrency = 1
|
||||
|
|
@ -142,13 +142,13 @@ class Processor(FlowProcessor):
|
|||
|
||||
s_uri = self.to_uri(s)
|
||||
|
||||
s_value = Value(value=str(s_uri), is_uri=True)
|
||||
o_value = Value(value=str(o), is_uri=False)
|
||||
s_value = Term(type=IRI, iri=str(s_uri))
|
||||
o_value = Term(type=LITERAL, value=str(o))
|
||||
|
||||
triples.append(Triple(
|
||||
s=s_value,
|
||||
p=RDF_LABEL_VALUE,
|
||||
o=Value(value=s, is_uri=False),
|
||||
o=Term(type=LITERAL, value=s),
|
||||
))
|
||||
|
||||
triples.append(Triple(
|
||||
|
|
@ -158,7 +158,7 @@ class Processor(FlowProcessor):
|
|||
triples.append(Triple(
|
||||
s=s_value,
|
||||
p=SUBJECT_OF_VALUE,
|
||||
o=Value(value=v.metadata.id, is_uri=True)
|
||||
o=Term(type=IRI, iri=v.metadata.id)
|
||||
))
|
||||
|
||||
ec = EntityContext(
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ import logging
|
|||
import asyncio
|
||||
from typing import List, Dict, Any, Optional
|
||||
|
||||
from .... schema import Chunk, Triple, Triples, Metadata, Value
|
||||
from .... schema import Chunk, Triple, Triples, Metadata, Term, IRI, LITERAL
|
||||
from .... schema import EntityContext, EntityContexts
|
||||
from .... schema import PromptRequest, PromptResponse
|
||||
from .... rdf import TRUSTGRAPH_ENTITIES, RDF_TYPE, RDF_LABEL, DEFINITION
|
||||
|
|
@ -39,6 +39,14 @@ URI_PREFIXES = {
|
|||
}
|
||||
|
||||
|
||||
def make_term(v, is_uri):
|
||||
"""Helper to create Term from value and is_uri flag."""
|
||||
if is_uri:
|
||||
return Term(type=IRI, iri=v)
|
||||
else:
|
||||
return Term(type=LITERAL, value=v)
|
||||
|
||||
|
||||
class Processor(FlowProcessor):
|
||||
"""Main OntoRAG extraction processor."""
|
||||
|
||||
|
|
@ -446,9 +454,9 @@ class Processor(FlowProcessor):
|
|||
is_object_uri = False
|
||||
|
||||
# Create Triple object with expanded URIs
|
||||
s_value = Value(value=subject_uri, is_uri=True)
|
||||
p_value = Value(value=predicate_uri, is_uri=True)
|
||||
o_value = Value(value=object_uri, is_uri=is_object_uri)
|
||||
s_value = make_term(subject_uri, is_uri=True)
|
||||
p_value = make_term(predicate_uri, is_uri=True)
|
||||
o_value = make_term(object_uri, is_uri=is_object_uri)
|
||||
|
||||
validated_triples.append(Triple(
|
||||
s=s_value,
|
||||
|
|
@ -609,9 +617,9 @@ class Processor(FlowProcessor):
|
|||
|
||||
# rdf:type owl:Class
|
||||
ontology_triples.append(Triple(
|
||||
s=Value(value=class_uri, is_uri=True),
|
||||
p=Value(value="http://www.w3.org/1999/02/22-rdf-syntax-ns#type", is_uri=True),
|
||||
o=Value(value="http://www.w3.org/2002/07/owl#Class", is_uri=True)
|
||||
s=make_term(class_uri, is_uri=True),
|
||||
p=make_term("http://www.w3.org/1999/02/22-rdf-syntax-ns#type", is_uri=True),
|
||||
o=make_term("http://www.w3.org/2002/07/owl#Class", is_uri=True)
|
||||
))
|
||||
|
||||
# rdfs:label (stored as 'labels' in OntologyClass.__dict__)
|
||||
|
|
@ -620,18 +628,18 @@ class Processor(FlowProcessor):
|
|||
if isinstance(labels, list) and labels:
|
||||
label_val = labels[0].get('value', class_id) if isinstance(labels[0], dict) else str(labels[0])
|
||||
ontology_triples.append(Triple(
|
||||
s=Value(value=class_uri, is_uri=True),
|
||||
p=Value(value=RDF_LABEL, is_uri=True),
|
||||
o=Value(value=label_val, is_uri=False)
|
||||
s=make_term(class_uri, is_uri=True),
|
||||
p=make_term(RDF_LABEL, is_uri=True),
|
||||
o=make_term(label_val, is_uri=False)
|
||||
))
|
||||
|
||||
# rdfs:comment (stored as 'comment' in OntologyClass.__dict__)
|
||||
if isinstance(class_def, dict) and 'comment' in class_def and class_def['comment']:
|
||||
comment = class_def['comment']
|
||||
ontology_triples.append(Triple(
|
||||
s=Value(value=class_uri, is_uri=True),
|
||||
p=Value(value="http://www.w3.org/2000/01/rdf-schema#comment", is_uri=True),
|
||||
o=Value(value=comment, is_uri=False)
|
||||
s=make_term(class_uri, is_uri=True),
|
||||
p=make_term("http://www.w3.org/2000/01/rdf-schema#comment", is_uri=True),
|
||||
o=make_term(comment, is_uri=False)
|
||||
))
|
||||
|
||||
# rdfs:subClassOf (stored as 'subclass_of' in OntologyClass.__dict__)
|
||||
|
|
@ -648,9 +656,9 @@ class Processor(FlowProcessor):
|
|||
parent_uri = f"https://trustgraph.ai/ontology/{ontology_subset.ontology_id}#{parent}"
|
||||
|
||||
ontology_triples.append(Triple(
|
||||
s=Value(value=class_uri, is_uri=True),
|
||||
p=Value(value="http://www.w3.org/2000/01/rdf-schema#subClassOf", is_uri=True),
|
||||
o=Value(value=parent_uri, is_uri=True)
|
||||
s=make_term(class_uri, is_uri=True),
|
||||
p=make_term("http://www.w3.org/2000/01/rdf-schema#subClassOf", is_uri=True),
|
||||
o=make_term(parent_uri, is_uri=True)
|
||||
))
|
||||
|
||||
# Generate triples for object properties
|
||||
|
|
@ -663,9 +671,9 @@ class Processor(FlowProcessor):
|
|||
|
||||
# rdf:type owl:ObjectProperty
|
||||
ontology_triples.append(Triple(
|
||||
s=Value(value=prop_uri, is_uri=True),
|
||||
p=Value(value="http://www.w3.org/1999/02/22-rdf-syntax-ns#type", is_uri=True),
|
||||
o=Value(value="http://www.w3.org/2002/07/owl#ObjectProperty", is_uri=True)
|
||||
s=make_term(prop_uri, is_uri=True),
|
||||
p=make_term("http://www.w3.org/1999/02/22-rdf-syntax-ns#type", is_uri=True),
|
||||
o=make_term("http://www.w3.org/2002/07/owl#ObjectProperty", is_uri=True)
|
||||
))
|
||||
|
||||
# rdfs:label (stored as 'labels' in OntologyProperty.__dict__)
|
||||
|
|
@ -674,18 +682,18 @@ class Processor(FlowProcessor):
|
|||
if isinstance(labels, list) and labels:
|
||||
label_val = labels[0].get('value', prop_id) if isinstance(labels[0], dict) else str(labels[0])
|
||||
ontology_triples.append(Triple(
|
||||
s=Value(value=prop_uri, is_uri=True),
|
||||
p=Value(value=RDF_LABEL, is_uri=True),
|
||||
o=Value(value=label_val, is_uri=False)
|
||||
s=make_term(prop_uri, is_uri=True),
|
||||
p=make_term(RDF_LABEL, is_uri=True),
|
||||
o=make_term(label_val, is_uri=False)
|
||||
))
|
||||
|
||||
# rdfs:comment (stored as 'comment' in OntologyProperty.__dict__)
|
||||
if isinstance(prop_def, dict) and 'comment' in prop_def and prop_def['comment']:
|
||||
comment = prop_def['comment']
|
||||
ontology_triples.append(Triple(
|
||||
s=Value(value=prop_uri, is_uri=True),
|
||||
p=Value(value="http://www.w3.org/2000/01/rdf-schema#comment", is_uri=True),
|
||||
o=Value(value=comment, is_uri=False)
|
||||
s=make_term(prop_uri, is_uri=True),
|
||||
p=make_term("http://www.w3.org/2000/01/rdf-schema#comment", is_uri=True),
|
||||
o=make_term(comment, is_uri=False)
|
||||
))
|
||||
|
||||
# rdfs:domain (stored as 'domain' in OntologyProperty.__dict__)
|
||||
|
|
@ -702,9 +710,9 @@ class Processor(FlowProcessor):
|
|||
domain_uri = f"https://trustgraph.ai/ontology/{ontology_subset.ontology_id}#{domain}"
|
||||
|
||||
ontology_triples.append(Triple(
|
||||
s=Value(value=prop_uri, is_uri=True),
|
||||
p=Value(value="http://www.w3.org/2000/01/rdf-schema#domain", is_uri=True),
|
||||
o=Value(value=domain_uri, is_uri=True)
|
||||
s=make_term(prop_uri, is_uri=True),
|
||||
p=make_term("http://www.w3.org/2000/01/rdf-schema#domain", is_uri=True),
|
||||
o=make_term(domain_uri, is_uri=True)
|
||||
))
|
||||
|
||||
# rdfs:range (stored as 'range' in OntologyProperty.__dict__)
|
||||
|
|
@ -721,9 +729,9 @@ class Processor(FlowProcessor):
|
|||
range_uri = f"https://trustgraph.ai/ontology/{ontology_subset.ontology_id}#{range_val}"
|
||||
|
||||
ontology_triples.append(Triple(
|
||||
s=Value(value=prop_uri, is_uri=True),
|
||||
p=Value(value="http://www.w3.org/2000/01/rdf-schema#range", is_uri=True),
|
||||
o=Value(value=range_uri, is_uri=True)
|
||||
s=make_term(prop_uri, is_uri=True),
|
||||
p=make_term("http://www.w3.org/2000/01/rdf-schema#range", is_uri=True),
|
||||
o=make_term(range_uri, is_uri=True)
|
||||
))
|
||||
|
||||
# Generate triples for datatype properties
|
||||
|
|
@ -736,9 +744,9 @@ class Processor(FlowProcessor):
|
|||
|
||||
# rdf:type owl:DatatypeProperty
|
||||
ontology_triples.append(Triple(
|
||||
s=Value(value=prop_uri, is_uri=True),
|
||||
p=Value(value="http://www.w3.org/1999/02/22-rdf-syntax-ns#type", is_uri=True),
|
||||
o=Value(value="http://www.w3.org/2002/07/owl#DatatypeProperty", is_uri=True)
|
||||
s=make_term(prop_uri, is_uri=True),
|
||||
p=make_term("http://www.w3.org/1999/02/22-rdf-syntax-ns#type", is_uri=True),
|
||||
o=make_term("http://www.w3.org/2002/07/owl#DatatypeProperty", is_uri=True)
|
||||
))
|
||||
|
||||
# rdfs:label (stored as 'labels' in OntologyProperty.__dict__)
|
||||
|
|
@ -747,18 +755,18 @@ class Processor(FlowProcessor):
|
|||
if isinstance(labels, list) and labels:
|
||||
label_val = labels[0].get('value', prop_id) if isinstance(labels[0], dict) else str(labels[0])
|
||||
ontology_triples.append(Triple(
|
||||
s=Value(value=prop_uri, is_uri=True),
|
||||
p=Value(value=RDF_LABEL, is_uri=True),
|
||||
o=Value(value=label_val, is_uri=False)
|
||||
s=make_term(prop_uri, is_uri=True),
|
||||
p=make_term(RDF_LABEL, is_uri=True),
|
||||
o=make_term(label_val, is_uri=False)
|
||||
))
|
||||
|
||||
# rdfs:comment (stored as 'comment' in OntologyProperty.__dict__)
|
||||
if isinstance(prop_def, dict) and 'comment' in prop_def and prop_def['comment']:
|
||||
comment = prop_def['comment']
|
||||
ontology_triples.append(Triple(
|
||||
s=Value(value=prop_uri, is_uri=True),
|
||||
p=Value(value="http://www.w3.org/2000/01/rdf-schema#comment", is_uri=True),
|
||||
o=Value(value=comment, is_uri=False)
|
||||
s=make_term(prop_uri, is_uri=True),
|
||||
p=make_term("http://www.w3.org/2000/01/rdf-schema#comment", is_uri=True),
|
||||
o=make_term(comment, is_uri=False)
|
||||
))
|
||||
|
||||
# rdfs:domain (stored as 'domain' in OntologyProperty.__dict__)
|
||||
|
|
@ -775,9 +783,9 @@ class Processor(FlowProcessor):
|
|||
domain_uri = f"https://trustgraph.ai/ontology/{ontology_subset.ontology_id}#{domain}"
|
||||
|
||||
ontology_triples.append(Triple(
|
||||
s=Value(value=prop_uri, is_uri=True),
|
||||
p=Value(value="http://www.w3.org/2000/01/rdf-schema#domain", is_uri=True),
|
||||
o=Value(value=domain_uri, is_uri=True)
|
||||
s=make_term(prop_uri, is_uri=True),
|
||||
p=make_term("http://www.w3.org/2000/01/rdf-schema#domain", is_uri=True),
|
||||
o=make_term(domain_uri, is_uri=True)
|
||||
))
|
||||
|
||||
# rdfs:range (datatype)
|
||||
|
|
@ -790,9 +798,9 @@ class Processor(FlowProcessor):
|
|||
range_uri = range_val
|
||||
|
||||
ontology_triples.append(Triple(
|
||||
s=Value(value=prop_uri, is_uri=True),
|
||||
p=Value(value="http://www.w3.org/2000/01/rdf-schema#range", is_uri=True),
|
||||
o=Value(value=range_uri, is_uri=True)
|
||||
s=make_term(prop_uri, is_uri=True),
|
||||
p=make_term("http://www.w3.org/2000/01/rdf-schema#range", is_uri=True),
|
||||
o=make_term(range_uri, is_uri=True)
|
||||
))
|
||||
|
||||
logger.info(f"Generated {len(ontology_triples)} triples describing ontology elements")
|
||||
|
|
@ -814,9 +822,9 @@ class Processor(FlowProcessor):
|
|||
entity_data = {} # subject_uri -> {labels: [], definitions: []}
|
||||
|
||||
for triple in triples:
|
||||
subject_uri = triple.s.value
|
||||
predicate_uri = triple.p.value
|
||||
object_val = triple.o.value
|
||||
subject_uri = triple.s.iri if triple.s.type == IRI else triple.s.value
|
||||
predicate_uri = triple.p.iri if triple.p.type == IRI else triple.p.value
|
||||
object_val = triple.o.value if triple.o.type == LITERAL else triple.o.iri
|
||||
|
||||
# Initialize entity data if not exists
|
||||
if subject_uri not in entity_data:
|
||||
|
|
@ -824,12 +832,12 @@ class Processor(FlowProcessor):
|
|||
|
||||
# Collect labels (rdfs:label)
|
||||
if predicate_uri == RDF_LABEL:
|
||||
if not triple.o.is_uri: # Labels are literals
|
||||
if triple.o.type == LITERAL: # Labels are literals
|
||||
entity_data[subject_uri]['labels'].append(object_val)
|
||||
|
||||
# Collect definitions (skos:definition, schema:description)
|
||||
elif predicate_uri == DEFINITION or predicate_uri == "https://schema.org/description":
|
||||
if not triple.o.is_uri:
|
||||
if triple.o.type == LITERAL:
|
||||
entity_data[subject_uri]['definitions'].append(object_val)
|
||||
|
||||
# Build EntityContext objects
|
||||
|
|
@ -848,7 +856,7 @@ class Processor(FlowProcessor):
|
|||
if context_parts:
|
||||
context_text = ". ".join(context_parts)
|
||||
entity_contexts.append(EntityContext(
|
||||
entity=Value(value=subject_uri, is_uri=True),
|
||||
entity=make_term(subject_uri, is_uri=True),
|
||||
context=context_text
|
||||
))
|
||||
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ with full URIs and correct is_uri flags.
|
|||
import logging
|
||||
from typing import List, Optional
|
||||
|
||||
from .... schema import Triple, Value
|
||||
from .... schema import Triple, Term, IRI, LITERAL
|
||||
from .... rdf import RDF_TYPE, RDF_LABEL
|
||||
|
||||
from .simplified_parser import Entity, Relationship, Attribute, ExtractionResult
|
||||
|
|
@ -87,17 +87,17 @@ class TripleConverter:
|
|||
|
||||
# Generate type triple: entity rdf:type ClassURI
|
||||
type_triple = Triple(
|
||||
s=Value(value=entity_uri, is_uri=True),
|
||||
p=Value(value=RDF_TYPE, is_uri=True),
|
||||
o=Value(value=class_uri, is_uri=True)
|
||||
s=Term(type=IRI, iri=entity_uri),
|
||||
p=Term(type=IRI, iri=RDF_TYPE),
|
||||
o=Term(type=IRI, iri=class_uri)
|
||||
)
|
||||
triples.append(type_triple)
|
||||
|
||||
# Generate label triple: entity rdfs:label "entity name"
|
||||
label_triple = Triple(
|
||||
s=Value(value=entity_uri, is_uri=True),
|
||||
p=Value(value=RDF_LABEL, is_uri=True),
|
||||
o=Value(value=entity.entity, is_uri=False) # Literal!
|
||||
s=Term(type=IRI, iri=entity_uri),
|
||||
p=Term(type=IRI, iri=RDF_LABEL),
|
||||
o=Term(type=LITERAL, value=entity.entity) # Literal!
|
||||
)
|
||||
triples.append(label_triple)
|
||||
|
||||
|
|
@ -131,9 +131,9 @@ class TripleConverter:
|
|||
|
||||
# Generate triple: subject property object
|
||||
return Triple(
|
||||
s=Value(value=subject_uri, is_uri=True),
|
||||
p=Value(value=property_uri, is_uri=True),
|
||||
o=Value(value=object_uri, is_uri=True)
|
||||
s=Term(type=IRI, iri=subject_uri),
|
||||
p=Term(type=IRI, iri=property_uri),
|
||||
o=Term(type=IRI, iri=object_uri)
|
||||
)
|
||||
|
||||
def convert_attribute(self, attribute: Attribute) -> Optional[Triple]:
|
||||
|
|
@ -159,9 +159,9 @@ class TripleConverter:
|
|||
|
||||
# Generate triple: entity property "literal value"
|
||||
return Triple(
|
||||
s=Value(value=entity_uri, is_uri=True),
|
||||
p=Value(value=property_uri, is_uri=True),
|
||||
o=Value(value=attribute.value, is_uri=False) # Literal!
|
||||
s=Term(type=IRI, iri=entity_uri),
|
||||
p=Term(type=IRI, iri=property_uri),
|
||||
o=Term(type=LITERAL, value=attribute.value) # Literal!
|
||||
)
|
||||
|
||||
def _get_class_uri(self, class_id: str) -> Optional[str]:
|
||||
|
|
|
|||
|
|
@ -13,15 +13,15 @@ import urllib.parse
|
|||
logger = logging.getLogger(__name__)
|
||||
|
||||
from .... schema import Chunk, Triple, Triples
|
||||
from .... schema import Metadata, Value
|
||||
from .... schema import Metadata, Term, IRI, LITERAL
|
||||
from .... schema import PromptRequest, PromptResponse
|
||||
from .... rdf import RDF_LABEL, TRUSTGRAPH_ENTITIES, SUBJECT_OF
|
||||
|
||||
from .... base import FlowProcessor, ConsumerSpec, ProducerSpec
|
||||
from .... base import PromptClientSpec
|
||||
|
||||
RDF_LABEL_VALUE = Value(value=RDF_LABEL, is_uri=True)
|
||||
SUBJECT_OF_VALUE = Value(value=SUBJECT_OF, is_uri=True)
|
||||
RDF_LABEL_VALUE = Term(type=IRI, iri=RDF_LABEL)
|
||||
SUBJECT_OF_VALUE = Term(type=IRI, iri=SUBJECT_OF)
|
||||
|
||||
default_ident = "kg-extract-relationships"
|
||||
default_concurrency = 1
|
||||
|
|
@ -127,16 +127,16 @@ class Processor(FlowProcessor):
|
|||
if o is None: continue
|
||||
|
||||
s_uri = self.to_uri(s)
|
||||
s_value = Value(value=str(s_uri), is_uri=True)
|
||||
s_value = Term(type=IRI, iri=str(s_uri))
|
||||
|
||||
p_uri = self.to_uri(p)
|
||||
p_value = Value(value=str(p_uri), is_uri=True)
|
||||
p_value = Term(type=IRI, iri=str(p_uri))
|
||||
|
||||
if rel["object-entity"]:
|
||||
if rel["object-entity"]:
|
||||
o_uri = self.to_uri(o)
|
||||
o_value = Value(value=str(o_uri), is_uri=True)
|
||||
o_value = Term(type=IRI, iri=str(o_uri))
|
||||
else:
|
||||
o_value = Value(value=str(o), is_uri=False)
|
||||
o_value = Term(type=LITERAL, value=str(o))
|
||||
|
||||
triples.append(Triple(
|
||||
s=s_value,
|
||||
|
|
@ -148,14 +148,14 @@ class Processor(FlowProcessor):
|
|||
triples.append(Triple(
|
||||
s=s_value,
|
||||
p=RDF_LABEL_VALUE,
|
||||
o=Value(value=str(s), is_uri=False)
|
||||
o=Term(type=LITERAL, value=str(s))
|
||||
))
|
||||
|
||||
# Label for p
|
||||
triples.append(Triple(
|
||||
s=p_value,
|
||||
p=RDF_LABEL_VALUE,
|
||||
o=Value(value=str(p), is_uri=False)
|
||||
o=Term(type=LITERAL, value=str(p))
|
||||
))
|
||||
|
||||
if rel["object-entity"]:
|
||||
|
|
@ -163,14 +163,14 @@ class Processor(FlowProcessor):
|
|||
triples.append(Triple(
|
||||
s=o_value,
|
||||
p=RDF_LABEL_VALUE,
|
||||
o=Value(value=str(o), is_uri=False)
|
||||
o=Term(type=LITERAL, value=str(o))
|
||||
))
|
||||
|
||||
# 'Subject of' for s
|
||||
triples.append(Triple(
|
||||
s=s_value,
|
||||
p=SUBJECT_OF_VALUE,
|
||||
o=Value(value=v.metadata.id, is_uri=True)
|
||||
o=Term(type=IRI, iri=v.metadata.id)
|
||||
))
|
||||
|
||||
if rel["object-entity"]:
|
||||
|
|
@ -178,7 +178,7 @@ class Processor(FlowProcessor):
|
|||
triples.append(Triple(
|
||||
s=o_value,
|
||||
p=SUBJECT_OF_VALUE,
|
||||
o=Value(value=v.metadata.id, is_uri=True)
|
||||
o=Term(type=IRI, iri=v.metadata.id)
|
||||
))
|
||||
|
||||
await self.emit_triples(
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ import logging
|
|||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from .... schema import Chunk, Triple, Triples, Metadata, Value
|
||||
from .... schema import Chunk, Triple, Triples, Metadata, Term, IRI, LITERAL
|
||||
from .... schema import chunk_ingest_queue, triples_store_queue
|
||||
from .... schema import prompt_request_queue
|
||||
from .... schema import prompt_response_queue
|
||||
|
|
@ -20,7 +20,7 @@ from .... clients.prompt_client import PromptClient
|
|||
from .... rdf import TRUSTGRAPH_ENTITIES, DEFINITION
|
||||
from .... base import ConsumerProducer
|
||||
|
||||
DEFINITION_VALUE = Value(value=DEFINITION, is_uri=True)
|
||||
DEFINITION_VALUE = Term(type=IRI, iri=DEFINITION)
|
||||
|
||||
module = "kg-extract-topics"
|
||||
|
||||
|
|
@ -106,8 +106,8 @@ class Processor(ConsumerProducer):
|
|||
|
||||
s_uri = self.to_uri(s)
|
||||
|
||||
s_value = Value(value=str(s_uri), is_uri=True)
|
||||
o_value = Value(value=str(o), is_uri=False)
|
||||
s_value = Term(type=IRI, iri=str(s_uri))
|
||||
o_value = Term(type=LITERAL, value=str(o))
|
||||
|
||||
await self.emit_edge(
|
||||
v.metadata, s_value, DEFINITION_VALUE, o_value
|
||||
|
|
|
|||
|
|
@ -1,46 +1,37 @@
|
|||
|
||||
import base64
|
||||
|
||||
from ... schema import Value, Triple, DocumentMetadata, ProcessingMetadata
|
||||
from ... schema import Term, Triple, DocumentMetadata, ProcessingMetadata
|
||||
from ... messaging.translators.primitives import TermTranslator, TripleTranslator
|
||||
|
||||
# Singleton translator instances
|
||||
_term_translator = TermTranslator()
|
||||
_triple_translator = TripleTranslator()
|
||||
|
||||
# DEPRECATED: These functions have been moved to trustgraph.... messaging.translators
|
||||
# Use the new messaging translation system instead for consistency and reusability.
|
||||
# Examples:
|
||||
# from trustgraph.... messaging.translators.primitives import ValueTranslator
|
||||
# value_translator = ValueTranslator()
|
||||
# pulsar_value = value_translator.to_pulsar({"v": "example", "e": True})
|
||||
|
||||
def to_value(x):
|
||||
return Value(value=x["v"], is_uri=x["e"])
|
||||
"""Convert dict to Term. Delegates to TermTranslator."""
|
||||
return _term_translator.to_pulsar(x)
|
||||
|
||||
|
||||
def to_subgraph(x):
|
||||
return [
|
||||
Triple(
|
||||
s=to_value(t["s"]),
|
||||
p=to_value(t["p"]),
|
||||
o=to_value(t["o"])
|
||||
)
|
||||
for t in x
|
||||
]
|
||||
"""Convert list of dicts to list of Triples. Delegates to TripleTranslator."""
|
||||
return [_triple_translator.to_pulsar(t) for t in x]
|
||||
|
||||
|
||||
def serialize_value(v):
|
||||
return {
|
||||
"v": v.value,
|
||||
"e": v.is_uri,
|
||||
}
|
||||
"""Convert Term to dict. Delegates to TermTranslator."""
|
||||
return _term_translator.from_pulsar(v)
|
||||
|
||||
|
||||
def serialize_triple(t):
|
||||
return {
|
||||
"s": serialize_value(t.s),
|
||||
"p": serialize_value(t.p),
|
||||
"o": serialize_value(t.o)
|
||||
}
|
||||
"""Convert Triple to dict. Delegates to TripleTranslator."""
|
||||
return _triple_translator.from_pulsar(t)
|
||||
|
||||
|
||||
def serialize_subgraph(sg):
|
||||
return [
|
||||
serialize_triple(t)
|
||||
for t in sg
|
||||
]
|
||||
"""Convert list of Triples to list of dicts."""
|
||||
return [serialize_triple(t) for t in sg]
|
||||
|
||||
def serialize_triples(message):
|
||||
return {
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ import logging
|
|||
|
||||
from .... direct.milvus_doc_embeddings import DocVectors
|
||||
from .... schema import DocumentEmbeddingsResponse
|
||||
from .... schema import Error, Value
|
||||
from .... schema import Error
|
||||
from .... base import DocumentEmbeddingsQueryService
|
||||
|
||||
# Module logger
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ from qdrant_client.models import PointStruct
|
|||
from qdrant_client.models import Distance, VectorParams
|
||||
|
||||
from .... schema import DocumentEmbeddingsResponse
|
||||
from .... schema import Error, Value
|
||||
from .... schema import Error
|
||||
from .... base import DocumentEmbeddingsQueryService
|
||||
|
||||
# Module logger
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ import logging
|
|||
|
||||
from .... direct.milvus_graph_embeddings import EntityVectors
|
||||
from .... schema import GraphEmbeddingsResponse
|
||||
from .... schema import Error, Value
|
||||
from .... schema import Error, Term, IRI, LITERAL
|
||||
from .... base import GraphEmbeddingsQueryService
|
||||
|
||||
# Module logger
|
||||
|
|
@ -33,9 +33,9 @@ class Processor(GraphEmbeddingsQueryService):
|
|||
|
||||
def create_value(self, ent):
|
||||
if ent.startswith("http://") or ent.startswith("https://"):
|
||||
return Value(value=ent, is_uri=True)
|
||||
return Term(type=IRI, iri=ent)
|
||||
else:
|
||||
return Value(value=ent, is_uri=False)
|
||||
return Term(type=LITERAL, value=ent)
|
||||
|
||||
async def query_graph_embeddings(self, msg):
|
||||
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ from pinecone import Pinecone, ServerlessSpec
|
|||
from pinecone.grpc import PineconeGRPC, GRPCClientConfig
|
||||
|
||||
from .... schema import GraphEmbeddingsResponse
|
||||
from .... schema import Error, Value
|
||||
from .... schema import Error, Term, IRI, LITERAL
|
||||
from .... base import GraphEmbeddingsQueryService
|
||||
|
||||
# Module logger
|
||||
|
|
@ -51,9 +51,9 @@ class Processor(GraphEmbeddingsQueryService):
|
|||
|
||||
def create_value(self, ent):
|
||||
if ent.startswith("http://") or ent.startswith("https://"):
|
||||
return Value(value=ent, is_uri=True)
|
||||
return Term(type=IRI, iri=ent)
|
||||
else:
|
||||
return Value(value=ent, is_uri=False)
|
||||
return Term(type=LITERAL, value=ent)
|
||||
|
||||
async def query_graph_embeddings(self, msg):
|
||||
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ from qdrant_client.models import PointStruct
|
|||
from qdrant_client.models import Distance, VectorParams
|
||||
|
||||
from .... schema import GraphEmbeddingsResponse
|
||||
from .... schema import Error, Value
|
||||
from .... schema import Error, Term, IRI, LITERAL
|
||||
from .... base import GraphEmbeddingsQueryService
|
||||
|
||||
# Module logger
|
||||
|
|
@ -67,9 +67,9 @@ class Processor(GraphEmbeddingsQueryService):
|
|||
|
||||
def create_value(self, ent):
|
||||
if ent.startswith("http://") or ent.startswith("https://"):
|
||||
return Value(value=ent, is_uri=True)
|
||||
return Term(type=IRI, iri=ent)
|
||||
else:
|
||||
return Value(value=ent, is_uri=False)
|
||||
return Term(type=LITERAL, value=ent)
|
||||
|
||||
async def query_graph_embeddings(self, msg):
|
||||
|
||||
|
|
|
|||
|
|
@ -1,14 +1,14 @@
|
|||
|
||||
"""
|
||||
Triples query service. Input is a (s, p, o) triple, some values may be
|
||||
null. Output is a list of triples.
|
||||
Triples query service. Input is a (s, p, o, g) quad pattern, some values may be
|
||||
null. Output is a list of quads.
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
from .... direct.cassandra_kg import KnowledgeGraph
|
||||
from .... direct.cassandra_kg import KnowledgeGraph, GRAPH_WILDCARD, DEFAULT_GRAPH
|
||||
from .... schema import TriplesQueryRequest, TriplesQueryResponse, Error
|
||||
from .... schema import Value, Triple
|
||||
from .... schema import Term, Triple, IRI, LITERAL
|
||||
from .... base import TriplesQueryService
|
||||
from .... base.cassandra_config import add_cassandra_args, resolve_cassandra_config
|
||||
|
||||
|
|
@ -18,6 +18,27 @@ logger = logging.getLogger(__name__)
|
|||
default_ident = "triples-query"
|
||||
|
||||
|
||||
def get_term_value(term):
|
||||
"""Extract the string value from a Term"""
|
||||
if term is None:
|
||||
return None
|
||||
if term.type == IRI:
|
||||
return term.iri
|
||||
elif term.type == LITERAL:
|
||||
return term.value
|
||||
else:
|
||||
# For blank nodes or other types, use id or value
|
||||
return term.id or term.value
|
||||
|
||||
|
||||
def create_term(value):
|
||||
"""Create a Term from a string value"""
|
||||
if value.startswith("http://") or value.startswith("https://"):
|
||||
return Term(type=IRI, iri=value)
|
||||
else:
|
||||
return Term(type=LITERAL, value=value)
|
||||
|
||||
|
||||
class Processor(TriplesQueryService):
|
||||
|
||||
def __init__(self, **params):
|
||||
|
|
@ -46,12 +67,6 @@ class Processor(TriplesQueryService):
|
|||
self.cassandra_password = password
|
||||
self.table = None
|
||||
|
||||
def create_value(self, ent):
|
||||
if ent.startswith("http://") or ent.startswith("https://"):
|
||||
return Value(value=ent, is_uri=True)
|
||||
else:
|
||||
return Value(value=ent, is_uri=False)
|
||||
|
||||
async def query_triples(self, query):
|
||||
|
||||
try:
|
||||
|
|
@ -72,77 +87,103 @@ class Processor(TriplesQueryService):
|
|||
)
|
||||
self.table = user
|
||||
|
||||
triples = []
|
||||
# Extract values from query
|
||||
s_val = get_term_value(query.s)
|
||||
p_val = get_term_value(query.p)
|
||||
o_val = get_term_value(query.o)
|
||||
g_val = query.g # Already a string or None
|
||||
|
||||
if query.s is not None:
|
||||
if query.p is not None:
|
||||
if query.o is not None:
|
||||
quads = []
|
||||
|
||||
# Route to appropriate query method based on which fields are specified
|
||||
if s_val is not None:
|
||||
if p_val is not None:
|
||||
if o_val is not None:
|
||||
# SPO specified - find matching graphs
|
||||
resp = self.tg.get_spo(
|
||||
query.collection, query.s.value, query.p.value, query.o.value,
|
||||
query.collection, s_val, p_val, o_val, g=g_val,
|
||||
limit=query.limit
|
||||
)
|
||||
triples.append((query.s.value, query.p.value, query.o.value))
|
||||
for t in resp:
|
||||
g = t.g if hasattr(t, 'g') else DEFAULT_GRAPH
|
||||
quads.append((s_val, p_val, o_val, g))
|
||||
else:
|
||||
# SP specified
|
||||
resp = self.tg.get_sp(
|
||||
query.collection, query.s.value, query.p.value,
|
||||
query.collection, s_val, p_val, g=g_val,
|
||||
limit=query.limit
|
||||
)
|
||||
for t in resp:
|
||||
triples.append((query.s.value, query.p.value, t.o))
|
||||
g = t.g if hasattr(t, 'g') else DEFAULT_GRAPH
|
||||
quads.append((s_val, p_val, t.o, g))
|
||||
else:
|
||||
if query.o is not None:
|
||||
if o_val is not None:
|
||||
# SO specified
|
||||
resp = self.tg.get_os(
|
||||
query.collection, query.o.value, query.s.value,
|
||||
query.collection, o_val, s_val, g=g_val,
|
||||
limit=query.limit
|
||||
)
|
||||
for t in resp:
|
||||
triples.append((query.s.value, t.p, query.o.value))
|
||||
g = t.g if hasattr(t, 'g') else DEFAULT_GRAPH
|
||||
quads.append((s_val, t.p, o_val, g))
|
||||
else:
|
||||
# S only
|
||||
resp = self.tg.get_s(
|
||||
query.collection, query.s.value,
|
||||
query.collection, s_val, g=g_val,
|
||||
limit=query.limit
|
||||
)
|
||||
for t in resp:
|
||||
triples.append((query.s.value, t.p, t.o))
|
||||
g = t.g if hasattr(t, 'g') else DEFAULT_GRAPH
|
||||
quads.append((s_val, t.p, t.o, g))
|
||||
else:
|
||||
if query.p is not None:
|
||||
if query.o is not None:
|
||||
if p_val is not None:
|
||||
if o_val is not None:
|
||||
# PO specified
|
||||
resp = self.tg.get_po(
|
||||
query.collection, query.p.value, query.o.value,
|
||||
query.collection, p_val, o_val, g=g_val,
|
||||
limit=query.limit
|
||||
)
|
||||
for t in resp:
|
||||
triples.append((t.s, query.p.value, query.o.value))
|
||||
g = t.g if hasattr(t, 'g') else DEFAULT_GRAPH
|
||||
quads.append((t.s, p_val, o_val, g))
|
||||
else:
|
||||
# P only
|
||||
resp = self.tg.get_p(
|
||||
query.collection, query.p.value,
|
||||
query.collection, p_val, g=g_val,
|
||||
limit=query.limit
|
||||
)
|
||||
for t in resp:
|
||||
triples.append((t.s, query.p.value, t.o))
|
||||
g = t.g if hasattr(t, 'g') else DEFAULT_GRAPH
|
||||
quads.append((t.s, p_val, t.o, g))
|
||||
else:
|
||||
if query.o is not None:
|
||||
if o_val is not None:
|
||||
# O only
|
||||
resp = self.tg.get_o(
|
||||
query.collection, query.o.value,
|
||||
query.collection, o_val, g=g_val,
|
||||
limit=query.limit
|
||||
)
|
||||
for t in resp:
|
||||
triples.append((t.s, t.p, query.o.value))
|
||||
g = t.g if hasattr(t, 'g') else DEFAULT_GRAPH
|
||||
quads.append((t.s, t.p, o_val, g))
|
||||
else:
|
||||
# Nothing specified - get all
|
||||
resp = self.tg.get_all(
|
||||
query.collection,
|
||||
limit=query.limit
|
||||
)
|
||||
for t in resp:
|
||||
triples.append((t.s, t.p, t.o))
|
||||
g = t.g if hasattr(t, 'g') else DEFAULT_GRAPH
|
||||
quads.append((t.s, t.p, t.o, g))
|
||||
|
||||
# Convert to Triple objects (with g field)
|
||||
triples = [
|
||||
Triple(
|
||||
s=self.create_value(t[0]),
|
||||
p=self.create_value(t[1]),
|
||||
o=self.create_value(t[2])
|
||||
s=create_term(q[0]),
|
||||
p=create_term(q[1]),
|
||||
o=create_term(q[2]),
|
||||
g=q[3] if q[3] != DEFAULT_GRAPH else None
|
||||
)
|
||||
for t in triples
|
||||
for q in quads
|
||||
]
|
||||
|
||||
return triples
|
||||
|
|
@ -162,4 +203,3 @@ class Processor(TriplesQueryService):
|
|||
def run():
|
||||
|
||||
Processor.launch(default_ident, __doc__)
|
||||
|
||||
|
|
|
|||
|
|
@ -10,12 +10,24 @@ import logging
|
|||
from falkordb import FalkorDB
|
||||
|
||||
from .... schema import TriplesQueryRequest, TriplesQueryResponse, Error
|
||||
from .... schema import Value, Triple
|
||||
from .... schema import Term, Triple, IRI, LITERAL
|
||||
from .... base import TriplesQueryService
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def get_term_value(term):
|
||||
"""Extract the string value from a Term"""
|
||||
if term is None:
|
||||
return None
|
||||
if term.type == IRI:
|
||||
return term.iri
|
||||
elif term.type == LITERAL:
|
||||
return term.value
|
||||
else:
|
||||
return term.id or term.value
|
||||
|
||||
default_ident = "triples-query"
|
||||
|
||||
default_graph_url = 'falkor://falkordb:6379'
|
||||
|
|
@ -42,9 +54,9 @@ class Processor(TriplesQueryService):
|
|||
def create_value(self, ent):
|
||||
|
||||
if ent.startswith("http://") or ent.startswith("https://"):
|
||||
return Value(value=ent, is_uri=True)
|
||||
return Term(type=IRI, iri=ent)
|
||||
else:
|
||||
return Value(value=ent, is_uri=False)
|
||||
return Term(type=LITERAL, value=ent)
|
||||
|
||||
async def query_triples(self, query):
|
||||
|
||||
|
|
@ -63,28 +75,28 @@ class Processor(TriplesQueryService):
|
|||
"RETURN $src as src "
|
||||
"LIMIT " + str(query.limit),
|
||||
params={
|
||||
"src": query.s.value,
|
||||
"rel": query.p.value,
|
||||
"value": query.o.value,
|
||||
"src": get_term_value(query.s),
|
||||
"rel": get_term_value(query.p),
|
||||
"value": get_term_value(query.o),
|
||||
},
|
||||
).result_set
|
||||
|
||||
for rec in records:
|
||||
triples.append((query.s.value, query.p.value, query.o.value))
|
||||
triples.append((get_term_value(query.s), get_term_value(query.p), get_term_value(query.o)))
|
||||
|
||||
records = self.io.query(
|
||||
"MATCH (src:Node {uri: $src})-[rel:Rel {uri: $rel}]->(dest:Node {uri: $uri}) "
|
||||
"RETURN $src as src "
|
||||
"LIMIT " + str(query.limit),
|
||||
params={
|
||||
"src": query.s.value,
|
||||
"rel": query.p.value,
|
||||
"uri": query.o.value,
|
||||
"src": get_term_value(query.s),
|
||||
"rel": get_term_value(query.p),
|
||||
"uri": get_term_value(query.o),
|
||||
},
|
||||
).result_set
|
||||
|
||||
for rec in records:
|
||||
triples.append((query.s.value, query.p.value, query.o.value))
|
||||
triples.append((get_term_value(query.s), get_term_value(query.p), get_term_value(query.o)))
|
||||
|
||||
else:
|
||||
|
||||
|
|
@ -95,26 +107,26 @@ class Processor(TriplesQueryService):
|
|||
"RETURN dest.value as dest "
|
||||
"LIMIT " + str(query.limit),
|
||||
params={
|
||||
"src": query.s.value,
|
||||
"rel": query.p.value,
|
||||
"src": get_term_value(query.s),
|
||||
"rel": get_term_value(query.p),
|
||||
},
|
||||
).result_set
|
||||
|
||||
for rec in records:
|
||||
triples.append((query.s.value, query.p.value, rec[0]))
|
||||
triples.append((get_term_value(query.s), get_term_value(query.p), rec[0]))
|
||||
|
||||
records = self.io.query(
|
||||
"MATCH (src:Node {uri: $src})-[rel:Rel {uri: $rel}]->(dest:Node) "
|
||||
"RETURN dest.uri as dest "
|
||||
"LIMIT " + str(query.limit),
|
||||
params={
|
||||
"src": query.s.value,
|
||||
"rel": query.p.value,
|
||||
"src": get_term_value(query.s),
|
||||
"rel": get_term_value(query.p),
|
||||
},
|
||||
).result_set
|
||||
|
||||
for rec in records:
|
||||
triples.append((query.s.value, query.p.value, rec[0]))
|
||||
triples.append((get_term_value(query.s), get_term_value(query.p), rec[0]))
|
||||
|
||||
else:
|
||||
|
||||
|
|
@ -127,26 +139,26 @@ class Processor(TriplesQueryService):
|
|||
"RETURN rel.uri as rel "
|
||||
"LIMIT " + str(query.limit),
|
||||
params={
|
||||
"src": query.s.value,
|
||||
"value": query.o.value,
|
||||
"src": get_term_value(query.s),
|
||||
"value": get_term_value(query.o),
|
||||
},
|
||||
).result_set
|
||||
|
||||
for rec in records:
|
||||
triples.append((query.s.value, rec[0], query.o.value))
|
||||
triples.append((get_term_value(query.s), rec[0], get_term_value(query.o)))
|
||||
|
||||
records = self.io.query(
|
||||
"MATCH (src:Node {uri: $src})-[rel:Rel]->(dest:Node {uri: $uri}) "
|
||||
"RETURN rel.uri as rel "
|
||||
"LIMIT " + str(query.limit),
|
||||
params={
|
||||
"src": query.s.value,
|
||||
"uri": query.o.value,
|
||||
"src": get_term_value(query.s),
|
||||
"uri": get_term_value(query.o),
|
||||
},
|
||||
).result_set
|
||||
|
||||
for rec in records:
|
||||
triples.append((query.s.value, rec[0], query.o.value))
|
||||
triples.append((get_term_value(query.s), rec[0], get_term_value(query.o)))
|
||||
|
||||
else:
|
||||
|
||||
|
|
@ -157,24 +169,24 @@ class Processor(TriplesQueryService):
|
|||
"RETURN rel.uri as rel, dest.value as dest "
|
||||
"LIMIT " + str(query.limit),
|
||||
params={
|
||||
"src": query.s.value,
|
||||
"src": get_term_value(query.s),
|
||||
},
|
||||
).result_set
|
||||
|
||||
for rec in records:
|
||||
triples.append((query.s.value, rec[0], rec[1]))
|
||||
triples.append((get_term_value(query.s), rec[0], rec[1]))
|
||||
|
||||
records = self.io.query(
|
||||
"MATCH (src:Node {uri: $src})-[rel:Rel]->(dest:Node) "
|
||||
"RETURN rel.uri as rel, dest.uri as dest "
|
||||
"LIMIT " + str(query.limit),
|
||||
params={
|
||||
"src": query.s.value,
|
||||
"src": get_term_value(query.s),
|
||||
},
|
||||
).result_set
|
||||
|
||||
for rec in records:
|
||||
triples.append((query.s.value, rec[0], rec[1]))
|
||||
triples.append((get_term_value(query.s), rec[0], rec[1]))
|
||||
|
||||
|
||||
else:
|
||||
|
|
@ -190,26 +202,26 @@ class Processor(TriplesQueryService):
|
|||
"RETURN src.uri as src "
|
||||
"LIMIT " + str(query.limit),
|
||||
params={
|
||||
"uri": query.p.value,
|
||||
"value": query.o.value,
|
||||
"uri": get_term_value(query.p),
|
||||
"value": get_term_value(query.o),
|
||||
},
|
||||
).result_set
|
||||
|
||||
for rec in records:
|
||||
triples.append((rec[0], query.p.value, query.o.value))
|
||||
triples.append((rec[0], get_term_value(query.p), get_term_value(query.o)))
|
||||
|
||||
records = self.io.query(
|
||||
"MATCH (src:Node)-[rel:Rel {uri: $uri}]->(dest:Node {uri: $dest}) "
|
||||
"RETURN src.uri as src "
|
||||
"LIMIT " + str(query.limit),
|
||||
params={
|
||||
"uri": query.p.value,
|
||||
"dest": query.o.value,
|
||||
"uri": get_term_value(query.p),
|
||||
"dest": get_term_value(query.o),
|
||||
},
|
||||
).result_set
|
||||
|
||||
for rec in records:
|
||||
triples.append((rec[0], query.p.value, query.o.value))
|
||||
triples.append((rec[0], get_term_value(query.p), get_term_value(query.o)))
|
||||
|
||||
else:
|
||||
|
||||
|
|
@ -220,24 +232,24 @@ class Processor(TriplesQueryService):
|
|||
"RETURN src.uri as src, dest.value as dest "
|
||||
"LIMIT " + str(query.limit),
|
||||
params={
|
||||
"uri": query.p.value,
|
||||
"uri": get_term_value(query.p),
|
||||
},
|
||||
).result_set
|
||||
|
||||
for rec in records:
|
||||
triples.append((rec[0], query.p.value, rec[1]))
|
||||
triples.append((rec[0], get_term_value(query.p), rec[1]))
|
||||
|
||||
records = self.io.query(
|
||||
"MATCH (src:Node)-[rel:Rel {uri: $uri}]->(dest:Node) "
|
||||
"RETURN src.uri as src, dest.uri as dest "
|
||||
"LIMIT " + str(query.limit),
|
||||
params={
|
||||
"uri": query.p.value,
|
||||
"uri": get_term_value(query.p),
|
||||
},
|
||||
).result_set
|
||||
|
||||
for rec in records:
|
||||
triples.append((rec[0], query.p.value, rec[1]))
|
||||
triples.append((rec[0], get_term_value(query.p), rec[1]))
|
||||
|
||||
else:
|
||||
|
||||
|
|
@ -250,24 +262,24 @@ class Processor(TriplesQueryService):
|
|||
"RETURN src.uri as src, rel.uri as rel "
|
||||
"LIMIT " + str(query.limit),
|
||||
params={
|
||||
"value": query.o.value,
|
||||
"value": get_term_value(query.o),
|
||||
},
|
||||
).result_set
|
||||
|
||||
for rec in records:
|
||||
triples.append((rec[0], rec[1], query.o.value))
|
||||
triples.append((rec[0], rec[1], get_term_value(query.o)))
|
||||
|
||||
records = self.io.query(
|
||||
"MATCH (src:Node)-[rel:Rel]->(dest:Node {uri: $uri}) "
|
||||
"RETURN src.uri as src, rel.uri as rel "
|
||||
"LIMIT " + str(query.limit),
|
||||
params={
|
||||
"uri": query.o.value,
|
||||
"uri": get_term_value(query.o),
|
||||
},
|
||||
).result_set
|
||||
|
||||
for rec in records:
|
||||
triples.append((rec[0], rec[1], query.o.value))
|
||||
triples.append((rec[0], rec[1], get_term_value(query.o)))
|
||||
|
||||
else:
|
||||
|
||||
|
|
|
|||
|
|
@ -10,12 +10,24 @@ import logging
|
|||
from neo4j import GraphDatabase
|
||||
|
||||
from .... schema import TriplesQueryRequest, TriplesQueryResponse, Error
|
||||
from .... schema import Value, Triple
|
||||
from .... schema import Term, Triple, IRI, LITERAL
|
||||
from .... base import TriplesQueryService
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def get_term_value(term):
|
||||
"""Extract the string value from a Term"""
|
||||
if term is None:
|
||||
return None
|
||||
if term.type == IRI:
|
||||
return term.iri
|
||||
elif term.type == LITERAL:
|
||||
return term.value
|
||||
else:
|
||||
return term.id or term.value
|
||||
|
||||
default_ident = "triples-query"
|
||||
|
||||
default_graph_host = 'bolt://memgraph:7687'
|
||||
|
|
@ -47,9 +59,9 @@ class Processor(TriplesQueryService):
|
|||
def create_value(self, ent):
|
||||
|
||||
if ent.startswith("http://") or ent.startswith("https://"):
|
||||
return Value(value=ent, is_uri=True)
|
||||
return Term(type=IRI, iri=ent)
|
||||
else:
|
||||
return Value(value=ent, is_uri=False)
|
||||
return Term(type=LITERAL, value=ent)
|
||||
|
||||
async def query_triples(self, query):
|
||||
|
||||
|
|
@ -73,13 +85,13 @@ class Processor(TriplesQueryService):
|
|||
"(dest:Literal {value: $value, user: $user, collection: $collection}) "
|
||||
"RETURN $src as src "
|
||||
"LIMIT " + str(query.limit),
|
||||
src=query.s.value, rel=query.p.value, value=query.o.value,
|
||||
src=get_term_value(query.s), rel=get_term_value(query.p), value=get_term_value(query.o),
|
||||
user=user, collection=collection,
|
||||
database_=self.db,
|
||||
)
|
||||
|
||||
for rec in records:
|
||||
triples.append((query.s.value, query.p.value, query.o.value))
|
||||
triples.append((get_term_value(query.s), get_term_value(query.p), get_term_value(query.o)))
|
||||
|
||||
records, summary, keys = self.io.execute_query(
|
||||
"MATCH (src:Node {uri: $src, user: $user, collection: $collection})-"
|
||||
|
|
@ -87,13 +99,13 @@ class Processor(TriplesQueryService):
|
|||
"(dest:Node {uri: $uri, user: $user, collection: $collection}) "
|
||||
"RETURN $src as src "
|
||||
"LIMIT " + str(query.limit),
|
||||
src=query.s.value, rel=query.p.value, uri=query.o.value,
|
||||
src=get_term_value(query.s), rel=get_term_value(query.p), uri=get_term_value(query.o),
|
||||
user=user, collection=collection,
|
||||
database_=self.db,
|
||||
)
|
||||
|
||||
for rec in records:
|
||||
triples.append((query.s.value, query.p.value, query.o.value))
|
||||
triples.append((get_term_value(query.s), get_term_value(query.p), get_term_value(query.o)))
|
||||
|
||||
else:
|
||||
|
||||
|
|
@ -105,14 +117,14 @@ class Processor(TriplesQueryService):
|
|||
"(dest:Literal {user: $user, collection: $collection}) "
|
||||
"RETURN dest.value as dest "
|
||||
"LIMIT " + str(query.limit),
|
||||
src=query.s.value, rel=query.p.value,
|
||||
src=get_term_value(query.s), rel=get_term_value(query.p),
|
||||
user=user, collection=collection,
|
||||
database_=self.db,
|
||||
)
|
||||
|
||||
for rec in records:
|
||||
data = rec.data()
|
||||
triples.append((query.s.value, query.p.value, data["dest"]))
|
||||
triples.append((get_term_value(query.s), get_term_value(query.p), data["dest"]))
|
||||
|
||||
records, summary, keys = self.io.execute_query(
|
||||
"MATCH (src:Node {uri: $src, user: $user, collection: $collection})-"
|
||||
|
|
@ -120,14 +132,14 @@ class Processor(TriplesQueryService):
|
|||
"(dest:Node {user: $user, collection: $collection}) "
|
||||
"RETURN dest.uri as dest "
|
||||
"LIMIT " + str(query.limit),
|
||||
src=query.s.value, rel=query.p.value,
|
||||
src=get_term_value(query.s), rel=get_term_value(query.p),
|
||||
user=user, collection=collection,
|
||||
database_=self.db,
|
||||
)
|
||||
|
||||
for rec in records:
|
||||
data = rec.data()
|
||||
triples.append((query.s.value, query.p.value, data["dest"]))
|
||||
triples.append((get_term_value(query.s), get_term_value(query.p), data["dest"]))
|
||||
|
||||
else:
|
||||
|
||||
|
|
@ -141,14 +153,14 @@ class Processor(TriplesQueryService):
|
|||
"(dest:Literal {value: $value, user: $user, collection: $collection}) "
|
||||
"RETURN rel.uri as rel "
|
||||
"LIMIT " + str(query.limit),
|
||||
src=query.s.value, value=query.o.value,
|
||||
src=get_term_value(query.s), value=get_term_value(query.o),
|
||||
user=user, collection=collection,
|
||||
database_=self.db,
|
||||
)
|
||||
|
||||
for rec in records:
|
||||
data = rec.data()
|
||||
triples.append((query.s.value, data["rel"], query.o.value))
|
||||
triples.append((get_term_value(query.s), data["rel"], get_term_value(query.o)))
|
||||
|
||||
records, summary, keys = self.io.execute_query(
|
||||
"MATCH (src:Node {uri: $src, user: $user, collection: $collection})-"
|
||||
|
|
@ -156,14 +168,14 @@ class Processor(TriplesQueryService):
|
|||
"(dest:Node {uri: $uri, user: $user, collection: $collection}) "
|
||||
"RETURN rel.uri as rel "
|
||||
"LIMIT " + str(query.limit),
|
||||
src=query.s.value, uri=query.o.value,
|
||||
src=get_term_value(query.s), uri=get_term_value(query.o),
|
||||
user=user, collection=collection,
|
||||
database_=self.db,
|
||||
)
|
||||
|
||||
for rec in records:
|
||||
data = rec.data()
|
||||
triples.append((query.s.value, data["rel"], query.o.value))
|
||||
triples.append((get_term_value(query.s), data["rel"], get_term_value(query.o)))
|
||||
|
||||
else:
|
||||
|
||||
|
|
@ -175,14 +187,14 @@ class Processor(TriplesQueryService):
|
|||
"(dest:Literal {user: $user, collection: $collection}) "
|
||||
"RETURN rel.uri as rel, dest.value as dest "
|
||||
"LIMIT " + str(query.limit),
|
||||
src=query.s.value,
|
||||
src=get_term_value(query.s),
|
||||
user=user, collection=collection,
|
||||
database_=self.db,
|
||||
)
|
||||
|
||||
for rec in records:
|
||||
data = rec.data()
|
||||
triples.append((query.s.value, data["rel"], data["dest"]))
|
||||
triples.append((get_term_value(query.s), data["rel"], data["dest"]))
|
||||
|
||||
records, summary, keys = self.io.execute_query(
|
||||
"MATCH (src:Node {uri: $src, user: $user, collection: $collection})-"
|
||||
|
|
@ -190,14 +202,14 @@ class Processor(TriplesQueryService):
|
|||
"(dest:Node {user: $user, collection: $collection}) "
|
||||
"RETURN rel.uri as rel, dest.uri as dest "
|
||||
"LIMIT " + str(query.limit),
|
||||
src=query.s.value,
|
||||
src=get_term_value(query.s),
|
||||
user=user, collection=collection,
|
||||
database_=self.db,
|
||||
)
|
||||
|
||||
for rec in records:
|
||||
data = rec.data()
|
||||
triples.append((query.s.value, data["rel"], data["dest"]))
|
||||
triples.append((get_term_value(query.s), data["rel"], data["dest"]))
|
||||
|
||||
|
||||
else:
|
||||
|
|
@ -214,14 +226,14 @@ class Processor(TriplesQueryService):
|
|||
"(dest:Literal {value: $value, user: $user, collection: $collection}) "
|
||||
"RETURN src.uri as src "
|
||||
"LIMIT " + str(query.limit),
|
||||
uri=query.p.value, value=query.o.value,
|
||||
uri=get_term_value(query.p), value=get_term_value(query.o),
|
||||
user=user, collection=collection,
|
||||
database_=self.db,
|
||||
)
|
||||
|
||||
for rec in records:
|
||||
data = rec.data()
|
||||
triples.append((data["src"], query.p.value, query.o.value))
|
||||
triples.append((data["src"], get_term_value(query.p), get_term_value(query.o)))
|
||||
|
||||
records, summary, keys = self.io.execute_query(
|
||||
"MATCH (src:Node {user: $user, collection: $collection})-"
|
||||
|
|
@ -229,14 +241,14 @@ class Processor(TriplesQueryService):
|
|||
"(dest:Node {uri: $dest, user: $user, collection: $collection}) "
|
||||
"RETURN src.uri as src "
|
||||
"LIMIT " + str(query.limit),
|
||||
uri=query.p.value, dest=query.o.value,
|
||||
uri=get_term_value(query.p), dest=get_term_value(query.o),
|
||||
user=user, collection=collection,
|
||||
database_=self.db,
|
||||
)
|
||||
|
||||
for rec in records:
|
||||
data = rec.data()
|
||||
triples.append((data["src"], query.p.value, query.o.value))
|
||||
triples.append((data["src"], get_term_value(query.p), get_term_value(query.o)))
|
||||
|
||||
else:
|
||||
|
||||
|
|
@ -248,14 +260,14 @@ class Processor(TriplesQueryService):
|
|||
"(dest:Literal {user: $user, collection: $collection}) "
|
||||
"RETURN src.uri as src, dest.value as dest "
|
||||
"LIMIT " + str(query.limit),
|
||||
uri=query.p.value,
|
||||
uri=get_term_value(query.p),
|
||||
user=user, collection=collection,
|
||||
database_=self.db,
|
||||
)
|
||||
|
||||
for rec in records:
|
||||
data = rec.data()
|
||||
triples.append((data["src"], query.p.value, data["dest"]))
|
||||
triples.append((data["src"], get_term_value(query.p), data["dest"]))
|
||||
|
||||
records, summary, keys = self.io.execute_query(
|
||||
"MATCH (src:Node {user: $user, collection: $collection})-"
|
||||
|
|
@ -263,14 +275,14 @@ class Processor(TriplesQueryService):
|
|||
"(dest:Node {user: $user, collection: $collection}) "
|
||||
"RETURN src.uri as src, dest.uri as dest "
|
||||
"LIMIT " + str(query.limit),
|
||||
uri=query.p.value,
|
||||
uri=get_term_value(query.p),
|
||||
user=user, collection=collection,
|
||||
database_=self.db,
|
||||
)
|
||||
|
||||
for rec in records:
|
||||
data = rec.data()
|
||||
triples.append((data["src"], query.p.value, data["dest"]))
|
||||
triples.append((data["src"], get_term_value(query.p), data["dest"]))
|
||||
|
||||
else:
|
||||
|
||||
|
|
@ -284,14 +296,14 @@ class Processor(TriplesQueryService):
|
|||
"(dest:Literal {value: $value, user: $user, collection: $collection}) "
|
||||
"RETURN src.uri as src, rel.uri as rel "
|
||||
"LIMIT " + str(query.limit),
|
||||
value=query.o.value,
|
||||
value=get_term_value(query.o),
|
||||
user=user, collection=collection,
|
||||
database_=self.db,
|
||||
)
|
||||
|
||||
for rec in records:
|
||||
data = rec.data()
|
||||
triples.append((data["src"], data["rel"], query.o.value))
|
||||
triples.append((data["src"], data["rel"], get_term_value(query.o)))
|
||||
|
||||
records, summary, keys = self.io.execute_query(
|
||||
"MATCH (src:Node {user: $user, collection: $collection})-"
|
||||
|
|
@ -299,14 +311,14 @@ class Processor(TriplesQueryService):
|
|||
"(dest:Node {uri: $uri, user: $user, collection: $collection}) "
|
||||
"RETURN src.uri as src, rel.uri as rel "
|
||||
"LIMIT " + str(query.limit),
|
||||
uri=query.o.value,
|
||||
uri=get_term_value(query.o),
|
||||
user=user, collection=collection,
|
||||
database_=self.db,
|
||||
)
|
||||
|
||||
for rec in records:
|
||||
data = rec.data()
|
||||
triples.append((data["src"], data["rel"], query.o.value))
|
||||
triples.append((data["src"], data["rel"], get_term_value(query.o)))
|
||||
|
||||
else:
|
||||
|
||||
|
|
|
|||
|
|
@ -10,12 +10,24 @@ import logging
|
|||
from neo4j import GraphDatabase
|
||||
|
||||
from .... schema import TriplesQueryRequest, TriplesQueryResponse, Error
|
||||
from .... schema import Value, Triple
|
||||
from .... schema import Term, Triple, IRI, LITERAL
|
||||
from .... base import TriplesQueryService
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def get_term_value(term):
|
||||
"""Extract the string value from a Term"""
|
||||
if term is None:
|
||||
return None
|
||||
if term.type == IRI:
|
||||
return term.iri
|
||||
elif term.type == LITERAL:
|
||||
return term.value
|
||||
else:
|
||||
return term.id or term.value
|
||||
|
||||
default_ident = "triples-query"
|
||||
|
||||
default_graph_host = 'bolt://neo4j:7687'
|
||||
|
|
@ -47,9 +59,9 @@ class Processor(TriplesQueryService):
|
|||
def create_value(self, ent):
|
||||
|
||||
if ent.startswith("http://") or ent.startswith("https://"):
|
||||
return Value(value=ent, is_uri=True)
|
||||
return Term(type=IRI, iri=ent)
|
||||
else:
|
||||
return Value(value=ent, is_uri=False)
|
||||
return Term(type=LITERAL, value=ent)
|
||||
|
||||
async def query_triples(self, query):
|
||||
|
||||
|
|
@ -73,13 +85,13 @@ class Processor(TriplesQueryService):
|
|||
"(dest:Literal {value: $value, user: $user, collection: $collection}) "
|
||||
"RETURN $src as src "
|
||||
"LIMIT " + str(query.limit),
|
||||
src=query.s.value, rel=query.p.value, value=query.o.value,
|
||||
src=get_term_value(query.s), rel=get_term_value(query.p), value=get_term_value(query.o),
|
||||
user=user, collection=collection,
|
||||
database_=self.db,
|
||||
)
|
||||
|
||||
for rec in records:
|
||||
triples.append((query.s.value, query.p.value, query.o.value))
|
||||
triples.append((get_term_value(query.s), get_term_value(query.p), get_term_value(query.o)))
|
||||
|
||||
records, summary, keys = self.io.execute_query(
|
||||
"MATCH (src:Node {uri: $src, user: $user, collection: $collection})-"
|
||||
|
|
@ -87,13 +99,13 @@ class Processor(TriplesQueryService):
|
|||
"(dest:Node {uri: $uri, user: $user, collection: $collection}) "
|
||||
"RETURN $src as src "
|
||||
"LIMIT " + str(query.limit),
|
||||
src=query.s.value, rel=query.p.value, uri=query.o.value,
|
||||
src=get_term_value(query.s), rel=get_term_value(query.p), uri=get_term_value(query.o),
|
||||
user=user, collection=collection,
|
||||
database_=self.db,
|
||||
)
|
||||
|
||||
for rec in records:
|
||||
triples.append((query.s.value, query.p.value, query.o.value))
|
||||
triples.append((get_term_value(query.s), get_term_value(query.p), get_term_value(query.o)))
|
||||
|
||||
else:
|
||||
|
||||
|
|
@ -105,14 +117,14 @@ class Processor(TriplesQueryService):
|
|||
"(dest:Literal {user: $user, collection: $collection}) "
|
||||
"RETURN dest.value as dest "
|
||||
"LIMIT " + str(query.limit),
|
||||
src=query.s.value, rel=query.p.value,
|
||||
src=get_term_value(query.s), rel=get_term_value(query.p),
|
||||
user=user, collection=collection,
|
||||
database_=self.db,
|
||||
)
|
||||
|
||||
for rec in records:
|
||||
data = rec.data()
|
||||
triples.append((query.s.value, query.p.value, data["dest"]))
|
||||
triples.append((get_term_value(query.s), get_term_value(query.p), data["dest"]))
|
||||
|
||||
records, summary, keys = self.io.execute_query(
|
||||
"MATCH (src:Node {uri: $src, user: $user, collection: $collection})-"
|
||||
|
|
@ -120,14 +132,14 @@ class Processor(TriplesQueryService):
|
|||
"(dest:Node {user: $user, collection: $collection}) "
|
||||
"RETURN dest.uri as dest "
|
||||
"LIMIT " + str(query.limit),
|
||||
src=query.s.value, rel=query.p.value,
|
||||
src=get_term_value(query.s), rel=get_term_value(query.p),
|
||||
user=user, collection=collection,
|
||||
database_=self.db,
|
||||
)
|
||||
|
||||
for rec in records:
|
||||
data = rec.data()
|
||||
triples.append((query.s.value, query.p.value, data["dest"]))
|
||||
triples.append((get_term_value(query.s), get_term_value(query.p), data["dest"]))
|
||||
|
||||
else:
|
||||
|
||||
|
|
@ -141,14 +153,14 @@ class Processor(TriplesQueryService):
|
|||
"(dest:Literal {value: $value, user: $user, collection: $collection}) "
|
||||
"RETURN rel.uri as rel "
|
||||
"LIMIT " + str(query.limit),
|
||||
src=query.s.value, value=query.o.value,
|
||||
src=get_term_value(query.s), value=get_term_value(query.o),
|
||||
user=user, collection=collection,
|
||||
database_=self.db,
|
||||
)
|
||||
|
||||
for rec in records:
|
||||
data = rec.data()
|
||||
triples.append((query.s.value, data["rel"], query.o.value))
|
||||
triples.append((get_term_value(query.s), data["rel"], get_term_value(query.o)))
|
||||
|
||||
records, summary, keys = self.io.execute_query(
|
||||
"MATCH (src:Node {uri: $src, user: $user, collection: $collection})-"
|
||||
|
|
@ -156,14 +168,14 @@ class Processor(TriplesQueryService):
|
|||
"(dest:Node {uri: $uri, user: $user, collection: $collection}) "
|
||||
"RETURN rel.uri as rel "
|
||||
"LIMIT " + str(query.limit),
|
||||
src=query.s.value, uri=query.o.value,
|
||||
src=get_term_value(query.s), uri=get_term_value(query.o),
|
||||
user=user, collection=collection,
|
||||
database_=self.db,
|
||||
)
|
||||
|
||||
for rec in records:
|
||||
data = rec.data()
|
||||
triples.append((query.s.value, data["rel"], query.o.value))
|
||||
triples.append((get_term_value(query.s), data["rel"], get_term_value(query.o)))
|
||||
|
||||
else:
|
||||
|
||||
|
|
@ -175,14 +187,14 @@ class Processor(TriplesQueryService):
|
|||
"(dest:Literal {user: $user, collection: $collection}) "
|
||||
"RETURN rel.uri as rel, dest.value as dest "
|
||||
"LIMIT " + str(query.limit),
|
||||
src=query.s.value,
|
||||
src=get_term_value(query.s),
|
||||
user=user, collection=collection,
|
||||
database_=self.db,
|
||||
)
|
||||
|
||||
for rec in records:
|
||||
data = rec.data()
|
||||
triples.append((query.s.value, data["rel"], data["dest"]))
|
||||
triples.append((get_term_value(query.s), data["rel"], data["dest"]))
|
||||
|
||||
records, summary, keys = self.io.execute_query(
|
||||
"MATCH (src:Node {uri: $src, user: $user, collection: $collection})-"
|
||||
|
|
@ -190,14 +202,14 @@ class Processor(TriplesQueryService):
|
|||
"(dest:Node {user: $user, collection: $collection}) "
|
||||
"RETURN rel.uri as rel, dest.uri as dest "
|
||||
"LIMIT " + str(query.limit),
|
||||
src=query.s.value,
|
||||
src=get_term_value(query.s),
|
||||
user=user, collection=collection,
|
||||
database_=self.db,
|
||||
)
|
||||
|
||||
for rec in records:
|
||||
data = rec.data()
|
||||
triples.append((query.s.value, data["rel"], data["dest"]))
|
||||
triples.append((get_term_value(query.s), data["rel"], data["dest"]))
|
||||
|
||||
|
||||
else:
|
||||
|
|
@ -214,14 +226,14 @@ class Processor(TriplesQueryService):
|
|||
"(dest:Literal {value: $value, user: $user, collection: $collection}) "
|
||||
"RETURN src.uri as src "
|
||||
"LIMIT " + str(query.limit),
|
||||
uri=query.p.value, value=query.o.value,
|
||||
uri=get_term_value(query.p), value=get_term_value(query.o),
|
||||
user=user, collection=collection,
|
||||
database_=self.db,
|
||||
)
|
||||
|
||||
for rec in records:
|
||||
data = rec.data()
|
||||
triples.append((data["src"], query.p.value, query.o.value))
|
||||
triples.append((data["src"], get_term_value(query.p), get_term_value(query.o)))
|
||||
|
||||
records, summary, keys = self.io.execute_query(
|
||||
"MATCH (src:Node {user: $user, collection: $collection})-"
|
||||
|
|
@ -229,14 +241,14 @@ class Processor(TriplesQueryService):
|
|||
"(dest:Node {uri: $dest, user: $user, collection: $collection}) "
|
||||
"RETURN src.uri as src "
|
||||
"LIMIT " + str(query.limit),
|
||||
uri=query.p.value, dest=query.o.value,
|
||||
uri=get_term_value(query.p), dest=get_term_value(query.o),
|
||||
user=user, collection=collection,
|
||||
database_=self.db,
|
||||
)
|
||||
|
||||
for rec in records:
|
||||
data = rec.data()
|
||||
triples.append((data["src"], query.p.value, query.o.value))
|
||||
triples.append((data["src"], get_term_value(query.p), get_term_value(query.o)))
|
||||
|
||||
else:
|
||||
|
||||
|
|
@ -248,14 +260,14 @@ class Processor(TriplesQueryService):
|
|||
"(dest:Literal {user: $user, collection: $collection}) "
|
||||
"RETURN src.uri as src, dest.value as dest "
|
||||
"LIMIT " + str(query.limit),
|
||||
uri=query.p.value,
|
||||
uri=get_term_value(query.p),
|
||||
user=user, collection=collection,
|
||||
database_=self.db,
|
||||
)
|
||||
|
||||
for rec in records:
|
||||
data = rec.data()
|
||||
triples.append((data["src"], query.p.value, data["dest"]))
|
||||
triples.append((data["src"], get_term_value(query.p), data["dest"]))
|
||||
|
||||
records, summary, keys = self.io.execute_query(
|
||||
"MATCH (src:Node {user: $user, collection: $collection})-"
|
||||
|
|
@ -263,14 +275,14 @@ class Processor(TriplesQueryService):
|
|||
"(dest:Node {user: $user, collection: $collection}) "
|
||||
"RETURN src.uri as src, dest.uri as dest "
|
||||
"LIMIT " + str(query.limit),
|
||||
uri=query.p.value,
|
||||
uri=get_term_value(query.p),
|
||||
user=user, collection=collection,
|
||||
database_=self.db,
|
||||
)
|
||||
|
||||
for rec in records:
|
||||
data = rec.data()
|
||||
triples.append((data["src"], query.p.value, data["dest"]))
|
||||
triples.append((data["src"], get_term_value(query.p), data["dest"]))
|
||||
|
||||
else:
|
||||
|
||||
|
|
@ -284,14 +296,14 @@ class Processor(TriplesQueryService):
|
|||
"(dest:Literal {value: $value, user: $user, collection: $collection}) "
|
||||
"RETURN src.uri as src, rel.uri as rel "
|
||||
"LIMIT " + str(query.limit),
|
||||
value=query.o.value,
|
||||
value=get_term_value(query.o),
|
||||
user=user, collection=collection,
|
||||
database_=self.db,
|
||||
)
|
||||
|
||||
for rec in records:
|
||||
data = rec.data()
|
||||
triples.append((data["src"], data["rel"], query.o.value))
|
||||
triples.append((data["src"], data["rel"], get_term_value(query.o)))
|
||||
|
||||
records, summary, keys = self.io.execute_query(
|
||||
"MATCH (src:Node {user: $user, collection: $collection})-"
|
||||
|
|
@ -299,14 +311,14 @@ class Processor(TriplesQueryService):
|
|||
"(dest:Node {uri: $uri, user: $user, collection: $collection}) "
|
||||
"RETURN src.uri as src, rel.uri as rel "
|
||||
"LIMIT " + str(query.limit),
|
||||
uri=query.o.value,
|
||||
uri=get_term_value(query.o),
|
||||
user=user, collection=collection,
|
||||
database_=self.db,
|
||||
)
|
||||
|
||||
for rec in records:
|
||||
data = rec.data()
|
||||
triples.append((data["src"], data["rel"], query.o.value))
|
||||
triples.append((data["src"], data["rel"], get_term_value(query.o)))
|
||||
|
||||
else:
|
||||
|
||||
|
|
|
|||
|
|
@ -9,10 +9,24 @@ from .... direct.milvus_graph_embeddings import EntityVectors
|
|||
from .... base import GraphEmbeddingsStoreService, CollectionConfigHandler
|
||||
from .... base import AsyncProcessor, Consumer, Producer
|
||||
from .... base import ConsumerMetrics, ProducerMetrics
|
||||
from .... schema import IRI, LITERAL
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def get_term_value(term):
|
||||
"""Extract the string value from a Term"""
|
||||
if term is None:
|
||||
return None
|
||||
if term.type == IRI:
|
||||
return term.iri
|
||||
elif term.type == LITERAL:
|
||||
return term.value
|
||||
else:
|
||||
# For blank nodes or other types, use id or value
|
||||
return term.id or term.value
|
||||
|
||||
default_ident = "ge-write"
|
||||
default_store_uri = 'http://localhost:19530'
|
||||
|
||||
|
|
@ -36,11 +50,12 @@ class Processor(CollectionConfigHandler, GraphEmbeddingsStoreService):
|
|||
async def store_graph_embeddings(self, message):
|
||||
|
||||
for entity in message.entities:
|
||||
entity_value = get_term_value(entity.entity)
|
||||
|
||||
if entity.entity.value != "" and entity.entity.value is not None:
|
||||
if entity_value != "" and entity_value is not None:
|
||||
for vec in entity.vectors:
|
||||
self.vecstore.insert(
|
||||
vec, entity.entity.value,
|
||||
vec, entity_value,
|
||||
message.metadata.user,
|
||||
message.metadata.collection
|
||||
)
|
||||
|
|
|
|||
|
|
@ -14,10 +14,24 @@ import logging
|
|||
from .... base import GraphEmbeddingsStoreService, CollectionConfigHandler
|
||||
from .... base import AsyncProcessor, Consumer, Producer
|
||||
from .... base import ConsumerMetrics, ProducerMetrics
|
||||
from .... schema import IRI, LITERAL
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def get_term_value(term):
|
||||
"""Extract the string value from a Term"""
|
||||
if term is None:
|
||||
return None
|
||||
if term.type == IRI:
|
||||
return term.iri
|
||||
elif term.type == LITERAL:
|
||||
return term.value
|
||||
else:
|
||||
# For blank nodes or other types, use id or value
|
||||
return term.id or term.value
|
||||
|
||||
default_ident = "ge-write"
|
||||
default_api_key = os.getenv("PINECONE_API_KEY", "not-specified")
|
||||
default_cloud = "aws"
|
||||
|
|
@ -100,8 +114,9 @@ class Processor(CollectionConfigHandler, GraphEmbeddingsStoreService):
|
|||
return
|
||||
|
||||
for entity in message.entities:
|
||||
entity_value = get_term_value(entity.entity)
|
||||
|
||||
if entity.entity.value == "" or entity.entity.value is None:
|
||||
if entity_value == "" or entity_value is None:
|
||||
continue
|
||||
|
||||
for vec in entity.vectors:
|
||||
|
|
@ -126,7 +141,7 @@ class Processor(CollectionConfigHandler, GraphEmbeddingsStoreService):
|
|||
{
|
||||
"id": vector_id,
|
||||
"values": vec,
|
||||
"metadata": { "entity": entity.entity.value },
|
||||
"metadata": { "entity": entity_value },
|
||||
}
|
||||
]
|
||||
|
||||
|
|
|
|||
|
|
@ -12,10 +12,25 @@ import logging
|
|||
from .... base import GraphEmbeddingsStoreService, CollectionConfigHandler
|
||||
from .... base import AsyncProcessor, Consumer, Producer
|
||||
from .... base import ConsumerMetrics, ProducerMetrics
|
||||
from .... schema import IRI, LITERAL
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def get_term_value(term):
    """Return the plain string carried by a Term.

    IRI terms yield their ``iri`` field, literal terms yield ``value``,
    and anything else (e.g. blank nodes) falls back to ``id`` or ``value``.
    A ``None`` input yields ``None``.
    """
    if term is None:
        return None
    kind = term.type
    if kind == IRI:
        return term.iri
    if kind == LITERAL:
        return term.value
    # Blank nodes / unknown kinds: prefer the id, fall back to value.
    return term.id or term.value
|
||||
|
||||
|
||||
default_ident = "ge-write"
|
||||
|
||||
default_store_uri = 'http://localhost:6333'
|
||||
|
|
@ -51,8 +66,10 @@ class Processor(CollectionConfigHandler, GraphEmbeddingsStoreService):
|
|||
return
|
||||
|
||||
for entity in message.entities:
|
||||
entity_value = get_term_value(entity.entity)
|
||||
|
||||
if entity.entity.value == "" or entity.entity.value is None: return
|
||||
if entity_value == "" or entity_value is None:
|
||||
continue
|
||||
|
||||
for vec in entity.vectors:
|
||||
|
||||
|
|
@ -80,7 +97,7 @@ class Processor(CollectionConfigHandler, GraphEmbeddingsStoreService):
|
|||
id=str(uuid.uuid4()),
|
||||
vector=vec,
|
||||
payload={
|
||||
"entity": entity.entity.value,
|
||||
"entity": entity_value,
|
||||
}
|
||||
)
|
||||
]
|
||||
|
|
|
|||
|
|
@ -10,11 +10,12 @@ import argparse
|
|||
import time
|
||||
import logging
|
||||
|
||||
from .... direct.cassandra_kg import KnowledgeGraph
|
||||
from .... direct.cassandra_kg import KnowledgeGraph, DEFAULT_GRAPH
|
||||
from .... base import TriplesStoreService, CollectionConfigHandler
|
||||
from .... base import AsyncProcessor, Consumer, Producer
|
||||
from .... base import ConsumerMetrics, ProducerMetrics
|
||||
from .... base.cassandra_config import add_cassandra_args, resolve_cassandra_config
|
||||
from .... schema import IRI, LITERAL
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
|
@ -22,6 +23,19 @@ logger = logging.getLogger(__name__)
|
|||
default_ident = "triples-write"
|
||||
|
||||
|
||||
def get_term_value(term):
    """Return the plain string carried by a Term.

    IRI terms yield their ``iri`` field, literal terms yield ``value``,
    and anything else (e.g. blank nodes) falls back to ``id`` or ``value``.
    A ``None`` input yields ``None``.
    """
    if term is None:
        return None
    kind = term.type
    if kind == IRI:
        return term.iri
    if kind == LITERAL:
        return term.value
    # Blank nodes / unknown kinds: prefer the id, fall back to value.
    return term.id or term.value
|
||||
|
||||
|
||||
class Processor(CollectionConfigHandler, TriplesStoreService):
|
||||
|
||||
def __init__(self, **params):
|
||||
|
|
@ -84,11 +98,19 @@ class Processor(CollectionConfigHandler, TriplesStoreService):
|
|||
self.table = user
|
||||
|
||||
for t in message.triples:
|
||||
# Extract values from Term objects
|
||||
s_val = get_term_value(t.s)
|
||||
p_val = get_term_value(t.p)
|
||||
o_val = get_term_value(t.o)
|
||||
# t.g is None for default graph, or a graph IRI
|
||||
g_val = t.g if t.g is not None else DEFAULT_GRAPH
|
||||
|
||||
self.tg.insert(
|
||||
message.metadata.collection,
|
||||
t.s.value,
|
||||
t.p.value,
|
||||
t.o.value
|
||||
s_val,
|
||||
p_val,
|
||||
o_val,
|
||||
g=g_val
|
||||
)
|
||||
|
||||
async def create_collection(self, user: str, collection: str, metadata: dict):
|
||||
|
|
|
|||
|
|
@ -15,12 +15,27 @@ from falkordb import FalkorDB
|
|||
from .... base import TriplesStoreService, CollectionConfigHandler
|
||||
from .... base import AsyncProcessor, Consumer, Producer
|
||||
from .... base import ConsumerMetrics, ProducerMetrics
|
||||
from .... schema import IRI, LITERAL
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_ident = "triples-write"
|
||||
|
||||
|
||||
def get_term_value(term):
    """Return the plain string carried by a Term.

    IRI terms yield their ``iri`` field, literal terms yield ``value``,
    and anything else (e.g. blank nodes) falls back to ``id`` or ``value``.
    A ``None`` input yields ``None``.
    """
    if term is None:
        return None
    kind = term.type
    if kind == IRI:
        return term.iri
    if kind == LITERAL:
        return term.value
    # Blank nodes / unknown kinds: prefer the id, fall back to value.
    return term.id or term.value
|
||||
|
||||
|
||||
default_graph_url = 'falkor://falkordb:6379'
|
||||
default_database = 'falkordb'
|
||||
|
||||
|
|
@ -164,14 +179,18 @@ class Processor(CollectionConfigHandler, TriplesStoreService):
|
|||
|
||||
for t in message.triples:
|
||||
|
||||
self.create_node(t.s.value, user, collection)
|
||||
s_val = get_term_value(t.s)
|
||||
p_val = get_term_value(t.p)
|
||||
o_val = get_term_value(t.o)
|
||||
|
||||
if t.o.is_uri:
|
||||
self.create_node(t.o.value, user, collection)
|
||||
self.relate_node(t.s.value, t.p.value, t.o.value, user, collection)
|
||||
self.create_node(s_val, user, collection)
|
||||
|
||||
if t.o.type == IRI:
|
||||
self.create_node(o_val, user, collection)
|
||||
self.relate_node(s_val, p_val, o_val, user, collection)
|
||||
else:
|
||||
self.create_literal(t.o.value, user, collection)
|
||||
self.relate_literal(t.s.value, t.p.value, t.o.value, user, collection)
|
||||
self.create_literal(o_val, user, collection)
|
||||
self.relate_literal(s_val, p_val, o_val, user, collection)
|
||||
|
||||
@staticmethod
|
||||
def add_args(parser):
|
||||
|
|
|
|||
|
|
@ -15,12 +15,27 @@ from neo4j import GraphDatabase
|
|||
from .... base import TriplesStoreService, CollectionConfigHandler
|
||||
from .... base import AsyncProcessor, Consumer, Producer
|
||||
from .... base import ConsumerMetrics, ProducerMetrics
|
||||
from .... schema import IRI, LITERAL
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_ident = "triples-write"
|
||||
|
||||
|
||||
def get_term_value(term):
    """Return the plain string carried by a Term.

    IRI terms yield their ``iri`` field, literal terms yield ``value``,
    and anything else (e.g. blank nodes) falls back to ``id`` or ``value``.
    A ``None`` input yields ``None``.
    """
    if term is None:
        return None
    kind = term.type
    if kind == IRI:
        return term.iri
    if kind == LITERAL:
        return term.value
    # Blank nodes / unknown kinds: prefer the id, fall back to value.
    return term.id or term.value
|
||||
|
||||
|
||||
default_graph_host = 'bolt://memgraph:7687'
|
||||
default_username = 'memgraph'
|
||||
default_password = 'password'
|
||||
|
|
@ -204,40 +219,44 @@ class Processor(CollectionConfigHandler, TriplesStoreService):
|
|||
|
||||
def create_triple(self, tx, t, user, collection):
|
||||
|
||||
s_val = get_term_value(t.s)
|
||||
p_val = get_term_value(t.p)
|
||||
o_val = get_term_value(t.o)
|
||||
|
||||
# Create new s node with given uri, if not exists
|
||||
result = tx.run(
|
||||
"MERGE (n:Node {uri: $uri, user: $user, collection: $collection})",
|
||||
uri=t.s.value, user=user, collection=collection
|
||||
uri=s_val, user=user, collection=collection
|
||||
)
|
||||
|
||||
if t.o.is_uri:
|
||||
if t.o.type == IRI:
|
||||
|
||||
# Create new o node with given uri, if not exists
|
||||
result = tx.run(
|
||||
"MERGE (n:Node {uri: $uri, user: $user, collection: $collection})",
|
||||
uri=t.o.value, user=user, collection=collection
|
||||
uri=o_val, user=user, collection=collection
|
||||
)
|
||||
|
||||
result = tx.run(
|
||||
"MATCH (src:Node {uri: $src, user: $user, collection: $collection}) "
|
||||
"MATCH (dest:Node {uri: $dest, user: $user, collection: $collection}) "
|
||||
"MERGE (src)-[:Rel {uri: $uri, user: $user, collection: $collection}]->(dest)",
|
||||
src=t.s.value, dest=t.o.value, uri=t.p.value, user=user, collection=collection,
|
||||
src=s_val, dest=o_val, uri=p_val, user=user, collection=collection,
|
||||
)
|
||||
|
||||
else:
|
||||
|
||||
|
||||
# Create new o literal with given uri, if not exists
|
||||
result = tx.run(
|
||||
"MERGE (n:Literal {value: $value, user: $user, collection: $collection})",
|
||||
value=t.o.value, user=user, collection=collection
|
||||
value=o_val, user=user, collection=collection
|
||||
)
|
||||
|
||||
result = tx.run(
|
||||
"MATCH (src:Node {uri: $src, user: $user, collection: $collection}) "
|
||||
"MATCH (dest:Literal {value: $dest, user: $user, collection: $collection}) "
|
||||
"MERGE (src)-[:Rel {uri: $uri, user: $user, collection: $collection}]->(dest)",
|
||||
src=t.s.value, dest=t.o.value, uri=t.p.value, user=user, collection=collection,
|
||||
src=s_val, dest=o_val, uri=p_val, user=user, collection=collection,
|
||||
)
|
||||
|
||||
async def store_triples(self, message):
|
||||
|
|
@ -257,14 +276,18 @@ class Processor(CollectionConfigHandler, TriplesStoreService):
|
|||
|
||||
for t in message.triples:
|
||||
|
||||
self.create_node(t.s.value, user, collection)
|
||||
s_val = get_term_value(t.s)
|
||||
p_val = get_term_value(t.p)
|
||||
o_val = get_term_value(t.o)
|
||||
|
||||
if t.o.is_uri:
|
||||
self.create_node(t.o.value, user, collection)
|
||||
self.relate_node(t.s.value, t.p.value, t.o.value, user, collection)
|
||||
self.create_node(s_val, user, collection)
|
||||
|
||||
if t.o.type == IRI:
|
||||
self.create_node(o_val, user, collection)
|
||||
self.relate_node(s_val, p_val, o_val, user, collection)
|
||||
else:
|
||||
self.create_literal(t.o.value, user, collection)
|
||||
self.relate_literal(t.s.value, t.p.value, t.o.value, user, collection)
|
||||
self.create_literal(o_val, user, collection)
|
||||
self.relate_literal(s_val, p_val, o_val, user, collection)
|
||||
|
||||
# Alternative implementation using transactions
|
||||
# with self.io.session(database=self.db) as session:
|
||||
|
|
|
|||
|
|
@ -14,12 +14,27 @@ from neo4j import GraphDatabase
|
|||
from .... base import TriplesStoreService, CollectionConfigHandler
|
||||
from .... base import AsyncProcessor, Consumer, Producer
|
||||
from .... base import ConsumerMetrics, ProducerMetrics
|
||||
from .... schema import IRI, LITERAL
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_ident = "triples-write"
|
||||
|
||||
|
||||
def get_term_value(term):
    """Return the plain string carried by a Term.

    IRI terms yield their ``iri`` field, literal terms yield ``value``,
    and anything else (e.g. blank nodes) falls back to ``id`` or ``value``.
    A ``None`` input yields ``None``.
    """
    if term is None:
        return None
    kind = term.type
    if kind == IRI:
        return term.iri
    if kind == LITERAL:
        return term.value
    # Blank nodes / unknown kinds: prefer the id, fall back to value.
    return term.id or term.value
|
||||
|
||||
|
||||
default_graph_host = 'bolt://neo4j:7687'
|
||||
default_username = 'neo4j'
|
||||
default_password = 'password'
|
||||
|
|
@ -212,14 +227,18 @@ class Processor(CollectionConfigHandler, TriplesStoreService):
|
|||
|
||||
for t in message.triples:
|
||||
|
||||
self.create_node(t.s.value, user, collection)
|
||||
s_val = get_term_value(t.s)
|
||||
p_val = get_term_value(t.p)
|
||||
o_val = get_term_value(t.o)
|
||||
|
||||
if t.o.is_uri:
|
||||
self.create_node(t.o.value, user, collection)
|
||||
self.relate_node(t.s.value, t.p.value, t.o.value, user, collection)
|
||||
self.create_node(s_val, user, collection)
|
||||
|
||||
if t.o.type == IRI:
|
||||
self.create_node(o_val, user, collection)
|
||||
self.relate_node(s_val, p_val, o_val, user, collection)
|
||||
else:
|
||||
self.create_literal(t.o.value, user, collection)
|
||||
self.relate_literal(t.s.value, t.p.value, t.o.value, user, collection)
|
||||
self.create_literal(o_val, user, collection)
|
||||
self.relate_literal(s_val, p_val, o_val, user, collection)
|
||||
|
||||
@staticmethod
|
||||
def add_args(parser):
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
|
||||
from .. schema import KnowledgeResponse, Triple, Triples, EntityEmbeddings
|
||||
from .. schema import Metadata, Value, GraphEmbeddings
|
||||
from .. schema import Metadata, GraphEmbeddings
|
||||
|
||||
from cassandra.cluster import Cluster
|
||||
from cassandra.auth import PlainTextAuthProvider
|
||||
|
|
|
|||
|
|
@ -1,8 +1,24 @@
|
|||
|
||||
from .. schema import KnowledgeResponse, Triple, Triples, EntityEmbeddings
|
||||
from .. schema import Metadata, Value, GraphEmbeddings
|
||||
from .. schema import Metadata, Term, IRI, LITERAL, GraphEmbeddings
|
||||
|
||||
from cassandra.cluster import Cluster
|
||||
|
||||
|
||||
def term_to_tuple(term):
    """Flatten a Term into the legacy (value, is_uri) pair used for storage."""
    is_uri = term.type == IRI
    # IRI terms carry their text in ``iri``; literals carry it in ``value``.
    value = term.iri if is_uri else term.value
    return (value, is_uri)
|
||||
|
||||
|
||||
def tuple_to_term(value, is_uri):
    """Rebuild a Term from the legacy (value, is_uri) pair read from storage."""
    # The flag selects which Term field receives the stored text.
    kwargs = {"type": IRI, "iri": value} if is_uri else {"type": LITERAL, "value": value}
    return Term(**kwargs)
|
||||
from cassandra.auth import PlainTextAuthProvider
|
||||
from ssl import SSLContext, PROTOCOL_TLSv1_2
|
||||
|
||||
|
|
@ -205,8 +221,7 @@ class KnowledgeTableStore:
|
|||
if m.metadata.metadata:
|
||||
metadata = [
|
||||
(
|
||||
v.s.value, v.s.is_uri, v.p.value, v.p.is_uri,
|
||||
v.o.value, v.o.is_uri
|
||||
*term_to_tuple(v.s), *term_to_tuple(v.p), *term_to_tuple(v.o)
|
||||
)
|
||||
for v in m.metadata.metadata
|
||||
]
|
||||
|
|
@ -215,8 +230,7 @@ class KnowledgeTableStore:
|
|||
|
||||
triples = [
|
||||
(
|
||||
v.s.value, v.s.is_uri, v.p.value, v.p.is_uri,
|
||||
v.o.value, v.o.is_uri
|
||||
*term_to_tuple(v.s), *term_to_tuple(v.p), *term_to_tuple(v.o)
|
||||
)
|
||||
for v in m.triples
|
||||
]
|
||||
|
|
@ -248,8 +262,7 @@ class KnowledgeTableStore:
|
|||
if m.metadata.metadata:
|
||||
metadata = [
|
||||
(
|
||||
v.s.value, v.s.is_uri, v.p.value, v.p.is_uri,
|
||||
v.o.value, v.o.is_uri
|
||||
*term_to_tuple(v.s), *term_to_tuple(v.p), *term_to_tuple(v.o)
|
||||
)
|
||||
for v in m.metadata.metadata
|
||||
]
|
||||
|
|
@ -258,7 +271,7 @@ class KnowledgeTableStore:
|
|||
|
||||
entities = [
|
||||
(
|
||||
(v.entity.value, v.entity.is_uri),
|
||||
term_to_tuple(v.entity),
|
||||
v.vectors
|
||||
)
|
||||
for v in m.entities
|
||||
|
|
@ -291,8 +304,7 @@ class KnowledgeTableStore:
|
|||
if m.metadata.metadata:
|
||||
metadata = [
|
||||
(
|
||||
v.s.value, v.s.is_uri, v.p.value, v.p.is_uri,
|
||||
v.o.value, v.o.is_uri
|
||||
*term_to_tuple(v.s), *term_to_tuple(v.p), *term_to_tuple(v.o)
|
||||
)
|
||||
for v in m.metadata.metadata
|
||||
]
|
||||
|
|
@ -414,9 +426,9 @@ class KnowledgeTableStore:
|
|||
if row[2]:
|
||||
metadata = [
|
||||
Triple(
|
||||
s = Value(value = elt[0], is_uri = elt[1]),
|
||||
p = Value(value = elt[2], is_uri = elt[3]),
|
||||
o = Value(value = elt[4], is_uri = elt[5]),
|
||||
s = tuple_to_term(elt[0], elt[1]),
|
||||
p = tuple_to_term(elt[2], elt[3]),
|
||||
o = tuple_to_term(elt[4], elt[5]),
|
||||
)
|
||||
for elt in row[2]
|
||||
]
|
||||
|
|
@ -425,9 +437,9 @@ class KnowledgeTableStore:
|
|||
|
||||
triples = [
|
||||
Triple(
|
||||
s = Value(value = elt[0], is_uri = elt[1]),
|
||||
p = Value(value = elt[2], is_uri = elt[3]),
|
||||
o = Value(value = elt[4], is_uri = elt[5]),
|
||||
s = tuple_to_term(elt[0], elt[1]),
|
||||
p = tuple_to_term(elt[2], elt[3]),
|
||||
o = tuple_to_term(elt[4], elt[5]),
|
||||
)
|
||||
for elt in row[3]
|
||||
]
|
||||
|
|
@ -470,9 +482,9 @@ class KnowledgeTableStore:
|
|||
if row[2]:
|
||||
metadata = [
|
||||
Triple(
|
||||
s = Value(value = elt[0], is_uri = elt[1]),
|
||||
p = Value(value = elt[2], is_uri = elt[3]),
|
||||
o = Value(value = elt[4], is_uri = elt[5]),
|
||||
s = tuple_to_term(elt[0], elt[1]),
|
||||
p = tuple_to_term(elt[2], elt[3]),
|
||||
o = tuple_to_term(elt[4], elt[5]),
|
||||
)
|
||||
for elt in row[2]
|
||||
]
|
||||
|
|
@ -481,7 +493,7 @@ class KnowledgeTableStore:
|
|||
|
||||
entities = [
|
||||
EntityEmbeddings(
|
||||
entity = Value(value = ent[0][0], is_uri = ent[0][1]),
|
||||
entity = tuple_to_term(ent[0][0], ent[0][1]),
|
||||
vectors = ent[1]
|
||||
)
|
||||
for ent in row[3]
|
||||
|
|
|
|||
|
|
@ -1,8 +1,24 @@
|
|||
|
||||
from .. schema import LibrarianRequest, LibrarianResponse
|
||||
from .. schema import DocumentMetadata, ProcessingMetadata
|
||||
from .. schema import Error, Triple, Value
|
||||
from .. schema import Error, Triple, Term, IRI, LITERAL
|
||||
from .. knowledge import hash
|
||||
|
||||
|
||||
def term_to_tuple(term):
    """Flatten a Term into the legacy (value, is_uri) pair used for storage."""
    is_uri = term.type == IRI
    # IRI terms carry their text in ``iri``; literals carry it in ``value``.
    value = term.iri if is_uri else term.value
    return (value, is_uri)
|
||||
|
||||
|
||||
def tuple_to_term(value, is_uri):
    """Rebuild a Term from the legacy (value, is_uri) pair read from storage."""
    # The flag selects which Term field receives the stored text.
    kwargs = {"type": IRI, "iri": value} if is_uri else {"type": LITERAL, "value": value}
    return Term(**kwargs)
|
||||
from .. exceptions import RequestError
|
||||
|
||||
from cassandra.cluster import Cluster
|
||||
|
|
@ -215,8 +231,7 @@ class LibraryTableStore:
|
|||
|
||||
metadata = [
|
||||
(
|
||||
v.s.value, v.s.is_uri, v.p.value, v.p.is_uri,
|
||||
v.o.value, v.o.is_uri
|
||||
*term_to_tuple(v.s), *term_to_tuple(v.p), *term_to_tuple(v.o)
|
||||
)
|
||||
for v in document.metadata
|
||||
]
|
||||
|
|
@ -249,8 +264,7 @@ class LibraryTableStore:
|
|||
|
||||
metadata = [
|
||||
(
|
||||
v.s.value, v.s.is_uri, v.p.value, v.p.is_uri,
|
||||
v.o.value, v.o.is_uri
|
||||
*term_to_tuple(v.s), *term_to_tuple(v.p), *term_to_tuple(v.o)
|
||||
)
|
||||
for v in document.metadata
|
||||
]
|
||||
|
|
@ -331,9 +345,9 @@ class LibraryTableStore:
|
|||
comments = row[4],
|
||||
metadata = [
|
||||
Triple(
|
||||
s=Value(value=m[0], is_uri=m[1]),
|
||||
p=Value(value=m[2], is_uri=m[3]),
|
||||
o=Value(value=m[4], is_uri=m[5])
|
||||
s=tuple_to_term(m[0], m[1]),
|
||||
p=tuple_to_term(m[2], m[3]),
|
||||
o=tuple_to_term(m[4], m[5])
|
||||
)
|
||||
for m in row[5]
|
||||
],
|
||||
|
|
@ -376,9 +390,9 @@ class LibraryTableStore:
|
|||
comments = row[3],
|
||||
metadata = [
|
||||
Triple(
|
||||
s=Value(value=m[0], is_uri=m[1]),
|
||||
p=Value(value=m[2], is_uri=m[3]),
|
||||
o=Value(value=m[4], is_uri=m[5])
|
||||
s=tuple_to_term(m[0], m[1]),
|
||||
p=tuple_to_term(m[2], m[3]),
|
||||
o=tuple_to_term(m[4], m[5])
|
||||
)
|
||||
for m in row[4]
|
||||
],
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue