Entity-centric graph (#633)

* Tech spec for new entity-centric graph schema

* Graph implementation
This commit is contained in:
cybermaggedon 2026-02-16 13:26:43 +00:00 committed by GitHub
parent f24f1ebd80
commit 00c1ca681b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 1858 additions and 225 deletions

View file

@ -20,6 +20,11 @@ DEFAULT_GRAPH = ""
class KnowledgeGraph:
"""
REDUNDANT: This 7-table implementation has been superseded by
EntityCentricKnowledgeGraph which uses a more efficient 2-table model.
This class is retained temporarily for reference but should not be used
for new deployments.
Cassandra-backed knowledge graph supporting quads (s, p, o, g).
Uses 7 tables to support all 16 query patterns efficiently:
@ -516,3 +521,575 @@ class KnowledgeGraph:
self.cluster.shutdown()
if self.cluster in _active_clusters:
_active_clusters.remove(self.cluster)
class EntityCentricKnowledgeGraph:
    """
    Entity-centric Cassandra-backed knowledge graph supporting quads (s, p, o, g).

    Uses 2 tables instead of 7:
    - quads_by_entity: every entity knows every quad it participates in
    - quads_by_collection: manifest for collection-level queries and deletion

    A third table, collection_metadata, records which collections exist.

    Supports all 16 query patterns with single-partition reads.

    Graph selection convention used by the filtered query methods
    (get_s, get_p, get_o, get_sp, get_po, get_os, get_spo):
    - g=None or g == DEFAULT_GRAPH: default graph only
    - g == GRAPH_WILDCARD: every graph
    - any other value: that named graph only
    """

    # Sentinel meaning "read this field from the result row" (None cannot be
    # used because callers may legitimately pass None as a fixed value).
    _FROM_ROW = object()

    # Number of statements grouped into one batch by delete_collection
    _DELETE_BATCH_SIZE = 50

    def __init__(
            self, hosts=None,
            keyspace="trustgraph", username=None, password=None
    ):
        """
        Connect to Cassandra, create the keyspace/tables and prepare statements.

        Args:
            hosts: List of contact points; defaults to ["localhost"]
            keyspace: Keyspace holding the graph tables
            username: Optional user name; authentication (and TLS) is only
                enabled when both username and password are given
            password: Optional password
        """

        if hosts is None:
            hosts = ["localhost"]

        self.keyspace = keyspace
        self.username = username

        # 2-table entity-centric schema
        self.entity_table = "quads_by_entity"
        self.collection_table = "quads_by_collection"

        # Collection metadata tracking
        self.collection_metadata_table = "collection_metadata"

        if username and password:
            ssl_context = SSLContext(PROTOCOL_TLSv1_2)
            auth_provider = PlainTextAuthProvider(
                username=username, password=password
            )
            self.cluster = Cluster(
                hosts, auth_provider=auth_provider, ssl_context=ssl_context
            )
        else:
            self.cluster = Cluster(hosts)

        self.session = self.cluster.connect()

        # Track this cluster globally
        _active_clusters.append(self.cluster)

        self.init()
        self.prepare_statements()

    def clear(self):
        """Drop the whole keyspace, then recreate it with empty tables."""

        self.session.execute(f"""
            drop keyspace if exists {self.keyspace};
        """)

        self.init()

    def init(self):
        """Create the keyspace if needed, switch to it and create the tables."""

        self.session.execute(f"""
            create keyspace if not exists {self.keyspace}
                with replication = {{
                   'class' : 'SimpleStrategy',
                   'replication_factor' : 1
                }};
        """)

        self.session.set_keyspace(self.keyspace)
        self.init_entity_centric_schema()

    def init_entity_centric_schema(self):
        """Initialize 2-table entity-centric schema"""

        # quads_by_entity: primary data table
        # Every entity has a partition containing all quads it participates in
        self.session.execute(f"""
            CREATE TABLE IF NOT EXISTS {self.entity_table} (
                collection text,
                entity text,
                role text,
                p text,
                otype text,
                s text,
                o text,
                d text,
                dtype text,
                lang text,
                PRIMARY KEY ((collection, entity), role, p, otype, s, o, d)
            );
        """)

        # quads_by_collection: manifest for collection-level queries and deletion
        self.session.execute(f"""
            CREATE TABLE IF NOT EXISTS {self.collection_table} (
                collection text,
                d text,
                s text,
                p text,
                o text,
                otype text,
                dtype text,
                lang text,
                PRIMARY KEY (collection, d, s, p, o)
            );
        """)

        # Collection metadata tracking
        self.session.execute(f"""
            CREATE TABLE IF NOT EXISTS {self.collection_metadata_table} (
                collection text,
                created_at timestamp,
                PRIMARY KEY (collection)
            );
        """)

        logger.info("Entity-centric schema initialized (2 tables + metadata)")

    def prepare_statements(self):
        """Prepare statements for entity-centric schema"""

        # Insert statement for quads_by_entity
        self.insert_entity_stmt = self.session.prepare(
            f"INSERT INTO {self.entity_table} "
            "(collection, entity, role, p, otype, s, o, d, dtype, lang) "
            "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
        )

        # Insert statement for quads_by_collection
        self.insert_collection_stmt = self.session.prepare(
            f"INSERT INTO {self.collection_table} "
            "(collection, d, s, p, o, otype, dtype, lang) "
            "VALUES (?, ?, ?, ?, ?, ?, ?, ?)"
        )

        # Query statements for quads_by_entity

        # Get all quads for an entity (any role)
        self.get_entity_all_stmt = self.session.prepare(
            f"SELECT role, p, otype, s, o, d, dtype, lang FROM {self.entity_table} "
            "WHERE collection = ? AND entity = ? LIMIT ?"
        )

        # Get quads where entity is subject (role='S')
        self.get_entity_as_s_stmt = self.session.prepare(
            f"SELECT p, otype, s, o, d, dtype, lang FROM {self.entity_table} "
            "WHERE collection = ? AND entity = ? AND role = 'S' LIMIT ?"
        )

        # Get quads where entity is subject with specific predicate
        self.get_entity_as_s_p_stmt = self.session.prepare(
            f"SELECT otype, s, o, d, dtype, lang FROM {self.entity_table} "
            "WHERE collection = ? AND entity = ? AND role = 'S' AND p = ? LIMIT ?"
        )

        # Get quads where entity is subject with specific predicate and otype
        self.get_entity_as_s_p_otype_stmt = self.session.prepare(
            f"SELECT s, o, d, dtype, lang FROM {self.entity_table} "
            "WHERE collection = ? AND entity = ? AND role = 'S' AND p = ? AND otype = ? LIMIT ?"
        )

        # Get quads where entity is predicate (role='P')
        self.get_entity_as_p_stmt = self.session.prepare(
            f"SELECT p, otype, s, o, d, dtype, lang FROM {self.entity_table} "
            "WHERE collection = ? AND entity = ? AND role = 'P' LIMIT ?"
        )

        # Get quads where entity is object (role='O')
        self.get_entity_as_o_stmt = self.session.prepare(
            f"SELECT p, otype, s, o, d, dtype, lang FROM {self.entity_table} "
            "WHERE collection = ? AND entity = ? AND role = 'O' LIMIT ?"
        )

        # Get quads where entity is object with specific predicate
        self.get_entity_as_o_p_stmt = self.session.prepare(
            f"SELECT otype, s, o, d, dtype, lang FROM {self.entity_table} "
            "WHERE collection = ? AND entity = ? AND role = 'O' AND p = ? LIMIT ?"
        )

        # Get quads where entity is graph (role='G')
        self.get_entity_as_g_stmt = self.session.prepare(
            f"SELECT p, otype, s, o, d, dtype, lang FROM {self.entity_table} "
            "WHERE collection = ? AND entity = ? AND role = 'G' LIMIT ?"
        )

        # Query statements for quads_by_collection

        # Get all quads in collection
        self.get_collection_all_stmt = self.session.prepare(
            f"SELECT d, s, p, o, otype, dtype, lang FROM {self.collection_table} "
            "WHERE collection = ? LIMIT ?"
        )

        # Get all quads in a specific graph
        self.get_collection_by_graph_stmt = self.session.prepare(
            f"SELECT s, p, o, otype, dtype, lang FROM {self.collection_table} "
            "WHERE collection = ? AND d = ? LIMIT ?"
        )

        # Delete statements
        self.delete_entity_partition_stmt = self.session.prepare(
            f"DELETE FROM {self.entity_table} WHERE collection = ? AND entity = ?"
        )

        self.delete_collection_row_stmt = self.session.prepare(
            f"DELETE FROM {self.collection_table} WHERE collection = ? AND d = ? AND s = ? AND p = ? AND o = ?"
        )

        logger.info("Prepared statements initialized for entity-centric schema")

    def insert(self, collection, s, p, o, g=None, otype=None, dtype="", lang=""):
        """
        Insert a quad into entity-centric tables.

        Writes up to 4 rows to quads_by_entity (one for each entity role) +
        1 row to quads_by_collection, all in a single batch. The object row
        is skipped for literals (they are not independently queryable
        entities) and the graph row is skipped for the default graph.

        Args:
            collection: Collection/tenant scope
            s: Subject (string value)
            p: Predicate (string value)
            o: Object (string value)
            g: Graph/dataset (None for default graph)
            otype: Object type - 'u' (URI), 'l' (literal), 't' (triple)
                   Auto-detected from o value if not provided
            dtype: XSD datatype (for literals)
            lang: Language tag (for literals)
        """

        # Default graph stored as empty string
        if g is None:
            g = DEFAULT_GRAPH

        # Auto-detect otype if not provided (backwards compatibility)
        if otype is None:
            otype = "u" if o.startswith(("http://", "https://")) else "l"

        batch = BatchStatement()

        # Write row for subject entity (role='S')
        batch.add(self.insert_entity_stmt, (
            collection, s, 'S', p, otype, s, o, g, dtype, lang
        ))

        # Write row for predicate entity (role='P')
        batch.add(self.insert_entity_stmt, (
            collection, p, 'P', p, otype, s, o, g, dtype, lang
        ))

        # Write row for object entity (role='O') - only for URIs, not literals
        if otype in ('u', 't'):
            batch.add(self.insert_entity_stmt, (
                collection, o, 'O', p, otype, s, o, g, dtype, lang
            ))

        # Write row for graph entity (role='G') - only for non-default graphs
        if g != DEFAULT_GRAPH:
            batch.add(self.insert_entity_stmt, (
                collection, g, 'G', p, otype, s, o, g, dtype, lang
            ))

        # Write row to quads_by_collection
        batch.add(self.insert_collection_stmt, (
            collection, g, s, p, o, otype, dtype, lang
        ))

        self.session.execute(batch)

    # ========================================================================
    # Query helpers
    # ========================================================================

    @staticmethod
    def _graph_matches(requested, stored):
        """Return True if a row stored in graph `stored` satisfies selector `requested`."""
        if requested is None or requested == DEFAULT_GRAPH:
            return stored == DEFAULT_GRAPH
        return requested == GRAPH_WILDCARD or stored == requested

    def _rows_to_quads(
            self, rows, g, s=_FROM_ROW, p=_FROM_ROW, o=_FROM_ROW,
            match_o=_FROM_ROW,
    ):
        """
        Filter result rows by graph (and optionally by object) and wrap the
        survivors as QuadResult objects.

        Args:
            rows: Iterable of rows from a quads_by_entity query
            g: Graph selector (see class docstring)
            s, p, o: Fixed values to put in the result; when left unset the
                value is read from the row. Needed because some prepared
                statements do not select every column (e.g. no `p`).
            match_o: When set, rows whose `o` differs are dropped
        """
        unset = self._FROM_ROW
        quads = []

        for row in rows:

            if match_o is not unset and row.o != match_o:
                continue

            d = getattr(row, 'd', DEFAULT_GRAPH)
            if not self._graph_matches(g, d):
                continue

            quads.append(QuadResult(
                s=row.s if s is unset else s,
                p=row.p if p is unset else p,
                o=row.o if o is unset else o,
                g=d,
                otype=row.otype, dtype=row.dtype, lang=row.lang
            ))

        return quads

    # ========================================================================
    # Query methods
    # g=None means default graph, g="*" means all graphs
    # Results include otype, dtype, lang for proper Term reconstruction
    #
    # NOTE: for the filtered methods the LIMIT is applied by the Cassandra
    # query, *before* the graph/object filtering done in Python, so fewer
    # than `limit` results can be returned even when more matches exist.
    # ========================================================================

    def get_all(self, collection, limit=50):
        """
        Get all quads in collection.

        Returns the raw driver result; rows expose the graph as column `d`
        (there is no `g` attribute on these rows).
        """
        return self.session.execute(
            self.get_collection_all_stmt, (collection, limit)
        )

    def get_s(self, collection, s, g=None, limit=10):
        """
        Query by subject. Returns quads where s is the subject.

        g=None: default graph, g='*': all graphs
        """
        rows = self.session.execute(
            self.get_entity_as_s_stmt, (collection, s, limit)
        )
        return self._rows_to_quads(rows, g)

    def get_p(self, collection, p, g=None, limit=10):
        """Query by predicate"""
        rows = self.session.execute(
            self.get_entity_as_p_stmt, (collection, p, limit)
        )
        return self._rows_to_quads(rows, g)

    def get_o(self, collection, o, g=None, limit=10):
        """Query by object"""
        rows = self.session.execute(
            self.get_entity_as_o_stmt, (collection, o, limit)
        )
        return self._rows_to_quads(rows, g)

    def get_sp(self, collection, s, p, g=None, limit=10):
        """Query by subject and predicate"""
        rows = self.session.execute(
            self.get_entity_as_s_p_stmt, (collection, s, p, limit)
        )
        return self._rows_to_quads(rows, g, s=s, p=p)

    def get_po(self, collection, p, o, g=None, limit=10):
        """Query by predicate and object"""
        # The object's partition is read, so entity=o comes before p
        rows = self.session.execute(
            self.get_entity_as_o_p_stmt, (collection, o, p, limit)
        )
        return self._rows_to_quads(rows, g, p=p, o=o)

    def get_os(self, collection, o, s, g=None, limit=10):
        """Query by object and subject"""
        # Use subject partition with role='S', filter by o
        rows = self.session.execute(
            self.get_entity_as_s_stmt, (collection, s, limit)
        )
        return self._rows_to_quads(rows, g, s=s, o=o, match_o=o)

    def get_spo(self, collection, s, p, o, g=None, limit=10):
        """Query by subject, predicate, object (find which graphs)"""
        rows = self.session.execute(
            self.get_entity_as_s_p_stmt, (collection, s, p, limit)
        )
        return self._rows_to_quads(rows, g, s=s, p=p, o=o, match_o=o)

    def get_g(self, collection, g, limit=50):
        """
        Get all quads in a specific graph (None selects the default graph).

        Returns the raw driver result; the selected columns do not include
        the graph itself.
        """
        if g is None:
            g = DEFAULT_GRAPH
        return self.session.execute(
            self.get_collection_by_graph_stmt, (collection, g, limit)
        )

    # ========================================================================
    # Collection management
    # ========================================================================

    def collection_exists(self, collection):
        """
        Check if collection exists.

        Returns False (after logging) when the lookup itself fails.
        """
        try:
            result = self.session.execute(
                f"SELECT collection FROM {self.collection_metadata_table} WHERE collection = %s LIMIT 1",
                (collection,)
            )
            return bool(list(result))
        except Exception as e:
            logger.error(f"Error checking collection existence: {e}")
            return False

    def create_collection(self, collection):
        """
        Create collection by inserting metadata row.

        Raises:
            Exception: whatever the driver raised, after logging it
        """
        try:
            import datetime
            # The driver treats naive datetimes as UTC, so a naive local
            # now() would be stored shifted by the local UTC offset.
            created_at = datetime.datetime.now(datetime.timezone.utc)
            self.session.execute(
                f"INSERT INTO {self.collection_metadata_table} (collection, created_at) VALUES (%s, %s)",
                (collection, created_at)
            )
            logger.info(f"Created collection metadata for {collection}")
        except Exception as e:
            logger.error(f"Error creating collection: {e}")
            raise

    def _execute_batched(self, statement, parameter_rows):
        """Run `statement` once per parameter tuple, grouped into fixed-size batches."""
        batch = BatchStatement()
        pending = 0

        for params in parameter_rows:
            batch.add(statement, params)
            pending += 1

            if pending == self._DELETE_BATCH_SIZE:
                self.session.execute(batch)
                batch = BatchStatement()
                pending = 0

        # Flush the final, partially filled batch
        if pending:
            self.session.execute(batch)

    def delete_collection(self, collection):
        """
        Delete all quads for a collection from both tables.

        Uses efficient partition-level deletes:
        1. Read quads from quads_by_collection to get all quads
        2. Extract unique entities (s, p, o for URIs, g for non-default)
        3. Delete entire entity partitions
        4. Delete collection rows
        5. Delete the collection metadata row
        """

        # Read all quads from collection table
        rows = self.session.execute(
            f"SELECT d, s, p, o, otype FROM {self.collection_table} WHERE collection = %s",
            (collection,)
        )

        # Collect unique entities and quad data for deletion
        entities = set()
        quads = []

        for row in rows:
            d, s, p, o, otype = row.d, row.s, row.p, row.o, row.otype
            quads.append((d, s, p, o))

            # Subject and predicate are always entities
            entities.add(s)
            entities.add(p)

            # Object is an entity only for URIs
            if otype in ('u', 't'):
                entities.add(o)

            # Graph is an entity for non-default graphs
            if d != DEFAULT_GRAPH:
                entities.add(d)

        # Delete entity partitions (efficient partition-level deletes)
        self._execute_batched(
            self.delete_entity_partition_stmt,
            ((collection, entity) for entity in entities),
        )

        # Delete collection rows
        self._execute_batched(
            self.delete_collection_row_stmt,
            ((collection, d, s, p, o) for d, s, p, o in quads),
        )

        # Delete collection metadata
        self.session.execute(
            f"DELETE FROM {self.collection_metadata_table} WHERE collection = %s",
            (collection,)
        )

        logger.info(f"Deleted collection {collection}: {len(entities)} entity partitions, {len(quads)} quads")

    def close(self):
        """
        Close connections.

        Safe to call on a partially constructed instance: attributes that
        were never set (or are falsy) are skipped.
        """
        session = getattr(self, 'session', None)
        if session:
            session.shutdown()

        cluster = getattr(self, 'cluster', None)
        if cluster:
            cluster.shutdown()
            # Remove from global tracking
            if cluster in _active_clusters:
                _active_clusters.remove(cluster)
class QuadResult:
    """
    Result object for quad queries, including object type metadata.

    Attributes:
        s: Subject value
        p: Predicate value
        o: Object value
        g: Graph/dataset value
        otype: Object type - 'u' (URI), 'l' (literal), 't' (triple)
        dtype: XSD datatype (for literals)
        lang: Language tag (for literals)
    """

    def __init__(self, s, p, o, g, otype='u', dtype='', lang=''):
        self.s = s
        self.p = p
        self.o = o
        self.g = g
        self.otype = otype
        self.dtype = dtype
        self.lang = lang

    def __repr__(self):
        """Return a constructor-style representation, useful in logs and debuggers."""
        return (
            f"{type(self).__name__}(s={self.s!r}, p={self.p!r}, o={self.o!r}, "
            f"g={self.g!r}, otype={self.otype!r}, dtype={self.dtype!r}, "
            f"lang={self.lang!r})"
        )

View file

@ -6,7 +6,9 @@ null. Output is a list of quads.
import logging
from .... direct.cassandra_kg import KnowledgeGraph, GRAPH_WILDCARD, DEFAULT_GRAPH
from .... direct.cassandra_kg import (
EntityCentricKnowledgeGraph, GRAPH_WILDCARD, DEFAULT_GRAPH
)
from .... schema import TriplesQueryRequest, TriplesQueryResponse, Error
from .... schema import Term, Triple, IRI, LITERAL
from .... base import TriplesQueryService
@ -31,8 +33,37 @@ def get_term_value(term):
return term.id or term.value
def create_term(value):
"""Create a Term from a string value"""
def create_term(value, otype=None, dtype=None, lang=None):
"""
Create a Term from a string value, optionally using type metadata.
Args:
value: The string value
otype: Object type - 'u' (URI), 'l' (literal), 't' (triple)
dtype: XSD datatype (for literals)
lang: Language tag (for literals)
If otype is provided, uses it to determine Term type.
Otherwise falls back to URL detection heuristic.
"""
if otype is not None:
if otype == 'u':
return Term(type=IRI, iri=value)
elif otype == 'l':
return Term(
type=LITERAL,
value=value,
datatype=dtype or "",
language=lang or ""
)
elif otype == 't':
# Triple/reification - treat as IRI for now
return Term(type=IRI, iri=value)
else:
# Unknown otype, fall back to heuristic
pass
# Heuristic fallback for backwards compatibility
if value.startswith("http://") or value.startswith("https://"):
return Term(type=IRI, iri=value)
else:
@ -74,14 +105,17 @@ class Processor(TriplesQueryService):
user = query.user
if user != self.table:
# Use factory function to select implementation
KGClass = EntityCentricKnowledgeGraph
if self.cassandra_username and self.cassandra_password:
self.tg = KnowledgeGraph(
self.tg = KGClass(
hosts=self.cassandra_host,
keyspace=query.user,
username=self.cassandra_username, password=self.cassandra_password
)
else:
self.tg = KnowledgeGraph(
self.tg = KGClass(
hosts=self.cassandra_host,
keyspace=query.user,
)
@ -93,6 +127,14 @@ class Processor(TriplesQueryService):
o_val = get_term_value(query.o)
g_val = query.g # Already a string or None
# Helper to extract object metadata from result row
def get_o_metadata(t):
    """
    Return the (otype, dtype, lang) tuple carried by result row ``t``.

    Any of the three attributes the row does not have comes back as None.
    """
    return tuple(
        getattr(t, field, None) for field in ("otype", "dtype", "lang")
    )
quads = []
# Route to appropriate query method based on which fields are specified
@ -106,7 +148,8 @@ class Processor(TriplesQueryService):
)
for t in resp:
g = t.g if hasattr(t, 'g') else DEFAULT_GRAPH
quads.append((s_val, p_val, o_val, g))
otype, dtype, lang = get_o_metadata(t)
quads.append((s_val, p_val, o_val, g, otype, dtype, lang))
else:
# SP specified
resp = self.tg.get_sp(
@ -115,7 +158,8 @@ class Processor(TriplesQueryService):
)
for t in resp:
g = t.g if hasattr(t, 'g') else DEFAULT_GRAPH
quads.append((s_val, p_val, t.o, g))
otype, dtype, lang = get_o_metadata(t)
quads.append((s_val, p_val, t.o, g, otype, dtype, lang))
else:
if o_val is not None:
# SO specified
@ -125,7 +169,8 @@ class Processor(TriplesQueryService):
)
for t in resp:
g = t.g if hasattr(t, 'g') else DEFAULT_GRAPH
quads.append((s_val, t.p, o_val, g))
otype, dtype, lang = get_o_metadata(t)
quads.append((s_val, t.p, o_val, g, otype, dtype, lang))
else:
# S only
resp = self.tg.get_s(
@ -134,7 +179,8 @@ class Processor(TriplesQueryService):
)
for t in resp:
g = t.g if hasattr(t, 'g') else DEFAULT_GRAPH
quads.append((s_val, t.p, t.o, g))
otype, dtype, lang = get_o_metadata(t)
quads.append((s_val, t.p, t.o, g, otype, dtype, lang))
else:
if p_val is not None:
if o_val is not None:
@ -145,7 +191,8 @@ class Processor(TriplesQueryService):
)
for t in resp:
g = t.g if hasattr(t, 'g') else DEFAULT_GRAPH
quads.append((t.s, p_val, o_val, g))
otype, dtype, lang = get_o_metadata(t)
quads.append((t.s, p_val, o_val, g, otype, dtype, lang))
else:
# P only
resp = self.tg.get_p(
@ -154,7 +201,8 @@ class Processor(TriplesQueryService):
)
for t in resp:
g = t.g if hasattr(t, 'g') else DEFAULT_GRAPH
quads.append((t.s, p_val, t.o, g))
otype, dtype, lang = get_o_metadata(t)
quads.append((t.s, p_val, t.o, g, otype, dtype, lang))
else:
if o_val is not None:
# O only
@ -164,7 +212,8 @@ class Processor(TriplesQueryService):
)
for t in resp:
g = t.g if hasattr(t, 'g') else DEFAULT_GRAPH
quads.append((t.s, t.p, o_val, g))
otype, dtype, lang = get_o_metadata(t)
quads.append((t.s, t.p, o_val, g, otype, dtype, lang))
else:
# Nothing specified - get all
resp = self.tg.get_all(
@ -173,14 +222,16 @@ class Processor(TriplesQueryService):
)
for t in resp:
g = t.g if hasattr(t, 'g') else DEFAULT_GRAPH
quads.append((t.s, t.p, t.o, g))
otype, dtype, lang = get_o_metadata(t)
quads.append((t.s, t.p, t.o, g, otype, dtype, lang))
# Convert to Triple objects (with g field)
# Use otype/dtype/lang for proper Term reconstruction if available
triples = [
Triple(
s=create_term(q[0]),
p=create_term(q[1]),
o=create_term(q[2]),
o=create_term(q[2], otype=q[4], dtype=q[5], lang=q[6]),
g=q[3] if q[3] != DEFAULT_GRAPH else None
)
for q in quads

View file

@ -10,12 +10,14 @@ import argparse
import time
import logging
from .... direct.cassandra_kg import KnowledgeGraph, DEFAULT_GRAPH
from .... direct.cassandra_kg import (
EntityCentricKnowledgeGraph, DEFAULT_GRAPH
)
from .... base import TriplesStoreService, CollectionConfigHandler
from .... base import AsyncProcessor, Consumer, Producer
from .... base import ConsumerMetrics, ProducerMetrics
from .... base.cassandra_config import add_cassandra_args, resolve_cassandra_config
from .... schema import IRI, LITERAL
from .... schema import IRI, LITERAL, BLANK, TRIPLE
# Module logger
logger = logging.getLogger(__name__)
@ -36,6 +38,46 @@ def get_term_value(term):
return term.id or term.value
def get_term_otype(term):
    """
    Translate a Term into the object type code used by entity-centric storage.

    Mapping from Term.type to otype:
    - LITERAL -> "l" (literal)
    - TRIPLE  -> "t" (triple/reification)
    - IRI, BLANK, any other type, or a missing term -> "u" (URI)
    """
    if term is None:
        return "u"

    kind = term.type

    if kind == LITERAL:
        return "l"

    if kind == TRIPLE:
        return "t"

    # IRI, blank nodes and unrecognised types are all stored as URIs
    return "u"
def get_term_dtype(term):
    """
    Return the datatype of a literal Term.

    Yields the empty string for a missing term, a non-literal term, or a
    literal whose datatype is unset.
    """
    if term is not None and term.type == LITERAL:
        return term.datatype or ""

    return ""
def get_term_lang(term):
    """
    Return the language tag of a literal Term.

    Yields the empty string for a missing term, a non-literal term, or a
    literal whose language is unset.
    """
    if term is not None and term.type == LITERAL:
        return term.language or ""

    return ""
class Processor(CollectionConfigHandler, TriplesStoreService):
def __init__(self, **params):
@ -78,15 +120,18 @@ class Processor(CollectionConfigHandler, TriplesStoreService):
self.tg = None
# Use factory function to select implementation
KGClass = EntityCentricKnowledgeGraph
try:
if self.cassandra_username and self.cassandra_password:
self.tg = KnowledgeGraph(
self.tg = KGClass(
hosts=self.cassandra_host,
keyspace=message.metadata.user,
username=self.cassandra_username, password=self.cassandra_password
)
else:
self.tg = KnowledgeGraph(
self.tg = KGClass(
hosts=self.cassandra_host,
keyspace=message.metadata.user,
)
@ -105,12 +150,20 @@ class Processor(CollectionConfigHandler, TriplesStoreService):
# t.g is None for default graph, or a graph IRI
g_val = t.g if t.g is not None else DEFAULT_GRAPH
# Extract object type metadata for entity-centric storage
otype = get_term_otype(t.o)
dtype = get_term_dtype(t.o)
lang = get_term_lang(t.o)
self.tg.insert(
message.metadata.collection,
s_val,
p_val,
o_val,
g=g_val
g=g_val,
otype=otype,
dtype=dtype,
lang=lang
)
async def create_collection(self, user: str, collection: str, metadata: dict):
@ -120,16 +173,19 @@ class Processor(CollectionConfigHandler, TriplesStoreService):
if self.table is None or self.table != user:
self.tg = None
# Use factory function to select implementation
KGClass = EntityCentricKnowledgeGraph
try:
if self.cassandra_username and self.cassandra_password:
self.tg = KnowledgeGraph(
self.tg = KGClass(
hosts=self.cassandra_host,
keyspace=user,
username=self.cassandra_username,
password=self.cassandra_password
)
else:
self.tg = KnowledgeGraph(
self.tg = KGClass(
hosts=self.cassandra_host,
keyspace=user,
)
@ -159,16 +215,19 @@ class Processor(CollectionConfigHandler, TriplesStoreService):
if self.table is None or self.table != user:
self.tg = None
# Use factory function to select implementation
KGClass = EntityCentricKnowledgeGraph
try:
if self.cassandra_username and self.cassandra_password:
self.tg = KnowledgeGraph(
self.tg = KGClass(
hosts=self.cassandra_host,
keyspace=user,
username=self.cassandra_username,
password=self.cassandra_password
)
else:
self.tg = KnowledgeGraph(
self.tg = KGClass(
hosts=self.cassandra_host,
keyspace=user,
)