mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-25 00:16:23 +02:00
Fix Cassandra schema and graph filter semantics (#680)
Schema fix (dtype/lang clustering key): - Add dtype and lang to PRIMARY KEY in quads_by_entity table - Add otype, dtype, lang to PRIMARY KEY in quads_by_collection table - Fixes deduplication bug where literals with same value but different datatype or language tag were collapsed (e.g., "thing" vs "thing"@en) - Update delete_collection to pass new clustering columns - Update tech spec to reflect new schema Graph filter semantics (simplified, no wildcard constant): - g=None means all graphs (no filter) - g="" means default graph only - g="uri" means specific named graph - Remove GRAPH_WILDCARD usage from EntityCentricKnowledgeGraph - Fix service.py streaming and non-streaming paths - Fix CLI to preserve empty string for -g '' argument
This commit is contained in:
parent
c951562189
commit
84941ce645
5 changed files with 102 additions and 65 deletions
|
|
@ -42,7 +42,7 @@ CREATE TABLE quads_by_entity (
|
||||||
d text, -- Dataset/graph of the quad
|
d text, -- Dataset/graph of the quad
|
||||||
dtype text, -- XSD datatype (when otype = 'L'), e.g. 'xsd:string'
|
dtype text, -- XSD datatype (when otype = 'L'), e.g. 'xsd:string'
|
||||||
lang text, -- Language tag (when otype = 'L'), e.g. 'en', 'fr'
|
lang text, -- Language tag (when otype = 'L'), e.g. 'en', 'fr'
|
||||||
PRIMARY KEY ((collection, entity), role, p, otype, s, o, d)
|
PRIMARY KEY ((collection, entity), role, p, otype, s, o, d, dtype, lang)
|
||||||
);
|
);
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
@ -54,6 +54,7 @@ CREATE TABLE quads_by_entity (
|
||||||
2. **p** — next most common filter, "give me all `knows` relationships"
|
2. **p** — next most common filter, "give me all `knows` relationships"
|
||||||
3. **otype** — enables filtering by URI-valued vs literal-valued relationships
|
3. **otype** — enables filtering by URI-valued vs literal-valued relationships
|
||||||
4. **s, o, d** — remaining columns for uniqueness
|
4. **s, o, d** — remaining columns for uniqueness
|
||||||
|
5. **dtype, lang** — distinguish literals with same value but different type metadata (e.g., `"thing"` vs `"thing"@en` vs `"thing"^^xsd:string`)
|
||||||
|
|
||||||
### Table 2: quads_by_collection
|
### Table 2: quads_by_collection
|
||||||
|
|
||||||
|
|
@ -69,11 +70,11 @@ CREATE TABLE quads_by_collection (
|
||||||
otype text, -- 'U' (URI), 'L' (literal), 'T' (triple/reification)
|
otype text, -- 'U' (URI), 'L' (literal), 'T' (triple/reification)
|
||||||
dtype text, -- XSD datatype (when otype = 'L')
|
dtype text, -- XSD datatype (when otype = 'L')
|
||||||
lang text, -- Language tag (when otype = 'L')
|
lang text, -- Language tag (when otype = 'L')
|
||||||
PRIMARY KEY (collection, d, s, p, o)
|
PRIMARY KEY (collection, d, s, p, o, otype, dtype, lang)
|
||||||
);
|
);
|
||||||
```
|
```
|
||||||
|
|
||||||
Clustered by dataset first, enabling deletion at either collection or dataset granularity.
|
Clustered by dataset first, enabling deletion at either collection or dataset granularity. The `otype`, `dtype`, and `lang` columns are included in the clustering key to distinguish literals with the same value but different type metadata — in RDF, `"thing"`, `"thing"@en`, and `"thing"^^xsd:string` are semantically distinct values.
|
||||||
|
|
||||||
## Write Path
|
## Write Path
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -305,9 +305,8 @@ class TestEntityCentricKnowledgeGraph:
|
||||||
|
|
||||||
mock_session.execute.assert_called()
|
mock_session.execute.assert_called()
|
||||||
|
|
||||||
def test_graph_wildcard_returns_all_graphs(self, entity_kg):
|
def test_graph_none_returns_all_graphs(self, entity_kg):
|
||||||
"""Test that g='*' returns quads from all graphs"""
|
"""Test that g=None returns quads from all graphs"""
|
||||||
from trustgraph.direct.cassandra_kg import GRAPH_WILDCARD
|
|
||||||
kg, mock_session = entity_kg
|
kg, mock_session = entity_kg
|
||||||
|
|
||||||
mock_result = [
|
mock_result = [
|
||||||
|
|
@ -320,7 +319,7 @@ class TestEntityCentricKnowledgeGraph:
|
||||||
]
|
]
|
||||||
mock_session.execute.return_value = mock_result
|
mock_session.execute.return_value = mock_result
|
||||||
|
|
||||||
results = kg.get_s('test_collection', 'http://example.org/Alice', g=GRAPH_WILDCARD)
|
results = kg.get_s('test_collection', 'http://example.org/Alice', g=None)
|
||||||
|
|
||||||
# Should return quads from both graphs
|
# Should return quads from both graphs
|
||||||
assert len(results) == 2
|
assert len(results) == 2
|
||||||
|
|
|
||||||
|
|
@ -186,6 +186,12 @@ def build_quoted_triple_term(qt_subject, qt_subject_type,
|
||||||
def format_term(term_dict):
|
def format_term(term_dict):
|
||||||
"""Format a term dict for display in space/pipe output formats.
|
"""Format a term dict for display in space/pipe output formats.
|
||||||
|
|
||||||
|
Handles multiple wire format styles:
|
||||||
|
- Short form (send): {"t": "i", "i": "..."}, {"t": "l", "v": "..."}
|
||||||
|
- Long form (receive): {"type": "i", "iri": "..."}, {"type": "l", "value": "..."}
|
||||||
|
- Raw quoted triple: {"s": {...}, "p": {...}, "o": {...}} (no type wrapper)
|
||||||
|
- Stringified quoted triple in IRI: {"t": "i", "i": "{\"s\":...}"} (backend quirk)
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
term_dict: Wire-format term dict
|
term_dict: Wire-format term dict
|
||||||
|
|
||||||
|
|
@ -195,25 +201,53 @@ def format_term(term_dict):
|
||||||
if not term_dict:
|
if not term_dict:
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
t = term_dict.get("t")
|
# Get type - handle both short and long form
|
||||||
|
t = term_dict.get("t") or term_dict.get("type")
|
||||||
|
|
||||||
if t == "i":
|
if t == "i":
|
||||||
return term_dict.get("i", "")
|
# IRI - handle both "i" and "iri" keys
|
||||||
|
iri_value = term_dict.get("i") or term_dict.get("iri", "")
|
||||||
|
# Check if IRI value is actually a stringified quoted triple (backend quirk)
|
||||||
|
if iri_value.startswith('{"s":') or iri_value.startswith("{\"s\":"):
|
||||||
|
try:
|
||||||
|
parsed = json.loads(iri_value)
|
||||||
|
if "s" in parsed and "p" in parsed and "o" in parsed:
|
||||||
|
# It's a stringified quoted triple - format it properly
|
||||||
|
s = format_term(parsed.get("s", {}))
|
||||||
|
p = format_term(parsed.get("p", {}))
|
||||||
|
o = format_term(parsed.get("o", {}))
|
||||||
|
return f"<<{s} {p} {o}>>"
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
pass # Not valid JSON, treat as regular IRI
|
||||||
|
return iri_value
|
||||||
elif t == "l":
|
elif t == "l":
|
||||||
value = term_dict.get("v", "")
|
# Literal - handle both short and long form keys
|
||||||
# Quote literals and show language/datatype if present
|
value = term_dict.get("v") or term_dict.get("value", "")
|
||||||
result = f'"{value}"'
|
result = f'"{value}"'
|
||||||
if "ln" in term_dict:
|
# Language tag
|
||||||
result += f'@{term_dict["ln"]}'
|
lang = term_dict.get("ln") or term_dict.get("language")
|
||||||
elif "dt" in term_dict:
|
if lang:
|
||||||
result += f'^^{term_dict["dt"]}'
|
result += f'@{lang}'
|
||||||
|
else:
|
||||||
|
# Datatype
|
||||||
|
dt = term_dict.get("dt") or term_dict.get("datatype")
|
||||||
|
if dt:
|
||||||
|
result += f'^^{dt}'
|
||||||
return result
|
return result
|
||||||
elif t == "t":
|
elif t == "t":
|
||||||
# Format quoted triple as <<s p o>>
|
# Quoted triple - handle both "tr" and "triple" keys
|
||||||
tr = term_dict.get("tr", {})
|
tr = term_dict.get("tr") or term_dict.get("triple", {})
|
||||||
s = format_term(tr.get("s", {}))
|
s = format_term(tr.get("s", {}))
|
||||||
p = format_term(tr.get("p", {}))
|
p = format_term(tr.get("p", {}))
|
||||||
o = format_term(tr.get("o", {}))
|
o = format_term(tr.get("o", {}))
|
||||||
return f"<<{s} {p} {o}>>"
|
return f"<<{s} {p} {o}>>"
|
||||||
|
elif t is None and "s" in term_dict and "p" in term_dict and "o" in term_dict:
|
||||||
|
# Raw quoted triple without type wrapper (has s, p, o keys directly)
|
||||||
|
s = format_term(term_dict.get("s", {}))
|
||||||
|
p = format_term(term_dict.get("p", {}))
|
||||||
|
o = format_term(term_dict.get("o", {}))
|
||||||
|
return f"<<{s} {p} {o}>>"
|
||||||
|
|
||||||
return str(term_dict)
|
return str(term_dict)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -526,8 +560,9 @@ def main():
|
||||||
else:
|
else:
|
||||||
obj_term = None
|
obj_term = None
|
||||||
|
|
||||||
# Graph is always an IRI
|
# Graph is a plain IRI string, not a Term
|
||||||
graph_term = build_term(args.graph, term_type='iri') if args.graph else None
|
# None = all graphs, "" = default graph only, "uri" = specific graph
|
||||||
|
graph_value = args.graph
|
||||||
|
|
||||||
query_graph(
|
query_graph(
|
||||||
url=args.api_url,
|
url=args.api_url,
|
||||||
|
|
@ -539,7 +574,7 @@ def main():
|
||||||
subject=subject_term,
|
subject=subject_term,
|
||||||
predicate=predicate_term,
|
predicate=predicate_term,
|
||||||
obj=obj_term,
|
obj=obj_term,
|
||||||
graph=graph_term,
|
graph=graph_value,
|
||||||
output_format=args.format,
|
output_format=args.format,
|
||||||
headers=args.headers,
|
headers=args.headers,
|
||||||
token=args.token,
|
token=args.token,
|
||||||
|
|
|
||||||
|
|
@ -589,6 +589,8 @@ class EntityCentricKnowledgeGraph:
|
||||||
|
|
||||||
# quads_by_entity: primary data table
|
# quads_by_entity: primary data table
|
||||||
# Every entity has a partition containing all quads it participates in
|
# Every entity has a partition containing all quads it participates in
|
||||||
|
# Clustering key includes dtype/lang to distinguish literals with same value
|
||||||
|
# but different datatype or language tag (e.g., "thing" vs "thing"@en)
|
||||||
self.session.execute(f"""
|
self.session.execute(f"""
|
||||||
CREATE TABLE IF NOT EXISTS {self.entity_table} (
|
CREATE TABLE IF NOT EXISTS {self.entity_table} (
|
||||||
collection text,
|
collection text,
|
||||||
|
|
@ -601,11 +603,13 @@ class EntityCentricKnowledgeGraph:
|
||||||
d text,
|
d text,
|
||||||
dtype text,
|
dtype text,
|
||||||
lang text,
|
lang text,
|
||||||
PRIMARY KEY ((collection, entity), role, p, otype, s, o, d)
|
PRIMARY KEY ((collection, entity), role, p, otype, s, o, d, dtype, lang)
|
||||||
);
|
);
|
||||||
""")
|
""")
|
||||||
|
|
||||||
# quads_by_collection: manifest for collection-level queries and deletion
|
# quads_by_collection: manifest for collection-level queries and deletion
|
||||||
|
# Clustering key includes otype/dtype/lang to distinguish literals with same
|
||||||
|
# value but different metadata (e.g., "thing" vs "thing"@en vs "thing"^^xsd:string)
|
||||||
self.session.execute(f"""
|
self.session.execute(f"""
|
||||||
CREATE TABLE IF NOT EXISTS {self.collection_table} (
|
CREATE TABLE IF NOT EXISTS {self.collection_table} (
|
||||||
collection text,
|
collection text,
|
||||||
|
|
@ -616,7 +620,7 @@ class EntityCentricKnowledgeGraph:
|
||||||
otype text,
|
otype text,
|
||||||
dtype text,
|
dtype text,
|
||||||
lang text,
|
lang text,
|
||||||
PRIMARY KEY (collection, d, s, p, o)
|
PRIMARY KEY (collection, d, s, p, o, otype, dtype, lang)
|
||||||
);
|
);
|
||||||
""")
|
""")
|
||||||
|
|
||||||
|
|
@ -718,7 +722,7 @@ class EntityCentricKnowledgeGraph:
|
||||||
)
|
)
|
||||||
|
|
||||||
self.delete_collection_row_stmt = self.session.prepare(
|
self.delete_collection_row_stmt = self.session.prepare(
|
||||||
f"DELETE FROM {self.collection_table} WHERE collection = ? AND d = ? AND s = ? AND p = ? AND o = ?"
|
f"DELETE FROM {self.collection_table} WHERE collection = ? AND d = ? AND s = ? AND p = ? AND o = ? AND otype = ? AND dtype = ? AND lang = ?"
|
||||||
)
|
)
|
||||||
|
|
||||||
logger.info("Prepared statements initialized for entity-centric schema")
|
logger.info("Prepared statements initialized for entity-centric schema")
|
||||||
|
|
@ -797,7 +801,7 @@ class EntityCentricKnowledgeGraph:
|
||||||
def get_s(self, collection, s, g=None, limit=10):
|
def get_s(self, collection, s, g=None, limit=10):
|
||||||
"""
|
"""
|
||||||
Query by subject. Returns quads where s is the subject.
|
Query by subject. Returns quads where s is the subject.
|
||||||
g=None: default graph, g='*': all graphs
|
g=None: all graphs, g='': default graph only, g='uri': specific graph
|
||||||
"""
|
"""
|
||||||
rows = self.session.execute(self.get_entity_as_s_stmt, (collection, s, limit))
|
rows = self.session.execute(self.get_entity_as_s_stmt, (collection, s, limit))
|
||||||
|
|
||||||
|
|
@ -805,10 +809,7 @@ class EntityCentricKnowledgeGraph:
|
||||||
for row in rows:
|
for row in rows:
|
||||||
d = row.d if hasattr(row, 'd') else DEFAULT_GRAPH
|
d = row.d if hasattr(row, 'd') else DEFAULT_GRAPH
|
||||||
# Filter by graph if specified
|
# Filter by graph if specified
|
||||||
if g is None or g == DEFAULT_GRAPH:
|
if g is not None and d != g:
|
||||||
if d != DEFAULT_GRAPH:
|
|
||||||
continue
|
|
||||||
elif g != GRAPH_WILDCARD and d != g:
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
results.append(QuadResult(
|
results.append(QuadResult(
|
||||||
|
|
@ -819,16 +820,13 @@ class EntityCentricKnowledgeGraph:
|
||||||
return results
|
return results
|
||||||
|
|
||||||
def get_p(self, collection, p, g=None, limit=10):
|
def get_p(self, collection, p, g=None, limit=10):
|
||||||
"""Query by predicate"""
|
"""Query by predicate. g=None: all graphs, g='': default graph only"""
|
||||||
rows = self.session.execute(self.get_entity_as_p_stmt, (collection, p, limit))
|
rows = self.session.execute(self.get_entity_as_p_stmt, (collection, p, limit))
|
||||||
|
|
||||||
results = []
|
results = []
|
||||||
for row in rows:
|
for row in rows:
|
||||||
d = row.d if hasattr(row, 'd') else DEFAULT_GRAPH
|
d = row.d if hasattr(row, 'd') else DEFAULT_GRAPH
|
||||||
if g is None or g == DEFAULT_GRAPH:
|
if g is not None and d != g:
|
||||||
if d != DEFAULT_GRAPH:
|
|
||||||
continue
|
|
||||||
elif g != GRAPH_WILDCARD and d != g:
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
results.append(QuadResult(
|
results.append(QuadResult(
|
||||||
|
|
@ -839,16 +837,13 @@ class EntityCentricKnowledgeGraph:
|
||||||
return results
|
return results
|
||||||
|
|
||||||
def get_o(self, collection, o, g=None, limit=10):
|
def get_o(self, collection, o, g=None, limit=10):
|
||||||
"""Query by object"""
|
"""Query by object. g=None: all graphs, g='': default graph only"""
|
||||||
rows = self.session.execute(self.get_entity_as_o_stmt, (collection, o, limit))
|
rows = self.session.execute(self.get_entity_as_o_stmt, (collection, o, limit))
|
||||||
|
|
||||||
results = []
|
results = []
|
||||||
for row in rows:
|
for row in rows:
|
||||||
d = row.d if hasattr(row, 'd') else DEFAULT_GRAPH
|
d = row.d if hasattr(row, 'd') else DEFAULT_GRAPH
|
||||||
if g is None or g == DEFAULT_GRAPH:
|
if g is not None and d != g:
|
||||||
if d != DEFAULT_GRAPH:
|
|
||||||
continue
|
|
||||||
elif g != GRAPH_WILDCARD and d != g:
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
results.append(QuadResult(
|
results.append(QuadResult(
|
||||||
|
|
@ -859,16 +854,13 @@ class EntityCentricKnowledgeGraph:
|
||||||
return results
|
return results
|
||||||
|
|
||||||
def get_sp(self, collection, s, p, g=None, limit=10):
|
def get_sp(self, collection, s, p, g=None, limit=10):
|
||||||
"""Query by subject and predicate"""
|
"""Query by subject and predicate. g=None: all graphs, g='': default graph only"""
|
||||||
rows = self.session.execute(self.get_entity_as_s_p_stmt, (collection, s, p, limit))
|
rows = self.session.execute(self.get_entity_as_s_p_stmt, (collection, s, p, limit))
|
||||||
|
|
||||||
results = []
|
results = []
|
||||||
for row in rows:
|
for row in rows:
|
||||||
d = row.d if hasattr(row, 'd') else DEFAULT_GRAPH
|
d = row.d if hasattr(row, 'd') else DEFAULT_GRAPH
|
||||||
if g is None or g == DEFAULT_GRAPH:
|
if g is not None and d != g:
|
||||||
if d != DEFAULT_GRAPH:
|
|
||||||
continue
|
|
||||||
elif g != GRAPH_WILDCARD and d != g:
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
results.append(QuadResult(
|
results.append(QuadResult(
|
||||||
|
|
@ -879,16 +871,13 @@ class EntityCentricKnowledgeGraph:
|
||||||
return results
|
return results
|
||||||
|
|
||||||
def get_po(self, collection, p, o, g=None, limit=10):
|
def get_po(self, collection, p, o, g=None, limit=10):
|
||||||
"""Query by predicate and object"""
|
"""Query by predicate and object. g=None: all graphs, g='': default graph only"""
|
||||||
rows = self.session.execute(self.get_entity_as_o_p_stmt, (collection, o, p, limit))
|
rows = self.session.execute(self.get_entity_as_o_p_stmt, (collection, o, p, limit))
|
||||||
|
|
||||||
results = []
|
results = []
|
||||||
for row in rows:
|
for row in rows:
|
||||||
d = row.d if hasattr(row, 'd') else DEFAULT_GRAPH
|
d = row.d if hasattr(row, 'd') else DEFAULT_GRAPH
|
||||||
if g is None or g == DEFAULT_GRAPH:
|
if g is not None and d != g:
|
||||||
if d != DEFAULT_GRAPH:
|
|
||||||
continue
|
|
||||||
elif g != GRAPH_WILDCARD and d != g:
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
results.append(QuadResult(
|
results.append(QuadResult(
|
||||||
|
|
@ -899,7 +888,7 @@ class EntityCentricKnowledgeGraph:
|
||||||
return results
|
return results
|
||||||
|
|
||||||
def get_os(self, collection, o, s, g=None, limit=10):
|
def get_os(self, collection, o, s, g=None, limit=10):
|
||||||
"""Query by object and subject"""
|
"""Query by object and subject. g=None: all graphs, g='': default graph only"""
|
||||||
# Use subject partition with role='S', filter by o
|
# Use subject partition with role='S', filter by o
|
||||||
rows = self.session.execute(self.get_entity_as_s_stmt, (collection, s, limit))
|
rows = self.session.execute(self.get_entity_as_s_stmt, (collection, s, limit))
|
||||||
|
|
||||||
|
|
@ -909,10 +898,7 @@ class EntityCentricKnowledgeGraph:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
d = row.d if hasattr(row, 'd') else DEFAULT_GRAPH
|
d = row.d if hasattr(row, 'd') else DEFAULT_GRAPH
|
||||||
if g is None or g == DEFAULT_GRAPH:
|
if g is not None and d != g:
|
||||||
if d != DEFAULT_GRAPH:
|
|
||||||
continue
|
|
||||||
elif g != GRAPH_WILDCARD and d != g:
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
results.append(QuadResult(
|
results.append(QuadResult(
|
||||||
|
|
@ -923,7 +909,7 @@ class EntityCentricKnowledgeGraph:
|
||||||
return results
|
return results
|
||||||
|
|
||||||
def get_spo(self, collection, s, p, o, g=None, limit=10):
|
def get_spo(self, collection, s, p, o, g=None, limit=10):
|
||||||
"""Query by subject, predicate, object (find which graphs)"""
|
"""Query by subject, predicate, object (find which graphs). g=None: all graphs, g='': default graph only"""
|
||||||
rows = self.session.execute(self.get_entity_as_s_p_stmt, (collection, s, p, limit))
|
rows = self.session.execute(self.get_entity_as_s_p_stmt, (collection, s, p, limit))
|
||||||
|
|
||||||
results = []
|
results = []
|
||||||
|
|
@ -932,10 +918,7 @@ class EntityCentricKnowledgeGraph:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
d = row.d if hasattr(row, 'd') else DEFAULT_GRAPH
|
d = row.d if hasattr(row, 'd') else DEFAULT_GRAPH
|
||||||
if g is None or g == DEFAULT_GRAPH:
|
if g is not None and d != g:
|
||||||
if d != DEFAULT_GRAPH:
|
|
||||||
continue
|
|
||||||
elif g != GRAPH_WILDCARD and d != g:
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
results.append(QuadResult(
|
results.append(QuadResult(
|
||||||
|
|
@ -991,9 +974,9 @@ class EntityCentricKnowledgeGraph:
|
||||||
3. Delete entire entity partitions
|
3. Delete entire entity partitions
|
||||||
4. Delete collection rows
|
4. Delete collection rows
|
||||||
"""
|
"""
|
||||||
# Read all quads from collection table
|
# Read all quads from collection table (including type metadata for delete)
|
||||||
rows = self.session.execute(
|
rows = self.session.execute(
|
||||||
f"SELECT d, s, p, o, otype FROM {self.collection_table} WHERE collection = %s",
|
f"SELECT d, s, p, o, otype, dtype, lang FROM {self.collection_table} WHERE collection = %s",
|
||||||
(collection,)
|
(collection,)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -1002,8 +985,11 @@ class EntityCentricKnowledgeGraph:
|
||||||
quads = []
|
quads = []
|
||||||
|
|
||||||
for row in rows:
|
for row in rows:
|
||||||
d, s, p, o, otype = row.d, row.s, row.p, row.o, row.otype
|
d, s, p, o = row.d, row.s, row.p, row.o
|
||||||
quads.append((d, s, p, o))
|
otype = row.otype
|
||||||
|
dtype = row.dtype if hasattr(row, 'dtype') else ''
|
||||||
|
lang = row.lang if hasattr(row, 'lang') else ''
|
||||||
|
quads.append((d, s, p, o, otype, dtype, lang))
|
||||||
|
|
||||||
# Subject and predicate are always entities
|
# Subject and predicate are always entities
|
||||||
entities.add(s)
|
entities.add(s)
|
||||||
|
|
@ -1038,8 +1024,8 @@ class EntityCentricKnowledgeGraph:
|
||||||
batch = BatchStatement()
|
batch = BatchStatement()
|
||||||
count = 0
|
count = 0
|
||||||
|
|
||||||
for d, s, p, o in quads:
|
for d, s, p, o, otype, dtype, lang in quads:
|
||||||
batch.add(self.delete_collection_row_stmt, (collection, d, s, p, o))
|
batch.add(self.delete_collection_row_stmt, (collection, d, s, p, o, otype, dtype, lang))
|
||||||
count += 1
|
count += 1
|
||||||
|
|
||||||
# Execute batch every 50 quads
|
# Execute batch every 50 quads
|
||||||
|
|
|
||||||
|
|
@ -10,7 +10,7 @@ import json
|
||||||
from cassandra.query import SimpleStatement
|
from cassandra.query import SimpleStatement
|
||||||
|
|
||||||
from .... direct.cassandra_kg import (
|
from .... direct.cassandra_kg import (
|
||||||
EntityCentricKnowledgeGraph, GRAPH_WILDCARD, DEFAULT_GRAPH
|
EntityCentricKnowledgeGraph, DEFAULT_GRAPH
|
||||||
)
|
)
|
||||||
from .... schema import TriplesQueryRequest, TriplesQueryResponse, Error
|
from .... schema import TriplesQueryRequest, TriplesQueryResponse, Error
|
||||||
from .... schema import Term, Triple, IRI, LITERAL, TRIPLE, BLANK
|
from .... schema import Term, Triple, IRI, LITERAL, TRIPLE, BLANK
|
||||||
|
|
@ -304,6 +304,13 @@ class Processor(TriplesQueryService):
|
||||||
for t in resp:
|
for t in resp:
|
||||||
# Note: quads_by_collection uses 'd' for graph field
|
# Note: quads_by_collection uses 'd' for graph field
|
||||||
g = t.d if hasattr(t, 'd') else DEFAULT_GRAPH
|
g = t.d if hasattr(t, 'd') else DEFAULT_GRAPH
|
||||||
|
# Filter by graph
|
||||||
|
# g_val=None means all graphs (no filter)
|
||||||
|
# g_val="" means default graph only
|
||||||
|
# otherwise filter to specific named graph
|
||||||
|
if g_val is not None:
|
||||||
|
if g != g_val:
|
||||||
|
continue
|
||||||
term_type, datatype, language = get_object_metadata(t)
|
term_type, datatype, language = get_object_metadata(t)
|
||||||
quads.append((t.s, t.p, t.o, g, term_type, datatype, language))
|
quads.append((t.s, t.p, t.o, g, term_type, datatype, language))
|
||||||
|
|
||||||
|
|
@ -379,6 +386,15 @@ class Processor(TriplesQueryService):
|
||||||
break
|
break
|
||||||
|
|
||||||
g = row.d if hasattr(row, 'd') else DEFAULT_GRAPH
|
g = row.d if hasattr(row, 'd') else DEFAULT_GRAPH
|
||||||
|
|
||||||
|
# Filter by graph
|
||||||
|
# g_val=None means all graphs (no filter)
|
||||||
|
# g_val="" means default graph only
|
||||||
|
# otherwise filter to specific named graph
|
||||||
|
if g_val is not None:
|
||||||
|
if g != g_val:
|
||||||
|
continue
|
||||||
|
|
||||||
term_type, datatype, language = get_object_metadata(row)
|
term_type, datatype, language = get_object_metadata(row)
|
||||||
|
|
||||||
# s and p are always IRIs in RDF
|
# s and p are always IRIs in RDF
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue