Fix Cassandra schema and graph filter semantics (#680)

Schema fix (dtype/lang clustering key):
- Add dtype and lang to the PRIMARY KEY of the quads_by_entity table
- Add otype, dtype, and lang to the PRIMARY KEY of the quads_by_collection table
- Fixes a deduplication bug where literals with the same value but a different datatype or language tag were collapsed (e.g., "thing" vs "thing"@en)
- Update delete_collection to pass the new clustering columns
- Update the tech spec to reflect the new schema

Graph filter semantics (simplified, no wildcard constant):
- g=None means all graphs (no filter)
- g="" means the default graph only
- g="uri" means a specific named graph
- Remove GRAPH_WILDCARD usage from EntityCentricKnowledgeGraph
- Fix the streaming and non-streaming paths in service.py
- Fix the CLI to preserve the empty string for the -g '' argument
2026-06-14 09:15:13 +02:00 · 2026-03-10 12:52:51 +00:00 · 2026-03-10 12:52:51 +00:00 · 84941ce645
commit 84941ce645
parent c951562189
5 changed files with 102 additions and 65 deletions
--- a/trustgraph-cli/trustgraph/cli/query_graph.py
+++ b/trustgraph-cli/trustgraph/cli/query_graph.py
@ -186,6 +186,12 @@ def build_quoted_triple_term(qt_subject, qt_subject_type,
 def format_term(term_dict):
    """Format a term dict for display in space/pipe output formats.

+    Handles multiple wire format styles:
+    - Short form (send): {"t": "i", "i": "..."}, {"t": "l", "v": "..."}
+    - Long form (receive): {"type": "i", "iri": "..."}, {"type": "l", "value": "..."}
+    - Raw quoted triple: {"s": {...}, "p": {...}, "o": {...}} (no type wrapper)
+    - Stringified quoted triple in IRI: {"t": "i", "i": "{\"s\":...}"} (backend quirk)
+
    Args:
        term_dict: Wire-format term dict

@ -195,25 +201,53 @@ def format_term(term_dict):
    if not term_dict:
        return ""

-    t = term_dict.get("t")
+    # Get type - handle both short and long form
+    t = term_dict.get("t") or term_dict.get("type")
+
    if t == "i":
-        return term_dict.get("i", "")
+        # IRI - handle both "i" and "iri" keys
+        iri_value = term_dict.get("i") or term_dict.get("iri", "")
+        # Check if IRI value is actually a stringified quoted triple (backend quirk)
+        if iri_value.startswith('{"s":') or iri_value.startswith("{\"s\":"):
+            try:
+                parsed = json.loads(iri_value)
+                if "s" in parsed and "p" in parsed and "o" in parsed:
+                    # It's a stringified quoted triple - format it properly
+                    s = format_term(parsed.get("s", {}))
+                    p = format_term(parsed.get("p", {}))
+                    o = format_term(parsed.get("o", {}))
+                    return f"<<{s} {p} {o}>>"
+            except json.JSONDecodeError:
+                pass  # Not valid JSON, treat as regular IRI
+        return iri_value
    elif t == "l":
-        value = term_dict.get("v", "")
-        # Quote literals and show language/datatype if present
+        # Literal - handle both short and long form keys
+        value = term_dict.get("v") or term_dict.get("value", "")
        result = f'"{value}"'
-        if "ln" in term_dict:
-            result += f'@{term_dict["ln"]}'
-        elif "dt" in term_dict:
-            result += f'^^{term_dict["dt"]}'
+        # Language tag
+        lang = term_dict.get("ln") or term_dict.get("language")
+        if lang:
+            result += f'@{lang}'
+        else:
+            # Datatype
+            dt = term_dict.get("dt") or term_dict.get("datatype")
+            if dt:
+                result += f'^^{dt}'
        return result
    elif t == "t":
-        # Format quoted triple as <<s p o>>
-        tr = term_dict.get("tr", {})
+        # Quoted triple - handle both "tr" and "triple" keys
+        tr = term_dict.get("tr") or term_dict.get("triple", {})
        s = format_term(tr.get("s", {}))
        p = format_term(tr.get("p", {}))
        o = format_term(tr.get("o", {}))
        return f"<<{s} {p} {o}>>"
+    elif t is None and "s" in term_dict and "p" in term_dict and "o" in term_dict:
+        # Raw quoted triple without type wrapper (has s, p, o keys directly)
+        s = format_term(term_dict.get("s", {}))
+        p = format_term(term_dict.get("p", {}))
+        o = format_term(term_dict.get("o", {}))
+        return f"<<{s} {p} {o}>>"
+
    return str(term_dict)


@ -526,8 +560,9 @@ def main():
        else:
            obj_term = None

-        # Graph is always an IRI
-        graph_term = build_term(args.graph, term_type='iri') if args.graph else None
+        # Graph is a plain IRI string, not a Term
+        # None = all graphs, "" = default graph only, "uri" = specific graph
+        graph_value = args.graph

        query_graph(
            url=args.api_url,
@ -539,7 +574,7 @@ def main():
            subject=subject_term,
            predicate=predicate_term,
            obj=obj_term,
-            graph=graph_term,
+            graph=graph_value,
            output_format=args.format,
            headers=args.headers,
            token=args.token,