feat: extend SPARQL evaluator with comprehensive function and operator support (#945)

Add 30+ SPARQL 1.1 built-in functions and the MINUS algebra operator to the custom SPARQL query backend. String functions: - SUBSTR (2-arg and 3-arg forms), STRBEFORE, STRAFTER - REPLACE (regex with flags), ENCODE_FOR_URI Numeric functions: - FLOOR, CEIL, ROUND, ABS Date/time accessors: - YEAR, MONTH, DAY, HOURS, MINUTES, SECONDS - NOW, TZ Hash functions: - MD5, SHA1, SHA256, SHA512 Term constructors: - IRI/URI, BNODE, UUID, STRUUID Other functions: - LANGMATCHES, RAND - EXISTS / NOT EXISTS (with async pre-evaluation to bridge the sync expression evaluator and async algebra evaluator) Algebra: - MINUS set-difference operator - HAVING already works via rdflib's Filter mapping (verified) Fix SPARQL ORDER handling Includes 653 lines of new unit tests covering all added functionality across expressions, solutions, and algebra layers.
2026-05-27 16:25:12 +02:00 · 2026-05-21 10:50:11 +01:00 · 2026-05-21 10:50:11 +01:00 · 2c3a699af3
commit 2c3a699af3
parent e57f4669e1
6 changed files with 1021 additions and 29 deletions
--- a/tests/unit/test_query/test_sparql_algebra.py
+++ b/tests/unit/test_query/test_sparql_algebra.py
@ -84,6 +84,20 @@ def make_distinct(inner):
    return node
 def make_filter(inner, expr):
    node = CompValue("Filter")
    node.p = inner
    node.expr = expr
    return node
 def make_minus(left, right):
    node = CompValue("Minus")
    node.p1 = left
    node.p2 = right
    return node
 class TestQueryPattern:
    """Tests for _query_pattern — the leaf that calls TriplesClient."""
@ -282,6 +296,177 @@ class TestEvaluate:
        assert len(solutions) == 1
    @pytest.mark.asyncio
    async def test_minus_removes_matching(self):
        tc = AsyncMock()
        alice = iri("http://example.com/alice")
        bob = iri("http://example.com/bob")
        knows = iri("http://example.com/knows")
        hates = iri("http://example.com/hates")
        charlie = iri("http://example.com/charlie")
        left_triple = make_triple(alice, knows, bob)
        right_triple1 = make_triple(alice, knows, bob)
        right_triple2 = make_triple(alice, hates, charlie)
        left_bgp = make_bgp(
            (Variable("s"), URIRef("http://example.com/knows"), Variable("o"))
        )
        right_bgp = make_bgp(
            (Variable("s"), URIRef("http://example.com/hates"), Variable("r"))
        )
        async def mock_query(**kwargs):
            pred = kwargs.get("p")
            if pred and pred.iri == "http://example.com/knows":
                return [left_triple]
            elif pred and pred.iri == "http://example.com/hates":
                return [right_triple2]
            return []
        tc.query.side_effect = mock_query
        tree = make_select(
            make_project(
                make_minus(left_bgp, right_bgp),
                ["s", "o"]
            )
        )
        solutions = await evaluate(tree, tc, collection="default")
        # alice knows bob, but alice also hates charlie
        # shared var is "s" (alice), so alice's solution is removed
        assert len(solutions) == 0
    @pytest.mark.asyncio
    async def test_minus_no_shared_vars_preserves_all(self):
        tc = AsyncMock()
        alice = iri("http://example.com/alice")
        bob = iri("http://example.com/bob")
        left_triple = make_triple(alice, iri("http://example.com/p"), bob)
        left_bgp = make_bgp(
            (Variable("s"), URIRef("http://example.com/p"), Variable("o"))
        )
        right_bgp = make_bgp(
            (Variable("x"), URIRef("http://example.com/q"), Variable("y"))
        )
        async def mock_query(**kwargs):
            pred = kwargs.get("p")
            if pred and pred.iri == "http://example.com/p":
                return [left_triple]
            return []
        tc.query.side_effect = mock_query
        tree = make_select(
            make_project(
                make_minus(left_bgp, right_bgp),
                ["s", "o"]
            )
        )
        solutions = await evaluate(tree, tc, collection="default")
        assert len(solutions) == 1
    @pytest.mark.asyncio
    async def test_filter_exists_keeps_matching(self):
        tc = AsyncMock()
        alice = iri("http://example.com/alice")
        bob = iri("http://example.com/bob")
        charlie = iri("http://example.com/charlie")
        left_triple1 = make_triple(alice, iri("http://example.com/knows"), bob)
        left_triple2 = make_triple(alice, iri("http://example.com/knows"), charlie)
        exists_triple = make_triple(bob, iri("http://example.com/likes"), alice)
        left_bgp = make_bgp(
            (Variable("s"), URIRef("http://example.com/knows"), Variable("o"))
        )
        exists_bgp = make_bgp(
            (Variable("o"), URIRef("http://example.com/likes"), Variable("_any"))
        )
        async def mock_query(**kwargs):
            pred = kwargs.get("p")
            if pred and pred.iri == "http://example.com/knows":
                return [left_triple1, left_triple2]
            elif pred and pred.iri == "http://example.com/likes":
                return [exists_triple]
            return []
        tc.query.side_effect = mock_query
        exists_expr = CompValue("Builtin_EXISTS")
        exists_expr.graph = exists_bgp
        tree = make_select(
            make_project(
                make_filter(left_bgp, exists_expr),
                ["s", "o"]
            )
        )
        solutions = await evaluate(tree, tc, collection="default")
        # Only bob has a "likes" triple, so only the bob solution passes
        result_objects = [s["o"].iri for s in solutions]
        assert "http://example.com/bob" in result_objects
        assert "http://example.com/charlie" not in result_objects
    @pytest.mark.asyncio
    async def test_filter_not_exists_removes_matching(self):
        tc = AsyncMock()
        alice = iri("http://example.com/alice")
        bob = iri("http://example.com/bob")
        charlie = iri("http://example.com/charlie")
        left_triple1 = make_triple(alice, iri("http://example.com/knows"), bob)
        left_triple2 = make_triple(alice, iri("http://example.com/knows"), charlie)
        exists_triple = make_triple(bob, iri("http://example.com/likes"), alice)
        left_bgp = make_bgp(
            (Variable("s"), URIRef("http://example.com/knows"), Variable("o"))
        )
        exists_bgp = make_bgp(
            (Variable("o"), URIRef("http://example.com/likes"), Variable("_any"))
        )
        async def mock_query(**kwargs):
            pred = kwargs.get("p")
            if pred and pred.iri == "http://example.com/knows":
                return [left_triple1, left_triple2]
            elif pred and pred.iri == "http://example.com/likes":
                return [exists_triple]
            return []
        tc.query.side_effect = mock_query
        not_exists_expr = CompValue("Builtin_NOTEXISTS")
        not_exists_expr.graph = exists_bgp
        tree = make_select(
            make_project(
                make_filter(left_bgp, not_exists_expr),
                ["s", "o"]
            )
        )
        solutions = await evaluate(tree, tc, collection="default")
        # bob has a "likes" triple so is removed; charlie stays
        result_objects = [s["o"].iri for s in solutions]
        assert "http://example.com/charlie" in result_objects
        assert "http://example.com/bob" not in result_objects
    @pytest.mark.asyncio
    async def test_unsupported_node_returns_empty_solution(self):
        tc = AsyncMock()
--- a/tests/unit/test_query/test_sparql_expressions.py
+++ b/tests/unit/test_query/test_sparql_expressions.py
@ -300,6 +300,438 @@ class TestBuiltinFunctions:
                                  flags=None)
        assert evaluate_expression(expr, {"x": lit("hello")}) is False
    def test_substr_three_args(self):
        from rdflib.term import Variable
        from rdflib import Literal
        expr = self._make_builtin("SUBSTR",
                                  arg=Variable("x"),
                                  start=Literal(1),
                                  length=Literal(4))
        result = evaluate_expression(expr, {"x": lit("2024-03-15")})
        assert result.type == LITERAL
        assert result.value == "2024"
    def test_substr_two_args(self):
        from rdflib.term import Variable
        from rdflib import Literal
        expr = self._make_builtin("SUBSTR",
                                  arg=Variable("x"),
                                  start=Literal(6),
                                  length=None)
        result = evaluate_expression(expr, {"x": lit("2024-03-15")})
        assert result.type == LITERAL
        assert result.value == "03-15"
    def test_substr_middle(self):
        from rdflib.term import Variable
        from rdflib import Literal
        expr = self._make_builtin("SUBSTR",
                                  arg=Variable("x"),
                                  start=Literal(6),
                                  length=Literal(2))
        result = evaluate_expression(expr, {"x": lit("2024-03-15")})
        assert result.type == LITERAL
        assert result.value == "03"
    def test_substr_null_start(self):
        from rdflib.term import Variable
        from rdflib import Literal
        expr = self._make_builtin("SUBSTR",
                                  arg=Variable("x"),
                                  start=Variable("missing"),
                                  length=None)
        result = evaluate_expression(expr, {"x": lit("hello")})
        assert result is None
    def test_year(self):
        from rdflib.term import Variable
        expr = self._make_builtin("YEAR", arg=Variable("x"))
        result = evaluate_expression(
            expr, {"x": lit("2024-03-15", datatype=XSD + "date")}
        )
        assert result == 2024
    def test_month(self):
        from rdflib.term import Variable
        expr = self._make_builtin("MONTH", arg=Variable("x"))
        result = evaluate_expression(
            expr, {"x": lit("2024-03-15", datatype=XSD + "date")}
        )
        assert result == 3
    def test_day(self):
        from rdflib.term import Variable
        expr = self._make_builtin("DAY", arg=Variable("x"))
        result = evaluate_expression(
            expr, {"x": lit("2024-03-15", datatype=XSD + "date")}
        )
        assert result == 15
    def test_hours(self):
        from rdflib.term import Variable
        expr = self._make_builtin("HOURS", arg=Variable("x"))
        result = evaluate_expression(
            expr, {"x": lit("2024-03-15T10:30:45", datatype=XSD + "dateTime")}
        )
        assert result == 10
    def test_minutes(self):
        from rdflib.term import Variable
        expr = self._make_builtin("MINUTES", arg=Variable("x"))
        result = evaluate_expression(
            expr, {"x": lit("2024-03-15T10:30:45", datatype=XSD + "dateTime")}
        )
        assert result == 30
    def test_seconds(self):
        from rdflib.term import Variable
        expr = self._make_builtin("SECONDS", arg=Variable("x"))
        result = evaluate_expression(
            expr, {"x": lit("2024-03-15T10:30:45", datatype=XSD + "dateTime")}
        )
        assert result == 45
    def test_year_from_datetime(self):
        from rdflib.term import Variable
        expr = self._make_builtin("YEAR", arg=Variable("x"))
        result = evaluate_expression(
            expr, {"x": lit("2024-03-15T10:30:45", datatype=XSD + "dateTime")}
        )
        assert result == 2024
    def test_hours_from_date_returns_zero(self):
        from rdflib.term import Variable
        expr = self._make_builtin("HOURS", arg=Variable("x"))
        result = evaluate_expression(
            expr, {"x": lit("2024-03-15", datatype=XSD + "date")}
        )
        assert result == 0
    def test_year_invalid_date(self):
        from rdflib.term import Variable
        expr = self._make_builtin("YEAR", arg=Variable("x"))
        result = evaluate_expression(
            expr, {"x": lit("not-a-date")}
        )
        assert result is None
    def test_floor(self):
        from rdflib.term import Variable
        expr = self._make_builtin("FLOOR", arg=Variable("x"))
        assert evaluate_expression(expr, {"x": lit("3.7")}) == 3
    def test_floor_negative(self):
        from rdflib.term import Variable
        expr = self._make_builtin("FLOOR", arg=Variable("x"))
        assert evaluate_expression(expr, {"x": lit("-2.3")}) == -3
    def test_floor_none(self):
        from rdflib.term import Variable
        expr = self._make_builtin("FLOOR", arg=Variable("x"))
        assert evaluate_expression(expr, {"x": lit("abc")}) is None
    def test_ceil(self):
        from rdflib.term import Variable
        expr = self._make_builtin("CEIL", arg=Variable("x"))
        assert evaluate_expression(expr, {"x": lit("3.2")}) == 4
    def test_ceil_negative(self):
        from rdflib.term import Variable
        expr = self._make_builtin("CEIL", arg=Variable("x"))
        assert evaluate_expression(expr, {"x": lit("-2.7")}) == -2
    def test_abs_positive(self):
        from rdflib.term import Variable
        expr = self._make_builtin("ABS", arg=Variable("x"))
        assert evaluate_expression(expr, {"x": lit("42")}) == 42
    def test_abs_negative(self):
        from rdflib.term import Variable
        expr = self._make_builtin("ABS", arg=Variable("x"))
        assert evaluate_expression(expr, {"x": lit("-42")}) == 42
    def test_abs_none(self):
        from rdflib.term import Variable
        expr = self._make_builtin("ABS", arg=Variable("x"))
        assert evaluate_expression(expr, {"x": lit("abc")}) is None
    def test_replace_simple(self):
        from rdflib.term import Variable
        from rdflib import Literal
        expr = self._make_builtin("REPLACE",
                                  arg=Variable("x"),
                                  pattern=Literal(" BC"),
                                  replacement=Literal(""),
                                  flags=None)
        result = evaluate_expression(expr, {"x": lit("500 BC")})
        assert result.type == LITERAL
        assert result.value == "500"
    def test_replace_regex(self):
        from rdflib.term import Variable
        from rdflib import Literal
        expr = self._make_builtin("REPLACE",
                                  arg=Variable("x"),
                                  pattern=Literal("[0-9]+"),
                                  replacement=Literal("X"),
                                  flags=None)
        result = evaluate_expression(expr, {"x": lit("abc123def456")})
        assert result.value == "abcXdefX"
    def test_replace_case_insensitive(self):
        from rdflib.term import Variable
        from rdflib import Literal
        expr = self._make_builtin("REPLACE",
                                  arg=Variable("x"),
                                  pattern=Literal("hello"),
                                  replacement=Literal("world"),
                                  flags=Literal("i"))
        result = evaluate_expression(expr, {"x": lit("HELLO there")})
        assert result.value == "world there"
    def test_round_up(self):
        from rdflib.term import Variable
        expr = self._make_builtin("ROUND", arg=Variable("x"))
        assert evaluate_expression(expr, {"x": lit("3.7")}) == 4
    def test_round_down(self):
        from rdflib.term import Variable
        expr = self._make_builtin("ROUND", arg=Variable("x"))
        assert evaluate_expression(expr, {"x": lit("3.2")}) == 3
    def test_round_none(self):
        from rdflib.term import Variable
        expr = self._make_builtin("ROUND", arg=Variable("x"))
        assert evaluate_expression(expr, {"x": lit("abc")}) is None
    def test_strbefore(self):
        from rdflib.term import Variable
        from rdflib import Literal
        expr = self._make_builtin("STRBEFORE",
                                  arg1=Variable("x"), arg2=Literal("-"))
        result = evaluate_expression(expr, {"x": lit("2024-03-15")})
        assert result.value == "2024"
    def test_strbefore_not_found(self):
        from rdflib.term import Variable
        from rdflib import Literal
        expr = self._make_builtin("STRBEFORE",
                                  arg1=Variable("x"), arg2=Literal("/"))
        result = evaluate_expression(expr, {"x": lit("hello")})
        assert result.value == ""
    def test_strafter(self):
        from rdflib.term import Variable
        from rdflib import Literal
        expr = self._make_builtin("STRAFTER",
                                  arg1=Variable("x"), arg2=Literal("-"))
        result = evaluate_expression(expr, {"x": lit("2024-03-15")})
        assert result.value == "03-15"
    def test_strafter_not_found(self):
        from rdflib.term import Variable
        from rdflib import Literal
        expr = self._make_builtin("STRAFTER",
                                  arg1=Variable("x"), arg2=Literal("/"))
        result = evaluate_expression(expr, {"x": lit("hello")})
        assert result.value == ""
    def test_encode_for_uri(self):
        from rdflib.term import Variable
        expr = self._make_builtin("ENCODE_FOR_URI", arg=Variable("x"))
        result = evaluate_expression(expr, {"x": lit("hello world")})
        assert result.value == "hello%20world"
    def test_encode_for_uri_special_chars(self):
        from rdflib.term import Variable
        expr = self._make_builtin("ENCODE_FOR_URI", arg=Variable("x"))
        result = evaluate_expression(expr, {"x": lit("a/b?c=d&e")})
        assert result.value == "a%2Fb%3Fc%3Dd%26e"
    def test_langmatches_basic(self):
        from rdflib.term import Variable
        from rdflib import Literal
        expr = self._make_builtin("LANGMATCHES",
                                  arg1=Literal("en"), arg2=Literal("en"))
        assert evaluate_expression(expr, {}) is True
    def test_langmatches_subtag(self):
        from rdflib.term import Variable
        from rdflib import Literal
        expr = self._make_builtin("LANGMATCHES",
                                  arg1=Literal("en-US"), arg2=Literal("en"))
        assert evaluate_expression(expr, {}) is True
    def test_langmatches_wildcard(self):
        from rdflib.term import Variable
        from rdflib import Literal
        expr = self._make_builtin("LANGMATCHES",
                                  arg1=Literal("fr"), arg2=Literal("*"))
        assert evaluate_expression(expr, {}) is True
    def test_langmatches_wildcard_empty(self):
        from rdflib.term import Variable
        from rdflib import Literal
        expr = self._make_builtin("LANGMATCHES",
                                  arg1=Literal(""), arg2=Literal("*"))
        assert evaluate_expression(expr, {}) is False
    def test_langmatches_no_match(self):
        from rdflib.term import Variable
        from rdflib import Literal
        expr = self._make_builtin("LANGMATCHES",
                                  arg1=Literal("fr"), arg2=Literal("en"))
        assert evaluate_expression(expr, {}) is False
    def test_iri_constructor(self):
        from rdflib.term import Variable
        expr = self._make_builtin("IRI", arg=Variable("x"))
        result = evaluate_expression(
            expr, {"x": lit("http://example.com/test")}
        )
        assert result.type == IRI
        assert result.iri == "http://example.com/test"
    def test_uri_constructor(self):
        from rdflib.term import Variable
        expr = self._make_builtin("URI", arg=Variable("x"))
        result = evaluate_expression(
            expr, {"x": lit("http://example.com/test")}
        )
        assert result.type == IRI
        assert result.iri == "http://example.com/test"
    def test_bnode_no_arg(self):
        expr = self._make_builtin("BNODE")
        result = evaluate_expression(expr, {})
        assert result.type == BLANK
        assert len(result.id) > 0
    def test_bnode_with_label(self):
        from rdflib import Literal
        expr = self._make_builtin("BNODE", arg=Literal("mynode"))
        result = evaluate_expression(expr, {})
        assert result.type == BLANK
        assert result.id == "mynode"
    def test_now(self):
        import re as re_mod
        expr = self._make_builtin("NOW")
        result = evaluate_expression(expr, {})
        assert result.type == LITERAL
        assert result.datatype == XSD + "dateTime"
        assert re_mod.match(r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}", result.value)
    def test_tz_with_utc(self):
        from rdflib.term import Variable
        expr = self._make_builtin("TZ", arg=Variable("x"))
        result = evaluate_expression(
            expr, {"x": lit("2024-03-15T10:30:45+0000",
                            datatype=XSD + "dateTime")}
        )
        assert result.type == LITERAL
        assert result.value == "+00:00"
    def test_tz_no_timezone(self):
        from rdflib.term import Variable
        expr = self._make_builtin("TZ", arg=Variable("x"))
        result = evaluate_expression(
            expr, {"x": lit("2024-03-15T10:30:45",
                            datatype=XSD + "dateTime")}
        )
        assert result.value == ""
    def test_rand(self):
        expr = self._make_builtin("RAND")
        result = evaluate_expression(expr, {})
        assert isinstance(result, float)
        assert 0.0 <= result < 1.0
    def test_uuid(self):
        import re as re_mod
        expr = self._make_builtin("UUID")
        result = evaluate_expression(expr, {})
        assert result.type == IRI
        assert result.iri.startswith("urn:uuid:")
        uuid_part = result.iri[len("urn:uuid:"):]
        assert re_mod.match(
            r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}",
            uuid_part
        )
    def test_struuid(self):
        import re as re_mod
        expr = self._make_builtin("STRUUID")
        result = evaluate_expression(expr, {})
        assert result.type == LITERAL
        assert re_mod.match(
            r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}",
            result.value
        )
    def test_md5(self):
        from rdflib.term import Variable
        expr = self._make_builtin("MD5", arg=Variable("x"))
        result = evaluate_expression(expr, {"x": lit("hello")})
        assert result.type == LITERAL
        assert result.value == "5d41402abc4b2a76b9719d911017c592"
    def test_sha1(self):
        from rdflib.term import Variable
        expr = self._make_builtin("SHA1", arg=Variable("x"))
        result = evaluate_expression(expr, {"x": lit("hello")})
        assert result.type == LITERAL
        assert result.value == "aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d"
    def test_sha256(self):
        from rdflib.term import Variable
        expr = self._make_builtin("SHA256", arg=Variable("x"))
        result = evaluate_expression(expr, {"x": lit("hello")})
        assert result.type == LITERAL
        assert result.value == (
            "2cf24dba5fb0a30e26e83b2ac5b9e29e"
            "1b161e5c1fa7425e73043362938b9824"
        )
    def test_sha512(self):
        from rdflib.term import Variable
        expr = self._make_builtin("SHA512", arg=Variable("x"))
        result = evaluate_expression(expr, {"x": lit("hello")})
        assert result.type == LITERAL
        assert len(result.value) == 128
    def test_exists_with_callback(self):
        from rdflib.plugins.sparql.parserutils import CompValue
        graph = CompValue("BGP")
        expr = self._make_builtin("EXISTS", graph=graph)
        cb = lambda g, s: True
        result = evaluate_expression(expr, {}, exists_cb=cb)
        assert result is True
    def test_exists_callback_false(self):
        from rdflib.plugins.sparql.parserutils import CompValue
        graph = CompValue("BGP")
        expr = self._make_builtin("EXISTS", graph=graph)
        cb = lambda g, s: False
        result = evaluate_expression(expr, {}, exists_cb=cb)
        assert result is False
    def test_notexists_with_callback(self):
        from rdflib.plugins.sparql.parserutils import CompValue
        graph = CompValue("BGP")
        expr = self._make_builtin("NOTEXISTS", graph=graph)
        cb = lambda g, s: True
        result = evaluate_expression(expr, {}, exists_cb=cb)
        assert result is False
    def test_notexists_callback_false(self):
        from rdflib.plugins.sparql.parserutils import CompValue
        graph = CompValue("BGP")
        expr = self._make_builtin("NOTEXISTS", graph=graph)
        cb = lambda g, s: False
        result = evaluate_expression(expr, {}, exists_cb=cb)
        assert result is True
 class TestEffectiveBoolean:
--- a/tests/unit/test_query/test_sparql_solutions.py
+++ b/tests/unit/test_query/test_sparql_solutions.py
@ -5,7 +5,7 @@ Tests for SPARQL solution sequence operations.
 import pytest
 from trustgraph.schema import Term, IRI, LITERAL
 from trustgraph.query.sparql.solutions import (
-    hash_join, left_join, union, project, distinct,
+    hash_join, left_join, minus, union, project, distinct,
    order_by, slice_solutions, _terms_equal, _compatible,
 )
@ -311,6 +311,30 @@ class TestOrderBy:
        result = order_by(solutions, [])
        assert len(result) == 1
    def test_order_by_numeric_literals(self):
        solutions = [
            {"year": lit("1950")},
            {"year": lit("700")},
            {"year": lit("2000")},
            {"year": lit("450")},
            {"year": lit("1200")},
        ]
        key_fns = [(lambda sol: sol.get("year"), True)]
        result = order_by(solutions, key_fns)
        values = [s["year"].value for s in result]
        assert values == ["450", "700", "1200", "1950", "2000"]
    def test_order_by_numeric_descending(self):
        solutions = [
            {"year": lit("1950")},
            {"year": lit("700")},
            {"year": lit("2000")},
        ]
        key_fns = [(lambda sol: sol.get("year"), False)]
        result = order_by(solutions, key_fns)
        values = [s["year"].value for s in result]
        assert values == ["2000", "1950", "700"]
 class TestSlice:
@ -343,3 +367,37 @@ class TestSlice:
        solutions = [{"s": alice}, {"s": bob}]
        result = slice_solutions(solutions)
        assert len(result) == 2
 class TestMinus:
    def test_removes_compatible(self, alice, bob):
        left = [{"s": alice}, {"s": bob}]
        right = [{"s": alice}]
        result = minus(left, right)
        assert len(result) == 1
        assert result[0]["s"].iri == "http://example.com/bob"
    def test_empty_right_preserves_all(self, alice, bob):
        left = [{"s": alice}, {"s": bob}]
        result = minus(left, [])
        assert len(result) == 2
    def test_no_shared_variables_preserves_all(self, alice, bob):
        left = [{"s": alice}]
        right = [{"t": bob}]
        result = minus(left, right)
        assert len(result) == 1
    def test_all_removed(self, alice):
        left = [{"s": alice}]
        right = [{"s": alice}]
        result = minus(left, right)
        assert len(result) == 0
    def test_partial_shared_variables(self, alice, bob):
        left = [{"s": alice, "p": lit("x")}, {"s": bob, "p": lit("y")}]
        right = [{"s": alice}]
        result = minus(left, right)
        assert len(result) == 1
        assert result[0]["s"].iri == "http://example.com/bob"
--- a/trustgraph-flow/trustgraph/query/sparql/algebra.py
+++ b/trustgraph-flow/trustgraph/query/sparql/algebra.py
@ -17,7 +17,7 @@ from ... knowledge import Uri
 from ... knowledge import Literal as KgLiteral
 from . parser import rdflib_term_to_term
 from . solutions import (
-    hash_join, left_join, union, project, distinct,
+    hash_join, left_join, minus, union, project, distinct,
    order_by, slice_solutions, _term_key,
 )
 from . expressions import evaluate_expression, _effective_boolean
@ -159,14 +159,69 @@ async def _eval_union(node, tc, collection, limit):
    return union(left, right)[:limit]
 async def _eval_minus(node, tc, collection, limit):
    """Evaluate a Minus node."""
    left = await evaluate(node.p1, tc, collection, limit)
    right = await evaluate(node.p2, tc, collection, limit)
    return minus(left, right)
 async def _check_exists(graph_node, sol, tc, collection, limit):
    """Evaluate an EXISTS graph pattern against a solution."""
    results = await evaluate(graph_node, tc, collection, limit)
    for r in results:
        shared = set(sol.keys()) & set(r.keys())
        if all(
            _term_key(sol[v]) == _term_key(r[v])
            for v in shared
            if sol.get(v) is not None and r.get(v) is not None
        ):
            return True
    return False
 async def _pre_eval_exists(expr, sol, tc, collection, limit, cache):
    """Walk an expression tree, pre-evaluate EXISTS/NOT EXISTS, cache results."""
    if not isinstance(expr, CompValue):
        return
    if expr.name in ("Builtin_EXISTS", "Builtin_NOTEXISTS"):
        key = id(expr.graph), id(sol)
        if key not in cache:
            cache[key] = await _check_exists(
                expr.graph, sol, tc, collection, limit
            )
        return
    for attr in ("expr", "other", "arg", "arg1", "arg2", "arg3"):
        child = getattr(expr, attr, None)
        if child is None:
            continue
        if isinstance(child, CompValue):
            await _pre_eval_exists(child, sol, tc, collection, limit, cache)
        elif isinstance(child, (list, tuple)):
            for item in child:
                if isinstance(item, CompValue):
                    await _pre_eval_exists(
                        item, sol, tc, collection, limit, cache
                    )
 async def _eval_filter(node, tc, collection, limit):
    """Evaluate a Filter node."""
    solutions = await evaluate(node.p, tc, collection, limit)
    expr = node.expr
-    return [
+    exists_cache = {}
-        sol for sol in solutions
+
-        if _effective_boolean(evaluate_expression(expr, sol))
+    def exists_cb(graph_node, sol):
-    ]
+        key = id(graph_node), id(sol)
        return exists_cache.get(key, False)
    result = []
    for sol in solutions:
        await _pre_eval_exists(expr, sol, tc, collection, limit, exists_cache)
        if _effective_boolean(evaluate_expression(expr, sol, exists_cb=exists_cb)):
            result.append(sol)
    return result
 async def _eval_distinct(node, tc, collection, limit):
@ -222,10 +277,16 @@ async def _eval_extend(node, tc, collection, limit):
    solutions = await evaluate(node.p, tc, collection, limit)
    var_name = str(node.var)
    expr = node.expr
    exists_cache = {}
    def exists_cb(graph_node, sol):
        key = id(graph_node), id(sol)
        return exists_cache.get(key, False)
    result = []
    for sol in solutions:
-        val = evaluate_expression(expr, sol)
+        await _pre_eval_exists(expr, sol, tc, collection, limit, exists_cache)
        val = evaluate_expression(expr, sol, exists_cb=exists_cb)
        new_sol = dict(sol)
        if isinstance(val, Term):
            new_sol[var_name] = val
@ -525,6 +586,7 @@ _HANDLERS = {
    "Join": _eval_join,
    "LeftJoin": _eval_left_join,
    "Union": _eval_union,
    "Minus": _eval_minus,
    "Filter": _eval_filter,
    "Distinct": _eval_distinct,
    "Reduced": _eval_reduced,
--- a/trustgraph-flow/trustgraph/query/sparql/expressions.py
+++ b/trustgraph-flow/trustgraph/query/sparql/expressions.py
@ -5,9 +5,15 @@ Evaluates rdflib algebra expression nodes against a solution (variable
 binding) to produce a value or boolean result.
 """
 import hashlib
 import math
 import random
 import re
 import logging
 import operator
 import uuid
 from datetime import datetime, date, timezone
 from urllib.parse import quote
 from rdflib.term import Variable, URIRef, Literal, BNode
 from rdflib.plugins.sparql.parserutils import CompValue
@ -17,23 +23,31 @@ from . parser import rdflib_term_to_term
 logger = logging.getLogger(__name__)
 _exists_callback = None
 class ExpressionError(Exception):
    """Raised when a SPARQL expression cannot be evaluated."""
    pass
-def evaluate_expression(expr, solution):
+def evaluate_expression(expr, solution, exists_cb=None):
    """
    Evaluate a SPARQL expression against a solution binding.
    Args:
        expr: rdflib algebra expression node
        solution: dict mapping variable names to Term values
        exists_cb: optional callback(graph_node, solution) -> bool for
            EXISTS/NOT EXISTS evaluation; provided by algebra.py
    Returns:
        The result value (Term, bool, number, string, or None)
    """
    global _exists_callback
    if exists_cb is not None:
        _exists_callback = exists_cb
    if expr is None:
        return True
@ -119,15 +133,7 @@ def _evaluate_comp_value(node, solution):
    if name == "Function":
        return _eval_function(node, solution)
-    # Exists / NotExists
+    # Exists / NotExists — handled via _eval_builtin now
    if name == "Builtin_EXISTS":
        # EXISTS requires graph pattern evaluation - not handled here
        logger.warning("EXISTS not supported in filter expressions")
        return True
    if name == "Builtin_NOTEXISTS":
        logger.warning("NOT EXISTS not supported in filter expressions")
        return True
    # TrueFilter (used with OPTIONAL)
    if name == "TrueFilter":
@ -335,6 +341,197 @@ def _eval_builtin(name, node, solution):
                return val
        return None
    if builtin == "YEAR":
        dt = _to_datetime(evaluate_expression(node.arg, solution))
        return dt.year if dt is not None else None
    if builtin == "MONTH":
        dt = _to_datetime(evaluate_expression(node.arg, solution))
        return dt.month if dt is not None else None
    if builtin == "DAY":
        dt = _to_datetime(evaluate_expression(node.arg, solution))
        return dt.day if dt is not None else None
    if builtin == "HOURS":
        dt = _to_datetime(evaluate_expression(node.arg, solution))
        if dt is None:
            return None
        return dt.hour if isinstance(dt, datetime) else 0
    if builtin == "MINUTES":
        dt = _to_datetime(evaluate_expression(node.arg, solution))
        if dt is None:
            return None
        return dt.minute if isinstance(dt, datetime) else 0
    if builtin == "SECONDS":
        dt = _to_datetime(evaluate_expression(node.arg, solution))
        if dt is None:
            return None
        return dt.second if isinstance(dt, datetime) else 0
    if builtin == "FLOOR":
        val = _to_numeric(evaluate_expression(node.arg, solution))
        if val is None:
            return None
        return int(math.floor(val))
    if builtin == "CEIL":
        val = _to_numeric(evaluate_expression(node.arg, solution))
        if val is None:
            return None
        return int(math.ceil(val))
    if builtin == "ABS":
        val = _to_numeric(evaluate_expression(node.arg, solution))
        if val is None:
            return None
        return abs(val)
    if builtin == "ROUND":
        val = _to_numeric(evaluate_expression(node.arg, solution))
        if val is None:
            return None
        return round(val)
    if builtin == "STRBEFORE":
        string = _to_string(evaluate_expression(node.arg1, solution))
        sep = _to_string(evaluate_expression(node.arg2, solution))
        idx = string.find(sep)
        if idx < 0:
            return Term(type=LITERAL, value="")
        return Term(type=LITERAL, value=string[:idx])
    if builtin == "STRAFTER":
        string = _to_string(evaluate_expression(node.arg1, solution))
        sep = _to_string(evaluate_expression(node.arg2, solution))
        idx = string.find(sep)
        if idx < 0:
            return Term(type=LITERAL, value="")
        return Term(type=LITERAL, value=string[idx + len(sep):])
    if builtin == "ENCODE_FOR_URI":
        val = _to_string(evaluate_expression(node.arg, solution))
        return Term(type=LITERAL, value=quote(val, safe=""))
    if builtin == "REPLACE":
        string = _to_string(evaluate_expression(node.arg, solution))
        pattern = _to_string(evaluate_expression(node.pattern, solution))
        replacement = _to_string(
            evaluate_expression(node.replacement, solution)
        )
        flags_str = ""
        if hasattr(node, "flags") and node.flags is not None:
            flags_str = _to_string(evaluate_expression(node.flags, solution))
        re_flags = 0
        if "i" in flags_str:
            re_flags |= re.IGNORECASE
        if "m" in flags_str:
            re_flags |= re.MULTILINE
        if "s" in flags_str:
            re_flags |= re.DOTALL
        try:
            result = re.sub(pattern, replacement, string, flags=re_flags)
            return Term(type=LITERAL, value=result)
        except re.error:
            return None
    if builtin == "SUBSTR":
        string = _to_string(evaluate_expression(node.arg, solution))
        start = _to_numeric(evaluate_expression(node.start, solution))
        if start is None:
            return None
        start_idx = max(int(start) - 1, 0)
        if hasattr(node, "length") and node.length is not None:
            length = _to_numeric(evaluate_expression(node.length, solution))
            if length is None:
                return None
            return Term(
                type=LITERAL, value=string[start_idx:start_idx + int(length)]
            )
        return Term(type=LITERAL, value=string[start_idx:])
    if builtin == "EXISTS":
        if _exists_callback is not None:
            return _exists_callback(node.graph, solution)
        logger.warning("EXISTS requires an exists_cb; not available")
        return True
    if builtin == "NOTEXISTS":
        if _exists_callback is not None:
            return not _exists_callback(node.graph, solution)
        logger.warning("NOT EXISTS requires an exists_cb; not available")
        return True
    if builtin == "LANGMATCHES":
        tag = _to_string(evaluate_expression(node.arg1, solution))
        rng = _to_string(evaluate_expression(node.arg2, solution))
        if rng == "*":
            return len(tag) > 0
        return tag.lower().startswith(rng.lower())
    if builtin == "IRI" or builtin == "URI":
        val = _to_string(evaluate_expression(node.arg, solution))
        return Term(type=IRI, iri=val)
    if builtin == "BNODE":
        if hasattr(node, "arg") and node.arg is not None:
            label = _to_string(evaluate_expression(node.arg, solution))
            return Term(type=BLANK, id=label)
        return Term(type=BLANK, id=str(uuid.uuid4()))
    if builtin == "NOW":
        now = datetime.now(timezone.utc)
        return Term(
            type=LITERAL,
            value=now.strftime("%Y-%m-%dT%H:%M:%S%z"),
            datatype="http://www.w3.org/2001/XMLSchema#dateTime",
        )
    if builtin == "TZ":
        dt = _to_datetime(evaluate_expression(node.arg, solution))
        if dt is None:
            return Term(type=LITERAL, value="")
        if dt.tzinfo is not None:
            offset = dt.strftime("%z")
            if offset:
                return Term(type=LITERAL, value=offset[:3] + ":" + offset[3:])
        return Term(type=LITERAL, value="")
    if builtin == "RAND":
        return random.random()
    if builtin == "UUID":
        return Term(type=IRI, iri="urn:uuid:" + str(uuid.uuid4()))
    if builtin == "STRUUID":
        return Term(type=LITERAL, value=str(uuid.uuid4()))
    if builtin == "MD5":
        val = _to_string(evaluate_expression(node.arg, solution))
        return Term(
            type=LITERAL, value=hashlib.md5(val.encode()).hexdigest()
        )
    if builtin == "SHA1":
        val = _to_string(evaluate_expression(node.arg, solution))
        return Term(
            type=LITERAL, value=hashlib.sha1(val.encode()).hexdigest()
        )
    if builtin == "SHA256":
        val = _to_string(evaluate_expression(node.arg, solution))
        return Term(
            type=LITERAL, value=hashlib.sha256(val.encode()).hexdigest()
        )
    if builtin == "SHA512":
        val = _to_string(evaluate_expression(node.arg, solution))
        return Term(
            type=LITERAL, value=hashlib.sha512(val.encode()).hexdigest()
        )
    if builtin == "sameTerm":
        left = evaluate_expression(node.arg1, solution)
        right = evaluate_expression(node.arg2, solution)
@ -454,6 +651,27 @@ def _to_numeric(val):
    return None
 def _to_datetime(val):
    """Convert a value to a date or datetime object."""
    if val is None:
        return None
    s = _to_string(val)
    for fmt in (
        "%Y-%m-%dT%H:%M:%S", "%Y-%m-%dT%H:%M:%S%z",
        "%Y-%m-%dT%H:%M:%SZ", "%Y-%m-%dT%H:%M",
        "%Y-%m-%d",
    ):
        try:
            return datetime.strptime(s, fmt)
        except ValueError:
            continue
    try:
        return datetime.fromisoformat(s)
    except ValueError:
        pass
    return None
 def _comparable_value(val):
    """
    Convert a value to a form suitable for comparison.
--- a/trustgraph-flow/trustgraph/query/sparql/solutions.py
+++ b/trustgraph-flow/trustgraph/query/sparql/solutions.py
@ -150,6 +150,30 @@ def left_join(left, right, filter_fn=None):
    return results
 def minus(left, right):
    """
    MINUS operation: remove left solutions that are compatible with any
    right solution sharing at least one variable.
    """
    if not right:
        return list(left)
    right_vars = set()
    for sol in right:
        right_vars.update(sol.keys())
    results = []
    for sol_l in left:
        shared = set(sol_l.keys()) & right_vars
        if not shared:
            results.append(sol_l)
            continue
        if not any(_compatible(sol_l, sol_r) for sol_r in right):
            results.append(sol_l)
    return results
 def union(left, right):
    """Union two solution sequences (concatenation)."""
    return list(left) + list(right)
@ -177,6 +201,28 @@ def distinct(solutions):
    return results
 def _sort_comparable(val):
    """Convert a value to a form suitable for sort ordering."""
    if val is None:
        return (0, "")
    if isinstance(val, (int, float)):
        return (2, val)
    if isinstance(val, Term):
        if val.type == LITERAL:
            try:
                if "." in val.value:
                    return (2, float(val.value))
                return (2, int(val.value))
            except (ValueError, TypeError):
                pass
            return (3, val.value)
        elif val.type == IRI:
            return (4, val.iri)
        elif val.type == BLANK:
            return (5, val.id)
    return (6, str(val))
 def order_by(solutions, key_fns):
    """
    Sort solutions by the given key functions.
@ -191,14 +237,7 @@ def order_by(solutions, key_fns):
        keys = []
        for fn, ascending in key_fns:
            val = fn(sol)
-            # Convert to comparable form
+            keys.append(_sort_comparable(val))
            if val is None:
                comparable = ("", "")
            elif isinstance(val, Term):
                comparable = _term_key(val)
            else:
                comparable = ("v", str(val))
            keys.append(comparable)
        return keys
    # Handle ascending/descending
@ -224,10 +263,8 @@ def _mixed_sort(solutions, key_fns):
    def compare(a, b):
        for fn, ascending in key_fns:
-            va = fn(a)
+            ka = _sort_comparable(fn(a))
-            vb = fn(b)
+            kb = _sort_comparable(fn(b))
            ka = _term_key(va) if isinstance(va, Term) else ("v", str(va)) if va is not None else ("", "")
            kb = _term_key(vb) if isinstance(vb, Term) else ("v", str(vb)) if vb is not None else ("", "")
            if ka < kb:
                return -1 if ascending else 1