From 2c3a699af39111411ab02389781d8fffe2871c0c Mon Sep 17 00:00:00 2001
From: cybermaggedon <cybermaggedon@gmail.com>
Date: Thu, 21 May 2026 10:50:11 +0100
Subject: [PATCH] feat: extend SPARQL evaluator with comprehensive function and
 operator support (#945)

Add 30+ SPARQL 1.1 built-in functions and the MINUS algebra operator to the
custom SPARQL query backend.

String functions:
- SUBSTR (2-arg and 3-arg forms), STRBEFORE, STRAFTER
- REPLACE (regex with flags), ENCODE_FOR_URI

Numeric functions:
- FLOOR, CEIL, ROUND, ABS

Date/time accessors:
- YEAR, MONTH, DAY, HOURS, MINUTES, SECONDS
- NOW, TZ

Hash functions:
- MD5, SHA1, SHA256, SHA512

Term constructors:
- IRI/URI, BNODE, UUID, STRUUID

Other functions:
- LANGMATCHES, RAND
- EXISTS / NOT EXISTS (with async pre-evaluation to bridge the
  sync expression evaluator and async algebra evaluator)

Algebra:
- MINUS set-difference operator
- HAVING already works via rdflib's Filter mapping (verified)

Fix SPARQL ORDER handling

Includes 653 lines of new unit tests covering all added functionality
across expressions, solutions, and algebra layers.
---
 tests/unit/test_query/test_sparql_algebra.py  | 185 ++++++++
 .../test_query/test_sparql_expressions.py     | 432 ++++++++++++++++++
 .../unit/test_query/test_sparql_solutions.py  |  60 ++-
 .../trustgraph/query/sparql/algebra.py        |  74 ++-
 .../trustgraph/query/sparql/expressions.py    | 238 +++++++++-
 .../trustgraph/query/sparql/solutions.py      |  61 ++-
 6 files changed, 1021 insertions(+), 29 deletions(-)

diff --git a/tests/unit/test_query/test_sparql_algebra.py b/tests/unit/test_query/test_sparql_algebra.py
index 9827b2de..980ce870 100644
--- a/tests/unit/test_query/test_sparql_algebra.py
+++ b/tests/unit/test_query/test_sparql_algebra.py
@@ -84,6 +84,20 @@ def make_distinct(inner):
     return node
 
 
+def make_filter(inner, expr):
+    node = CompValue("Filter")
+    node.p = inner
+    node.expr = expr
+    return node
+
+
+def make_minus(left, right):
+    node = CompValue("Minus")
+    node.p1 = left
+    node.p2 = right
+    return node
+
+
 class TestQueryPattern:
     """Tests for _query_pattern — the leaf that calls TriplesClient."""
 
@@ -282,6 +296,177 @@ class TestEvaluate:
 
         assert len(solutions) == 1
 
+    @pytest.mark.asyncio
+    async def test_minus_removes_matching(self):
+        tc = AsyncMock()
+
+        alice = iri("http://example.com/alice")
+        bob = iri("http://example.com/bob")
+        knows = iri("http://example.com/knows")
+        hates = iri("http://example.com/hates")
+        charlie = iri("http://example.com/charlie")
+
+        left_triple = make_triple(alice, knows, bob)
+        right_triple1 = make_triple(alice, knows, bob)
+        right_triple2 = make_triple(alice, hates, charlie)
+
+        left_bgp = make_bgp(
+            (Variable("s"), URIRef("http://example.com/knows"), Variable("o"))
+        )
+        right_bgp = make_bgp(
+            (Variable("s"), URIRef("http://example.com/hates"), Variable("r"))
+        )
+
+        async def mock_query(**kwargs):
+            pred = kwargs.get("p")
+            if pred and pred.iri == "http://example.com/knows":
+                return [left_triple]
+            elif pred and pred.iri == "http://example.com/hates":
+                return [right_triple2]
+            return []
+
+        tc.query.side_effect = mock_query
+
+        tree = make_select(
+            make_project(
+                make_minus(left_bgp, right_bgp),
+                ["s", "o"]
+            )
+        )
+
+        solutions = await evaluate(tree, tc, collection="default")
+
+        # alice knows bob, but alice also hates charlie
+        # shared var is "s" (alice), so alice's solution is removed
+        assert len(solutions) == 0
+
+    @pytest.mark.asyncio
+    async def test_minus_no_shared_vars_preserves_all(self):
+        tc = AsyncMock()
+
+        alice = iri("http://example.com/alice")
+        bob = iri("http://example.com/bob")
+
+        left_triple = make_triple(alice, iri("http://example.com/p"), bob)
+
+        left_bgp = make_bgp(
+            (Variable("s"), URIRef("http://example.com/p"), Variable("o"))
+        )
+        right_bgp = make_bgp(
+            (Variable("x"), URIRef("http://example.com/q"), Variable("y"))
+        )
+
+        async def mock_query(**kwargs):
+            pred = kwargs.get("p")
+            if pred and pred.iri == "http://example.com/p":
+                return [left_triple]
+            return []
+
+        tc.query.side_effect = mock_query
+
+        tree = make_select(
+            make_project(
+                make_minus(left_bgp, right_bgp),
+                ["s", "o"]
+            )
+        )
+
+        solutions = await evaluate(tree, tc, collection="default")
+
+        assert len(solutions) == 1
+
+    @pytest.mark.asyncio
+    async def test_filter_exists_keeps_matching(self):
+        tc = AsyncMock()
+
+        alice = iri("http://example.com/alice")
+        bob = iri("http://example.com/bob")
+        charlie = iri("http://example.com/charlie")
+
+        left_triple1 = make_triple(alice, iri("http://example.com/knows"), bob)
+        left_triple2 = make_triple(alice, iri("http://example.com/knows"), charlie)
+        exists_triple = make_triple(bob, iri("http://example.com/likes"), alice)
+
+        left_bgp = make_bgp(
+            (Variable("s"), URIRef("http://example.com/knows"), Variable("o"))
+        )
+        exists_bgp = make_bgp(
+            (Variable("o"), URIRef("http://example.com/likes"), Variable("_any"))
+        )
+
+        async def mock_query(**kwargs):
+            pred = kwargs.get("p")
+            if pred and pred.iri == "http://example.com/knows":
+                return [left_triple1, left_triple2]
+            elif pred and pred.iri == "http://example.com/likes":
+                return [exists_triple]
+            return []
+
+        tc.query.side_effect = mock_query
+
+        exists_expr = CompValue("Builtin_EXISTS")
+        exists_expr.graph = exists_bgp
+
+        tree = make_select(
+            make_project(
+                make_filter(left_bgp, exists_expr),
+                ["s", "o"]
+            )
+        )
+
+        solutions = await evaluate(tree, tc, collection="default")
+
+        # Only bob has a "likes" triple, so only the bob solution passes
+        result_objects = [s["o"].iri for s in solutions]
+        assert "http://example.com/bob" in result_objects
+        assert "http://example.com/charlie" not in result_objects
+
+    @pytest.mark.asyncio
+    async def test_filter_not_exists_removes_matching(self):
+        tc = AsyncMock()
+
+        alice = iri("http://example.com/alice")
+        bob = iri("http://example.com/bob")
+        charlie = iri("http://example.com/charlie")
+
+        left_triple1 = make_triple(alice, iri("http://example.com/knows"), bob)
+        left_triple2 = make_triple(alice, iri("http://example.com/knows"), charlie)
+        exists_triple = make_triple(bob, iri("http://example.com/likes"), alice)
+
+        left_bgp = make_bgp(
+            (Variable("s"), URIRef("http://example.com/knows"), Variable("o"))
+        )
+        exists_bgp = make_bgp(
+            (Variable("o"), URIRef("http://example.com/likes"), Variable("_any"))
+        )
+
+        async def mock_query(**kwargs):
+            pred = kwargs.get("p")
+            if pred and pred.iri == "http://example.com/knows":
+                return [left_triple1, left_triple2]
+            elif pred and pred.iri == "http://example.com/likes":
+                return [exists_triple]
+            return []
+
+        tc.query.side_effect = mock_query
+
+        not_exists_expr = CompValue("Builtin_NOTEXISTS")
+        not_exists_expr.graph = exists_bgp
+
+        tree = make_select(
+            make_project(
+                make_filter(left_bgp, not_exists_expr),
+                ["s", "o"]
+            )
+        )
+
+        solutions = await evaluate(tree, tc, collection="default")
+
+        # bob has a "likes" triple so is removed; charlie stays
+        result_objects = [s["o"].iri for s in solutions]
+        assert "http://example.com/charlie" in result_objects
+        assert "http://example.com/bob" not in result_objects
+
     @pytest.mark.asyncio
     async def test_unsupported_node_returns_empty_solution(self):
         tc = AsyncMock()
diff --git a/tests/unit/test_query/test_sparql_expressions.py b/tests/unit/test_query/test_sparql_expressions.py
index 63e9188f..87c862e8 100644
--- a/tests/unit/test_query/test_sparql_expressions.py
+++ b/tests/unit/test_query/test_sparql_expressions.py
@@ -300,6 +300,438 @@ class TestBuiltinFunctions:
                                   flags=None)
         assert evaluate_expression(expr, {"x": lit("hello")}) is False
 
+    def test_substr_three_args(self):
+        from rdflib.term import Variable
+        from rdflib import Literal
+        expr = self._make_builtin("SUBSTR",
+                                  arg=Variable("x"),
+                                  start=Literal(1),
+                                  length=Literal(4))
+        result = evaluate_expression(expr, {"x": lit("2024-03-15")})
+        assert result.type == LITERAL
+        assert result.value == "2024"
+
+    def test_substr_two_args(self):
+        from rdflib.term import Variable
+        from rdflib import Literal
+        expr = self._make_builtin("SUBSTR",
+                                  arg=Variable("x"),
+                                  start=Literal(6),
+                                  length=None)
+        result = evaluate_expression(expr, {"x": lit("2024-03-15")})
+        assert result.type == LITERAL
+        assert result.value == "03-15"
+
+    def test_substr_middle(self):
+        from rdflib.term import Variable
+        from rdflib import Literal
+        expr = self._make_builtin("SUBSTR",
+                                  arg=Variable("x"),
+                                  start=Literal(6),
+                                  length=Literal(2))
+        result = evaluate_expression(expr, {"x": lit("2024-03-15")})
+        assert result.type == LITERAL
+        assert result.value == "03"
+
+    def test_substr_null_start(self):
+        from rdflib.term import Variable
+        from rdflib import Literal
+        expr = self._make_builtin("SUBSTR",
+                                  arg=Variable("x"),
+                                  start=Variable("missing"),
+                                  length=None)
+        result = evaluate_expression(expr, {"x": lit("hello")})
+        assert result is None
+
+    def test_year(self):
+        from rdflib.term import Variable
+        expr = self._make_builtin("YEAR", arg=Variable("x"))
+        result = evaluate_expression(
+            expr, {"x": lit("2024-03-15", datatype=XSD + "date")}
+        )
+        assert result == 2024
+
+    def test_month(self):
+        from rdflib.term import Variable
+        expr = self._make_builtin("MONTH", arg=Variable("x"))
+        result = evaluate_expression(
+            expr, {"x": lit("2024-03-15", datatype=XSD + "date")}
+        )
+        assert result == 3
+
+    def test_day(self):
+        from rdflib.term import Variable
+        expr = self._make_builtin("DAY", arg=Variable("x"))
+        result = evaluate_expression(
+            expr, {"x": lit("2024-03-15", datatype=XSD + "date")}
+        )
+        assert result == 15
+
+    def test_hours(self):
+        from rdflib.term import Variable
+        expr = self._make_builtin("HOURS", arg=Variable("x"))
+        result = evaluate_expression(
+            expr, {"x": lit("2024-03-15T10:30:45", datatype=XSD + "dateTime")}
+        )
+        assert result == 10
+
+    def test_minutes(self):
+        from rdflib.term import Variable
+        expr = self._make_builtin("MINUTES", arg=Variable("x"))
+        result = evaluate_expression(
+            expr, {"x": lit("2024-03-15T10:30:45", datatype=XSD + "dateTime")}
+        )
+        assert result == 30
+
+    def test_seconds(self):
+        from rdflib.term import Variable
+        expr = self._make_builtin("SECONDS", arg=Variable("x"))
+        result = evaluate_expression(
+            expr, {"x": lit("2024-03-15T10:30:45", datatype=XSD + "dateTime")}
+        )
+        assert result == 45
+
+    def test_year_from_datetime(self):
+        from rdflib.term import Variable
+        expr = self._make_builtin("YEAR", arg=Variable("x"))
+        result = evaluate_expression(
+            expr, {"x": lit("2024-03-15T10:30:45", datatype=XSD + "dateTime")}
+        )
+        assert result == 2024
+
+    def test_hours_from_date_returns_zero(self):
+        from rdflib.term import Variable
+        expr = self._make_builtin("HOURS", arg=Variable("x"))
+        result = evaluate_expression(
+            expr, {"x": lit("2024-03-15", datatype=XSD + "date")}
+        )
+        assert result == 0
+
+    def test_year_invalid_date(self):
+        from rdflib.term import Variable
+        expr = self._make_builtin("YEAR", arg=Variable("x"))
+        result = evaluate_expression(
+            expr, {"x": lit("not-a-date")}
+        )
+        assert result is None
+
+    def test_floor(self):
+        from rdflib.term import Variable
+        expr = self._make_builtin("FLOOR", arg=Variable("x"))
+        assert evaluate_expression(expr, {"x": lit("3.7")}) == 3
+
+    def test_floor_negative(self):
+        from rdflib.term import Variable
+        expr = self._make_builtin("FLOOR", arg=Variable("x"))
+        assert evaluate_expression(expr, {"x": lit("-2.3")}) == -3
+
+    def test_floor_none(self):
+        from rdflib.term import Variable
+        expr = self._make_builtin("FLOOR", arg=Variable("x"))
+        assert evaluate_expression(expr, {"x": lit("abc")}) is None
+
+    def test_ceil(self):
+        from rdflib.term import Variable
+        expr = self._make_builtin("CEIL", arg=Variable("x"))
+        assert evaluate_expression(expr, {"x": lit("3.2")}) == 4
+
+    def test_ceil_negative(self):
+        from rdflib.term import Variable
+        expr = self._make_builtin("CEIL", arg=Variable("x"))
+        assert evaluate_expression(expr, {"x": lit("-2.7")}) == -2
+
+    def test_abs_positive(self):
+        from rdflib.term import Variable
+        expr = self._make_builtin("ABS", arg=Variable("x"))
+        assert evaluate_expression(expr, {"x": lit("42")}) == 42
+
+    def test_abs_negative(self):
+        from rdflib.term import Variable
+        expr = self._make_builtin("ABS", arg=Variable("x"))
+        assert evaluate_expression(expr, {"x": lit("-42")}) == 42
+
+    def test_abs_none(self):
+        from rdflib.term import Variable
+        expr = self._make_builtin("ABS", arg=Variable("x"))
+        assert evaluate_expression(expr, {"x": lit("abc")}) is None
+
+    def test_replace_simple(self):
+        from rdflib.term import Variable
+        from rdflib import Literal
+        expr = self._make_builtin("REPLACE",
+                                  arg=Variable("x"),
+                                  pattern=Literal(" BC"),
+                                  replacement=Literal(""),
+                                  flags=None)
+        result = evaluate_expression(expr, {"x": lit("500 BC")})
+        assert result.type == LITERAL
+        assert result.value == "500"
+
+    def test_replace_regex(self):
+        from rdflib.term import Variable
+        from rdflib import Literal
+        expr = self._make_builtin("REPLACE",
+                                  arg=Variable("x"),
+                                  pattern=Literal("[0-9]+"),
+                                  replacement=Literal("X"),
+                                  flags=None)
+        result = evaluate_expression(expr, {"x": lit("abc123def456")})
+        assert result.value == "abcXdefX"
+
+    def test_replace_case_insensitive(self):
+        from rdflib.term import Variable
+        from rdflib import Literal
+        expr = self._make_builtin("REPLACE",
+                                  arg=Variable("x"),
+                                  pattern=Literal("hello"),
+                                  replacement=Literal("world"),
+                                  flags=Literal("i"))
+        result = evaluate_expression(expr, {"x": lit("HELLO there")})
+        assert result.value == "world there"
+
+    def test_round_up(self):
+        from rdflib.term import Variable
+        expr = self._make_builtin("ROUND", arg=Variable("x"))
+        assert evaluate_expression(expr, {"x": lit("3.7")}) == 4
+
+    def test_round_down(self):
+        from rdflib.term import Variable
+        expr = self._make_builtin("ROUND", arg=Variable("x"))
+        assert evaluate_expression(expr, {"x": lit("3.2")}) == 3
+
+    def test_round_none(self):
+        from rdflib.term import Variable
+        expr = self._make_builtin("ROUND", arg=Variable("x"))
+        assert evaluate_expression(expr, {"x": lit("abc")}) is None
+
+    def test_strbefore(self):
+        from rdflib.term import Variable
+        from rdflib import Literal
+        expr = self._make_builtin("STRBEFORE",
+                                  arg1=Variable("x"), arg2=Literal("-"))
+        result = evaluate_expression(expr, {"x": lit("2024-03-15")})
+        assert result.value == "2024"
+
+    def test_strbefore_not_found(self):
+        from rdflib.term import Variable
+        from rdflib import Literal
+        expr = self._make_builtin("STRBEFORE",
+                                  arg1=Variable("x"), arg2=Literal("/"))
+        result = evaluate_expression(expr, {"x": lit("hello")})
+        assert result.value == ""
+
+    def test_strafter(self):
+        from rdflib.term import Variable
+        from rdflib import Literal
+        expr = self._make_builtin("STRAFTER",
+                                  arg1=Variable("x"), arg2=Literal("-"))
+        result = evaluate_expression(expr, {"x": lit("2024-03-15")})
+        assert result.value == "03-15"
+
+    def test_strafter_not_found(self):
+        from rdflib.term import Variable
+        from rdflib import Literal
+        expr = self._make_builtin("STRAFTER",
+                                  arg1=Variable("x"), arg2=Literal("/"))
+        result = evaluate_expression(expr, {"x": lit("hello")})
+        assert result.value == ""
+
+    def test_encode_for_uri(self):
+        from rdflib.term import Variable
+        expr = self._make_builtin("ENCODE_FOR_URI", arg=Variable("x"))
+        result = evaluate_expression(expr, {"x": lit("hello world")})
+        assert result.value == "hello%20world"
+
+    def test_encode_for_uri_special_chars(self):
+        from rdflib.term import Variable
+        expr = self._make_builtin("ENCODE_FOR_URI", arg=Variable("x"))
+        result = evaluate_expression(expr, {"x": lit("a/b?c=d&e")})
+        assert result.value == "a%2Fb%3Fc%3Dd%26e"
+
+    def test_langmatches_basic(self):
+        from rdflib.term import Variable
+        from rdflib import Literal
+        expr = self._make_builtin("LANGMATCHES",
+                                  arg1=Literal("en"), arg2=Literal("en"))
+        assert evaluate_expression(expr, {}) is True
+
+    def test_langmatches_subtag(self):
+        from rdflib.term import Variable
+        from rdflib import Literal
+        expr = self._make_builtin("LANGMATCHES",
+                                  arg1=Literal("en-US"), arg2=Literal("en"))
+        assert evaluate_expression(expr, {}) is True
+
+    def test_langmatches_wildcard(self):
+        from rdflib.term import Variable
+        from rdflib import Literal
+        expr = self._make_builtin("LANGMATCHES",
+                                  arg1=Literal("fr"), arg2=Literal("*"))
+        assert evaluate_expression(expr, {}) is True
+
+    def test_langmatches_wildcard_empty(self):
+        from rdflib.term import Variable
+        from rdflib import Literal
+        expr = self._make_builtin("LANGMATCHES",
+                                  arg1=Literal(""), arg2=Literal("*"))
+        assert evaluate_expression(expr, {}) is False
+
+    def test_langmatches_no_match(self):
+        from rdflib.term import Variable
+        from rdflib import Literal
+        expr = self._make_builtin("LANGMATCHES",
+                                  arg1=Literal("fr"), arg2=Literal("en"))
+        assert evaluate_expression(expr, {}) is False
+
+    def test_iri_constructor(self):
+        from rdflib.term import Variable
+        expr = self._make_builtin("IRI", arg=Variable("x"))
+        result = evaluate_expression(
+            expr, {"x": lit("http://example.com/test")}
+        )
+        assert result.type == IRI
+        assert result.iri == "http://example.com/test"
+
+    def test_uri_constructor(self):
+        from rdflib.term import Variable
+        expr = self._make_builtin("URI", arg=Variable("x"))
+        result = evaluate_expression(
+            expr, {"x": lit("http://example.com/test")}
+        )
+        assert result.type == IRI
+        assert result.iri == "http://example.com/test"
+
+    def test_bnode_no_arg(self):
+        expr = self._make_builtin("BNODE")
+        result = evaluate_expression(expr, {})
+        assert result.type == BLANK
+        assert len(result.id) > 0
+
+    def test_bnode_with_label(self):
+        from rdflib import Literal
+        expr = self._make_builtin("BNODE", arg=Literal("mynode"))
+        result = evaluate_expression(expr, {})
+        assert result.type == BLANK
+        assert result.id == "mynode"
+
+    def test_now(self):
+        import re as re_mod
+        expr = self._make_builtin("NOW")
+        result = evaluate_expression(expr, {})
+        assert result.type == LITERAL
+        assert result.datatype == XSD + "dateTime"
+        assert re_mod.match(r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}", result.value)
+
+    def test_tz_with_utc(self):
+        from rdflib.term import Variable
+        expr = self._make_builtin("TZ", arg=Variable("x"))
+        result = evaluate_expression(
+            expr, {"x": lit("2024-03-15T10:30:45+0000",
+                            datatype=XSD + "dateTime")}
+        )
+        assert result.type == LITERAL
+        assert result.value == "+00:00"
+
+    def test_tz_no_timezone(self):
+        from rdflib.term import Variable
+        expr = self._make_builtin("TZ", arg=Variable("x"))
+        result = evaluate_expression(
+            expr, {"x": lit("2024-03-15T10:30:45",
+                            datatype=XSD + "dateTime")}
+        )
+        assert result.value == ""
+
+    def test_rand(self):
+        expr = self._make_builtin("RAND")
+        result = evaluate_expression(expr, {})
+        assert isinstance(result, float)
+        assert 0.0 <= result < 1.0
+
+    def test_uuid(self):
+        import re as re_mod
+        expr = self._make_builtin("UUID")
+        result = evaluate_expression(expr, {})
+        assert result.type == IRI
+        assert result.iri.startswith("urn:uuid:")
+        uuid_part = result.iri[len("urn:uuid:"):]
+        assert re_mod.match(
+            r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}",
+            uuid_part
+        )
+
+    def test_struuid(self):
+        import re as re_mod
+        expr = self._make_builtin("STRUUID")
+        result = evaluate_expression(expr, {})
+        assert result.type == LITERAL
+        assert re_mod.match(
+            r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}",
+            result.value
+        )
+
+    def test_md5(self):
+        from rdflib.term import Variable
+        expr = self._make_builtin("MD5", arg=Variable("x"))
+        result = evaluate_expression(expr, {"x": lit("hello")})
+        assert result.type == LITERAL
+        assert result.value == "5d41402abc4b2a76b9719d911017c592"
+
+    def test_sha1(self):
+        from rdflib.term import Variable
+        expr = self._make_builtin("SHA1", arg=Variable("x"))
+        result = evaluate_expression(expr, {"x": lit("hello")})
+        assert result.type == LITERAL
+        assert result.value == "aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d"
+
+    def test_sha256(self):
+        from rdflib.term import Variable
+        expr = self._make_builtin("SHA256", arg=Variable("x"))
+        result = evaluate_expression(expr, {"x": lit("hello")})
+        assert result.type == LITERAL
+        assert result.value == (
+            "2cf24dba5fb0a30e26e83b2ac5b9e29e"
+            "1b161e5c1fa7425e73043362938b9824"
+        )
+
+    def test_sha512(self):
+        from rdflib.term import Variable
+        expr = self._make_builtin("SHA512", arg=Variable("x"))
+        result = evaluate_expression(expr, {"x": lit("hello")})
+        assert result.type == LITERAL
+        assert len(result.value) == 128
+
+    def test_exists_with_callback(self):
+        from rdflib.plugins.sparql.parserutils import CompValue
+        graph = CompValue("BGP")
+        expr = self._make_builtin("EXISTS", graph=graph)
+        cb = lambda g, s: True
+        result = evaluate_expression(expr, {}, exists_cb=cb)
+        assert result is True
+
+    def test_exists_callback_false(self):
+        from rdflib.plugins.sparql.parserutils import CompValue
+        graph = CompValue("BGP")
+        expr = self._make_builtin("EXISTS", graph=graph)
+        cb = lambda g, s: False
+        result = evaluate_expression(expr, {}, exists_cb=cb)
+        assert result is False
+
+    def test_notexists_with_callback(self):
+        from rdflib.plugins.sparql.parserutils import CompValue
+        graph = CompValue("BGP")
+        expr = self._make_builtin("NOTEXISTS", graph=graph)
+        cb = lambda g, s: True
+        result = evaluate_expression(expr, {}, exists_cb=cb)
+        assert result is False
+
+    def test_notexists_callback_false(self):
+        from rdflib.plugins.sparql.parserutils import CompValue
+        graph = CompValue("BGP")
+        expr = self._make_builtin("NOTEXISTS", graph=graph)
+        cb = lambda g, s: False
+        result = evaluate_expression(expr, {}, exists_cb=cb)
+        assert result is True
+
 
 class TestEffectiveBoolean:
 
diff --git a/tests/unit/test_query/test_sparql_solutions.py b/tests/unit/test_query/test_sparql_solutions.py
index 5805ca84..7588a95b 100644
--- a/tests/unit/test_query/test_sparql_solutions.py
+++ b/tests/unit/test_query/test_sparql_solutions.py
@@ -5,7 +5,7 @@ Tests for SPARQL solution sequence operations.
 import pytest
 from trustgraph.schema import Term, IRI, LITERAL
 from trustgraph.query.sparql.solutions import (
-    hash_join, left_join, union, project, distinct,
+    hash_join, left_join, minus, union, project, distinct,
     order_by, slice_solutions, _terms_equal, _compatible,
 )
 
@@ -311,6 +311,30 @@ class TestOrderBy:
         result = order_by(solutions, [])
         assert len(result) == 1
 
+    def test_order_by_numeric_literals(self):
+        solutions = [
+            {"year": lit("1950")},
+            {"year": lit("700")},
+            {"year": lit("2000")},
+            {"year": lit("450")},
+            {"year": lit("1200")},
+        ]
+        key_fns = [(lambda sol: sol.get("year"), True)]
+        result = order_by(solutions, key_fns)
+        values = [s["year"].value for s in result]
+        assert values == ["450", "700", "1200", "1950", "2000"]
+
+    def test_order_by_numeric_descending(self):
+        solutions = [
+            {"year": lit("1950")},
+            {"year": lit("700")},
+            {"year": lit("2000")},
+        ]
+        key_fns = [(lambda sol: sol.get("year"), False)]
+        result = order_by(solutions, key_fns)
+        values = [s["year"].value for s in result]
+        assert values == ["2000", "1950", "700"]
+
 
 class TestSlice:
 
@@ -343,3 +367,37 @@ class TestSlice:
         solutions = [{"s": alice}, {"s": bob}]
         result = slice_solutions(solutions)
         assert len(result) == 2
+
+
+class TestMinus:
+
+    def test_removes_compatible(self, alice, bob):
+        left = [{"s": alice}, {"s": bob}]
+        right = [{"s": alice}]
+        result = minus(left, right)
+        assert len(result) == 1
+        assert result[0]["s"].iri == "http://example.com/bob"
+
+    def test_empty_right_preserves_all(self, alice, bob):
+        left = [{"s": alice}, {"s": bob}]
+        result = minus(left, [])
+        assert len(result) == 2
+
+    def test_no_shared_variables_preserves_all(self, alice, bob):
+        left = [{"s": alice}]
+        right = [{"t": bob}]
+        result = minus(left, right)
+        assert len(result) == 1
+
+    def test_all_removed(self, alice):
+        left = [{"s": alice}]
+        right = [{"s": alice}]
+        result = minus(left, right)
+        assert len(result) == 0
+
+    def test_partial_shared_variables(self, alice, bob):
+        left = [{"s": alice, "p": lit("x")}, {"s": bob, "p": lit("y")}]
+        right = [{"s": alice}]
+        result = minus(left, right)
+        assert len(result) == 1
+        assert result[0]["s"].iri == "http://example.com/bob"
diff --git a/trustgraph-flow/trustgraph/query/sparql/algebra.py b/trustgraph-flow/trustgraph/query/sparql/algebra.py
index 76b1ad8e..6f0227c8 100644
--- a/trustgraph-flow/trustgraph/query/sparql/algebra.py
+++ b/trustgraph-flow/trustgraph/query/sparql/algebra.py
@@ -17,7 +17,7 @@ from ... knowledge import Uri
 from ... knowledge import Literal as KgLiteral
 from . parser import rdflib_term_to_term
 from . solutions import (
-    hash_join, left_join, union, project, distinct,
+    hash_join, left_join, minus, union, project, distinct,
     order_by, slice_solutions, _term_key,
 )
 from . expressions import evaluate_expression, _effective_boolean
@@ -159,14 +159,69 @@ async def _eval_union(node, tc, collection, limit):
     return union(left, right)[:limit]
 
 
+async def _eval_minus(node, tc, collection, limit):
+    """Evaluate a Minus node."""
+    left = await evaluate(node.p1, tc, collection, limit)
+    right = await evaluate(node.p2, tc, collection, limit)
+    return minus(left, right)
+
+
+async def _check_exists(graph_node, sol, tc, collection, limit):
+    """Evaluate an EXISTS graph pattern against a solution."""
+    results = await evaluate(graph_node, tc, collection, limit)
+    for r in results:
+        shared = set(sol.keys()) & set(r.keys())
+        if all(
+            _term_key(sol[v]) == _term_key(r[v])
+            for v in shared
+            if sol.get(v) is not None and r.get(v) is not None
+        ):
+            return True
+    return False
+
+
+async def _pre_eval_exists(expr, sol, tc, collection, limit, cache):
+    """Walk an expression tree, pre-evaluate EXISTS/NOT EXISTS, cache results."""
+    if not isinstance(expr, CompValue):
+        return
+    if expr.name in ("Builtin_EXISTS", "Builtin_NOTEXISTS"):
+        key = id(expr.graph), id(sol)
+        if key not in cache:
+            cache[key] = await _check_exists(
+                expr.graph, sol, tc, collection, limit
+            )
+        return
+    for attr in ("expr", "other", "arg", "arg1", "arg2", "arg3"):
+        child = getattr(expr, attr, None)
+        if child is None:
+            continue
+        if isinstance(child, CompValue):
+            await _pre_eval_exists(child, sol, tc, collection, limit, cache)
+        elif isinstance(child, (list, tuple)):
+            for item in child:
+                if isinstance(item, CompValue):
+                    await _pre_eval_exists(
+                        item, sol, tc, collection, limit, cache
+                    )
+
+
 async def _eval_filter(node, tc, collection, limit):
     """Evaluate a Filter node."""
     solutions = await evaluate(node.p, tc, collection, limit)
     expr = node.expr
-    return [
-        sol for sol in solutions
-        if _effective_boolean(evaluate_expression(expr, sol))
-    ]
+    exists_cache = {}
+
+    def exists_cb(graph_node, sol):
+        key = id(graph_node), id(sol)
+        return exists_cache.get(key, False)
+
+    result = []
+    for sol in solutions:
+        await _pre_eval_exists(expr, sol, tc, collection, limit, exists_cache)
+        if _effective_boolean(evaluate_expression(expr, sol, exists_cb=exists_cb)):
+            result.append(sol)
+
+    return result
 
 
 async def _eval_distinct(node, tc, collection, limit):
@@ -222,10 +277,16 @@ async def _eval_extend(node, tc, collection, limit):
     solutions = await evaluate(node.p, tc, collection, limit)
     var_name = str(node.var)
     expr = node.expr
+    exists_cache = {}
+
+    def exists_cb(graph_node, sol):
+        key = id(graph_node), id(sol)
+        return exists_cache.get(key, False)
 
     result = []
     for sol in solutions:
-        val = evaluate_expression(expr, sol)
+        await _pre_eval_exists(expr, sol, tc, collection, limit, exists_cache)
+        val = evaluate_expression(expr, sol, exists_cb=exists_cb)
         new_sol = dict(sol)
         if isinstance(val, Term):
             new_sol[var_name] = val
@@ -525,6 +586,7 @@ _HANDLERS = {
     "Join": _eval_join,
     "LeftJoin": _eval_left_join,
     "Union": _eval_union,
+    "Minus": _eval_minus,
     "Filter": _eval_filter,
     "Distinct": _eval_distinct,
     "Reduced": _eval_reduced,
diff --git a/trustgraph-flow/trustgraph/query/sparql/expressions.py b/trustgraph-flow/trustgraph/query/sparql/expressions.py
index eac1199c..ad3202d9 100644
--- a/trustgraph-flow/trustgraph/query/sparql/expressions.py
+++ b/trustgraph-flow/trustgraph/query/sparql/expressions.py
@@ -5,9 +5,15 @@ Evaluates rdflib algebra expression nodes against a solution (variable
 binding) to produce a value or boolean result.
 """
 
+import hashlib
+import math
+import random
 import re
 import logging
 import operator
+import uuid
+from datetime import datetime, date, timezone
+from urllib.parse import quote
 
 from rdflib.term import Variable, URIRef, Literal, BNode
 from rdflib.plugins.sparql.parserutils import CompValue
@@ -17,23 +23,31 @@ from . parser import rdflib_term_to_term
 
 logger = logging.getLogger(__name__)
 
+_exists_callback = None
+
 
 class ExpressionError(Exception):
     """Raised when a SPARQL expression cannot be evaluated."""
     pass
 
 
-def evaluate_expression(expr, solution):
+def evaluate_expression(expr, solution, exists_cb=None):
     """
     Evaluate a SPARQL expression against a solution binding.
 
     Args:
         expr: rdflib algebra expression node
         solution: dict mapping variable names to Term values
+        exists_cb: optional callback(graph_node, solution) -> bool for
+            EXISTS/NOT EXISTS evaluation; provided by algebra.py
 
     Returns:
         The result value (Term, bool, number, string, or None)
     """
+    global _exists_callback
+    if exists_cb is not None:
+        _exists_callback = exists_cb
+
     if expr is None:
         return True
 
@@ -119,15 +133,7 @@ def _evaluate_comp_value(node, solution):
     if name == "Function":
         return _eval_function(node, solution)
 
-    # Exists / NotExists
-    if name == "Builtin_EXISTS":
-        # EXISTS requires graph pattern evaluation - not handled here
-        logger.warning("EXISTS not supported in filter expressions")
-        return True
-
-    if name == "Builtin_NOTEXISTS":
-        logger.warning("NOT EXISTS not supported in filter expressions")
-        return True
+    # Exists / NotExists — handled via _eval_builtin now
 
     # TrueFilter (used with OPTIONAL)
     if name == "TrueFilter":
@@ -335,6 +341,197 @@ def _eval_builtin(name, node, solution):
                 return val
         return None
 
+    if builtin == "YEAR":
+        dt = _to_datetime(evaluate_expression(node.arg, solution))
+        return dt.year if dt is not None else None
+
+    if builtin == "MONTH":
+        dt = _to_datetime(evaluate_expression(node.arg, solution))
+        return dt.month if dt is not None else None
+
+    if builtin == "DAY":
+        dt = _to_datetime(evaluate_expression(node.arg, solution))
+        return dt.day if dt is not None else None
+
+    if builtin == "HOURS":
+        dt = _to_datetime(evaluate_expression(node.arg, solution))
+        if dt is None:
+            return None
+        return dt.hour if isinstance(dt, datetime) else 0
+
+    if builtin == "MINUTES":
+        dt = _to_datetime(evaluate_expression(node.arg, solution))
+        if dt is None:
+            return None
+        return dt.minute if isinstance(dt, datetime) else 0
+
+    if builtin == "SECONDS":
+        dt = _to_datetime(evaluate_expression(node.arg, solution))
+        if dt is None:
+            return None
+        return dt.second if isinstance(dt, datetime) else 0
+
+    if builtin == "FLOOR":
+        val = _to_numeric(evaluate_expression(node.arg, solution))
+        if val is None:
+            return None
+        return int(math.floor(val))
+
+    if builtin == "CEIL":
+        val = _to_numeric(evaluate_expression(node.arg, solution))
+        if val is None:
+            return None
+        return int(math.ceil(val))
+
+    if builtin == "ABS":
+        val = _to_numeric(evaluate_expression(node.arg, solution))
+        if val is None:
+            return None
+        return abs(val)
+
+    if builtin == "ROUND":
+        val = _to_numeric(evaluate_expression(node.arg, solution))
+        if val is None:
+            return None
+        return round(val)
+
+    if builtin == "STRBEFORE":
+        string = _to_string(evaluate_expression(node.arg1, solution))
+        sep = _to_string(evaluate_expression(node.arg2, solution))
+        idx = string.find(sep)
+        if idx < 0:
+            return Term(type=LITERAL, value="")
+        return Term(type=LITERAL, value=string[:idx])
+
+    if builtin == "STRAFTER":
+        string = _to_string(evaluate_expression(node.arg1, solution))
+        sep = _to_string(evaluate_expression(node.arg2, solution))
+        idx = string.find(sep)
+        if idx < 0:
+            return Term(type=LITERAL, value="")
+        return Term(type=LITERAL, value=string[idx + len(sep):])
+
+    if builtin == "ENCODE_FOR_URI":
+        val = _to_string(evaluate_expression(node.arg, solution))
+        return Term(type=LITERAL, value=quote(val, safe=""))
+
+    if builtin == "REPLACE":
+        string = _to_string(evaluate_expression(node.arg, solution))
+        pattern = _to_string(evaluate_expression(node.pattern, solution))
+        replacement = _to_string(
+            evaluate_expression(node.replacement, solution)
+        )
+        flags_str = ""
+        if hasattr(node, "flags") and node.flags is not None:
+            flags_str = _to_string(evaluate_expression(node.flags, solution))
+        re_flags = 0
+        if "i" in flags_str:
+            re_flags |= re.IGNORECASE
+        if "m" in flags_str:
+            re_flags |= re.MULTILINE
+        if "s" in flags_str:
+            re_flags |= re.DOTALL
+        try:
+            result = re.sub(pattern, replacement, string, flags=re_flags)
+            return Term(type=LITERAL, value=result)
+        except re.error:
+            return None
+
+    if builtin == "SUBSTR":
+        string = _to_string(evaluate_expression(node.arg, solution))
+        start = _to_numeric(evaluate_expression(node.start, solution))
+        if start is None:
+            return None
+        start_idx = max(int(start) - 1, 0)
+        if hasattr(node, "length") and node.length is not None:
+            length = _to_numeric(evaluate_expression(node.length, solution))
+            if length is None:
+                return None
+            return Term(
+                type=LITERAL, value=string[start_idx:start_idx + int(length)]
+            )
+        return Term(type=LITERAL, value=string[start_idx:])
+
+    if builtin == "EXISTS":
+        if _exists_callback is not None:
+            return _exists_callback(node.graph, solution)
+        logger.warning("EXISTS requires an exists_cb; not available")
+        return True
+
+    if builtin == "NOTEXISTS":
+        if _exists_callback is not None:
+            return not _exists_callback(node.graph, solution)
+        logger.warning("NOT EXISTS requires an exists_cb; not available")
+        return True
+
+    if builtin == "LANGMATCHES":
+        tag = _to_string(evaluate_expression(node.arg1, solution))
+        rng = _to_string(evaluate_expression(node.arg2, solution))
+        if rng == "*":
+            return len(tag) > 0
+        return tag.lower().startswith(rng.lower())
+
+    if builtin == "IRI" or builtin == "URI":
+        val = _to_string(evaluate_expression(node.arg, solution))
+        return Term(type=IRI, iri=val)
+
+    if builtin == "BNODE":
+        if hasattr(node, "arg") and node.arg is not None:
+            label = _to_string(evaluate_expression(node.arg, solution))
+            return Term(type=BLANK, id=label)
+        return Term(type=BLANK, id=str(uuid.uuid4()))
+
+    if builtin == "NOW":
+        now = datetime.now(timezone.utc)
+        return Term(
+            type=LITERAL,
+            value=now.strftime("%Y-%m-%dT%H:%M:%S%z"),
+            datatype="http://www.w3.org/2001/XMLSchema#dateTime",
+        )
+
+    if builtin == "TZ":
+        dt = _to_datetime(evaluate_expression(node.arg, solution))
+        if dt is None:
+            return Term(type=LITERAL, value="")
+        if dt.tzinfo is not None:
+            offset = dt.strftime("%z")
+            if offset:
+                return Term(type=LITERAL, value=offset[:3] + ":" + offset[3:])
+        return Term(type=LITERAL, value="")
+
+    if builtin == "RAND":
+        return random.random()
+
+    if builtin == "UUID":
+        return Term(type=IRI, iri="urn:uuid:" + str(uuid.uuid4()))
+
+    if builtin == "STRUUID":
+        return Term(type=LITERAL, value=str(uuid.uuid4()))
+
+    if builtin == "MD5":
+        val = _to_string(evaluate_expression(node.arg, solution))
+        return Term(
+            type=LITERAL, value=hashlib.md5(val.encode()).hexdigest()
+        )
+
+    if builtin == "SHA1":
+        val = _to_string(evaluate_expression(node.arg, solution))
+        return Term(
+            type=LITERAL, value=hashlib.sha1(val.encode()).hexdigest()
+        )
+
+    if builtin == "SHA256":
+        val = _to_string(evaluate_expression(node.arg, solution))
+        return Term(
+            type=LITERAL, value=hashlib.sha256(val.encode()).hexdigest()
+        )
+
+    if builtin == "SHA512":
+        val = _to_string(evaluate_expression(node.arg, solution))
+        return Term(
+            type=LITERAL, value=hashlib.sha512(val.encode()).hexdigest()
+        )
+
     if builtin == "sameTerm":
         left = evaluate_expression(node.arg1, solution)
         right = evaluate_expression(node.arg2, solution)
@@ -454,6 +651,27 @@ def _to_numeric(val):
     return None
 
 
+def _to_datetime(val):
+    """Convert a value to a date or datetime object."""
+    if val is None:
+        return None
+    s = _to_string(val)
+    for fmt in (
+        "%Y-%m-%dT%H:%M:%S", "%Y-%m-%dT%H:%M:%S%z",
+        "%Y-%m-%dT%H:%M:%SZ", "%Y-%m-%dT%H:%M",
+        "%Y-%m-%d",
+    ):
+        try:
+            return datetime.strptime(s, fmt)
+        except ValueError:
+            continue
+    try:
+        return datetime.fromisoformat(s)
+    except ValueError:
+        pass
+    return None
+
+
 def _comparable_value(val):
     """
     Convert a value to a form suitable for comparison.
diff --git a/trustgraph-flow/trustgraph/query/sparql/solutions.py b/trustgraph-flow/trustgraph/query/sparql/solutions.py
index d1ea8373..edf3401d 100644
--- a/trustgraph-flow/trustgraph/query/sparql/solutions.py
+++ b/trustgraph-flow/trustgraph/query/sparql/solutions.py
@@ -150,6 +150,30 @@ def left_join(left, right, filter_fn=None):
     return results
 
 
+def minus(left, right):
+    """
+    MINUS operation: remove left solutions that are compatible with any
+    right solution sharing at least one variable.
+    """
+    if not right:
+        return list(left)
+
+    right_vars = set()
+    for sol in right:
+        right_vars.update(sol.keys())
+
+    results = []
+    for sol_l in left:
+        shared = set(sol_l.keys()) & right_vars
+        if not shared:
+            results.append(sol_l)
+            continue
+        if not any(_compatible(sol_l, sol_r) for sol_r in right):
+            results.append(sol_l)
+
+    return results
+
+
 def union(left, right):
     """Union two solution sequences (concatenation)."""
     return list(left) + list(right)
@@ -177,6 +201,28 @@ def distinct(solutions):
     return results
 
 
+def _sort_comparable(val):
+    """Convert a value to a form suitable for sort ordering."""
+    if val is None:
+        return (0, "")
+    if isinstance(val, (int, float)):
+        return (2, val)
+    if isinstance(val, Term):
+        if val.type == LITERAL:
+            try:
+                if "." in val.value:
+                    return (2, float(val.value))
+                return (2, int(val.value))
+            except (ValueError, TypeError):
+                pass
+            return (3, val.value)
+        elif val.type == IRI:
+            return (4, val.iri)
+        elif val.type == BLANK:
+            return (5, val.id)
+    return (6, str(val))
+
+
 def order_by(solutions, key_fns):
     """
     Sort solutions by the given key functions.
@@ -191,14 +237,7 @@ def order_by(solutions, key_fns):
         keys = []
         for fn, ascending in key_fns:
             val = fn(sol)
-            # Convert to comparable form
-            if val is None:
-                comparable = ("", "")
-            elif isinstance(val, Term):
-                comparable = _term_key(val)
-            else:
-                comparable = ("v", str(val))
-            keys.append(comparable)
+            keys.append(_sort_comparable(val))
         return keys
 
     # Handle ascending/descending
@@ -224,10 +263,8 @@ def _mixed_sort(solutions, key_fns):
 
     def compare(a, b):
         for fn, ascending in key_fns:
-            va = fn(a)
-            vb = fn(b)
-            ka = _term_key(va) if isinstance(va, Term) else ("v", str(va)) if va is not None else ("", "")
-            kb = _term_key(vb) if isinstance(vb, Term) else ("v", str(vb)) if vb is not None else ("", "")
+            ka = _sort_comparable(fn(a))
+            kb = _sort_comparable(fn(b))
 
             if ka < kb:
                 return -1 if ascending else 1