mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-05-27 16:25:12 +02:00
feat: extend SPARQL evaluator with comprehensive function and operator support (#945)
Add 30+ SPARQL 1.1 built-in functions and the MINUS algebra operator to the custom SPARQL query backend. String functions: - SUBSTR (2-arg and 3-arg forms), STRBEFORE, STRAFTER - REPLACE (regex with flags), ENCODE_FOR_URI Numeric functions: - FLOOR, CEIL, ROUND, ABS Date/time accessors: - YEAR, MONTH, DAY, HOURS, MINUTES, SECONDS - NOW, TZ Hash functions: - MD5, SHA1, SHA256, SHA512 Term constructors: - IRI/URI, BNODE, UUID, STRUUID Other functions: - LANGMATCHES, RAND - EXISTS / NOT EXISTS (with async pre-evaluation to bridge the sync expression evaluator and async algebra evaluator) Algebra: - MINUS set-difference operator - HAVING already works via rdflib's Filter mapping (verified) Fix SPARQL ORDER handling Includes 653 lines of new unit tests covering all added functionality across expressions, solutions, and algebra layers.
This commit is contained in:
parent
e57f4669e1
commit
2c3a699af3
6 changed files with 1021 additions and 29 deletions
|
|
@ -84,6 +84,20 @@ def make_distinct(inner):
|
||||||
return node
|
return node
|
||||||
|
|
||||||
|
|
||||||
|
def make_filter(inner, expr):
|
||||||
|
node = CompValue("Filter")
|
||||||
|
node.p = inner
|
||||||
|
node.expr = expr
|
||||||
|
return node
|
||||||
|
|
||||||
|
|
||||||
|
def make_minus(left, right):
|
||||||
|
node = CompValue("Minus")
|
||||||
|
node.p1 = left
|
||||||
|
node.p2 = right
|
||||||
|
return node
|
||||||
|
|
||||||
|
|
||||||
class TestQueryPattern:
|
class TestQueryPattern:
|
||||||
"""Tests for _query_pattern — the leaf that calls TriplesClient."""
|
"""Tests for _query_pattern — the leaf that calls TriplesClient."""
|
||||||
|
|
||||||
|
|
@ -282,6 +296,177 @@ class TestEvaluate:
|
||||||
|
|
||||||
assert len(solutions) == 1
|
assert len(solutions) == 1
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_minus_removes_matching(self):
|
||||||
|
tc = AsyncMock()
|
||||||
|
|
||||||
|
alice = iri("http://example.com/alice")
|
||||||
|
bob = iri("http://example.com/bob")
|
||||||
|
knows = iri("http://example.com/knows")
|
||||||
|
hates = iri("http://example.com/hates")
|
||||||
|
charlie = iri("http://example.com/charlie")
|
||||||
|
|
||||||
|
left_triple = make_triple(alice, knows, bob)
|
||||||
|
right_triple1 = make_triple(alice, knows, bob)
|
||||||
|
right_triple2 = make_triple(alice, hates, charlie)
|
||||||
|
|
||||||
|
left_bgp = make_bgp(
|
||||||
|
(Variable("s"), URIRef("http://example.com/knows"), Variable("o"))
|
||||||
|
)
|
||||||
|
right_bgp = make_bgp(
|
||||||
|
(Variable("s"), URIRef("http://example.com/hates"), Variable("r"))
|
||||||
|
)
|
||||||
|
|
||||||
|
async def mock_query(**kwargs):
|
||||||
|
pred = kwargs.get("p")
|
||||||
|
if pred and pred.iri == "http://example.com/knows":
|
||||||
|
return [left_triple]
|
||||||
|
elif pred and pred.iri == "http://example.com/hates":
|
||||||
|
return [right_triple2]
|
||||||
|
return []
|
||||||
|
|
||||||
|
tc.query.side_effect = mock_query
|
||||||
|
|
||||||
|
tree = make_select(
|
||||||
|
make_project(
|
||||||
|
make_minus(left_bgp, right_bgp),
|
||||||
|
["s", "o"]
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
solutions = await evaluate(tree, tc, collection="default")
|
||||||
|
|
||||||
|
# alice knows bob, but alice also hates charlie
|
||||||
|
# shared var is "s" (alice), so alice's solution is removed
|
||||||
|
assert len(solutions) == 0
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_minus_no_shared_vars_preserves_all(self):
|
||||||
|
tc = AsyncMock()
|
||||||
|
|
||||||
|
alice = iri("http://example.com/alice")
|
||||||
|
bob = iri("http://example.com/bob")
|
||||||
|
|
||||||
|
left_triple = make_triple(alice, iri("http://example.com/p"), bob)
|
||||||
|
|
||||||
|
left_bgp = make_bgp(
|
||||||
|
(Variable("s"), URIRef("http://example.com/p"), Variable("o"))
|
||||||
|
)
|
||||||
|
right_bgp = make_bgp(
|
||||||
|
(Variable("x"), URIRef("http://example.com/q"), Variable("y"))
|
||||||
|
)
|
||||||
|
|
||||||
|
async def mock_query(**kwargs):
|
||||||
|
pred = kwargs.get("p")
|
||||||
|
if pred and pred.iri == "http://example.com/p":
|
||||||
|
return [left_triple]
|
||||||
|
return []
|
||||||
|
|
||||||
|
tc.query.side_effect = mock_query
|
||||||
|
|
||||||
|
tree = make_select(
|
||||||
|
make_project(
|
||||||
|
make_minus(left_bgp, right_bgp),
|
||||||
|
["s", "o"]
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
solutions = await evaluate(tree, tc, collection="default")
|
||||||
|
|
||||||
|
assert len(solutions) == 1
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_filter_exists_keeps_matching(self):
|
||||||
|
tc = AsyncMock()
|
||||||
|
|
||||||
|
alice = iri("http://example.com/alice")
|
||||||
|
bob = iri("http://example.com/bob")
|
||||||
|
charlie = iri("http://example.com/charlie")
|
||||||
|
|
||||||
|
left_triple1 = make_triple(alice, iri("http://example.com/knows"), bob)
|
||||||
|
left_triple2 = make_triple(alice, iri("http://example.com/knows"), charlie)
|
||||||
|
exists_triple = make_triple(bob, iri("http://example.com/likes"), alice)
|
||||||
|
|
||||||
|
left_bgp = make_bgp(
|
||||||
|
(Variable("s"), URIRef("http://example.com/knows"), Variable("o"))
|
||||||
|
)
|
||||||
|
exists_bgp = make_bgp(
|
||||||
|
(Variable("o"), URIRef("http://example.com/likes"), Variable("_any"))
|
||||||
|
)
|
||||||
|
|
||||||
|
async def mock_query(**kwargs):
|
||||||
|
pred = kwargs.get("p")
|
||||||
|
if pred and pred.iri == "http://example.com/knows":
|
||||||
|
return [left_triple1, left_triple2]
|
||||||
|
elif pred and pred.iri == "http://example.com/likes":
|
||||||
|
return [exists_triple]
|
||||||
|
return []
|
||||||
|
|
||||||
|
tc.query.side_effect = mock_query
|
||||||
|
|
||||||
|
exists_expr = CompValue("Builtin_EXISTS")
|
||||||
|
exists_expr.graph = exists_bgp
|
||||||
|
|
||||||
|
tree = make_select(
|
||||||
|
make_project(
|
||||||
|
make_filter(left_bgp, exists_expr),
|
||||||
|
["s", "o"]
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
solutions = await evaluate(tree, tc, collection="default")
|
||||||
|
|
||||||
|
# Only bob has a "likes" triple, so only the bob solution passes
|
||||||
|
result_objects = [s["o"].iri for s in solutions]
|
||||||
|
assert "http://example.com/bob" in result_objects
|
||||||
|
assert "http://example.com/charlie" not in result_objects
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_filter_not_exists_removes_matching(self):
|
||||||
|
tc = AsyncMock()
|
||||||
|
|
||||||
|
alice = iri("http://example.com/alice")
|
||||||
|
bob = iri("http://example.com/bob")
|
||||||
|
charlie = iri("http://example.com/charlie")
|
||||||
|
|
||||||
|
left_triple1 = make_triple(alice, iri("http://example.com/knows"), bob)
|
||||||
|
left_triple2 = make_triple(alice, iri("http://example.com/knows"), charlie)
|
||||||
|
exists_triple = make_triple(bob, iri("http://example.com/likes"), alice)
|
||||||
|
|
||||||
|
left_bgp = make_bgp(
|
||||||
|
(Variable("s"), URIRef("http://example.com/knows"), Variable("o"))
|
||||||
|
)
|
||||||
|
exists_bgp = make_bgp(
|
||||||
|
(Variable("o"), URIRef("http://example.com/likes"), Variable("_any"))
|
||||||
|
)
|
||||||
|
|
||||||
|
async def mock_query(**kwargs):
|
||||||
|
pred = kwargs.get("p")
|
||||||
|
if pred and pred.iri == "http://example.com/knows":
|
||||||
|
return [left_triple1, left_triple2]
|
||||||
|
elif pred and pred.iri == "http://example.com/likes":
|
||||||
|
return [exists_triple]
|
||||||
|
return []
|
||||||
|
|
||||||
|
tc.query.side_effect = mock_query
|
||||||
|
|
||||||
|
not_exists_expr = CompValue("Builtin_NOTEXISTS")
|
||||||
|
not_exists_expr.graph = exists_bgp
|
||||||
|
|
||||||
|
tree = make_select(
|
||||||
|
make_project(
|
||||||
|
make_filter(left_bgp, not_exists_expr),
|
||||||
|
["s", "o"]
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
solutions = await evaluate(tree, tc, collection="default")
|
||||||
|
|
||||||
|
# bob has a "likes" triple so is removed; charlie stays
|
||||||
|
result_objects = [s["o"].iri for s in solutions]
|
||||||
|
assert "http://example.com/charlie" in result_objects
|
||||||
|
assert "http://example.com/bob" not in result_objects
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_unsupported_node_returns_empty_solution(self):
|
async def test_unsupported_node_returns_empty_solution(self):
|
||||||
tc = AsyncMock()
|
tc = AsyncMock()
|
||||||
|
|
|
||||||
|
|
@ -300,6 +300,438 @@ class TestBuiltinFunctions:
|
||||||
flags=None)
|
flags=None)
|
||||||
assert evaluate_expression(expr, {"x": lit("hello")}) is False
|
assert evaluate_expression(expr, {"x": lit("hello")}) is False
|
||||||
|
|
||||||
|
def test_substr_three_args(self):
|
||||||
|
from rdflib.term import Variable
|
||||||
|
from rdflib import Literal
|
||||||
|
expr = self._make_builtin("SUBSTR",
|
||||||
|
arg=Variable("x"),
|
||||||
|
start=Literal(1),
|
||||||
|
length=Literal(4))
|
||||||
|
result = evaluate_expression(expr, {"x": lit("2024-03-15")})
|
||||||
|
assert result.type == LITERAL
|
||||||
|
assert result.value == "2024"
|
||||||
|
|
||||||
|
def test_substr_two_args(self):
|
||||||
|
from rdflib.term import Variable
|
||||||
|
from rdflib import Literal
|
||||||
|
expr = self._make_builtin("SUBSTR",
|
||||||
|
arg=Variable("x"),
|
||||||
|
start=Literal(6),
|
||||||
|
length=None)
|
||||||
|
result = evaluate_expression(expr, {"x": lit("2024-03-15")})
|
||||||
|
assert result.type == LITERAL
|
||||||
|
assert result.value == "03-15"
|
||||||
|
|
||||||
|
def test_substr_middle(self):
|
||||||
|
from rdflib.term import Variable
|
||||||
|
from rdflib import Literal
|
||||||
|
expr = self._make_builtin("SUBSTR",
|
||||||
|
arg=Variable("x"),
|
||||||
|
start=Literal(6),
|
||||||
|
length=Literal(2))
|
||||||
|
result = evaluate_expression(expr, {"x": lit("2024-03-15")})
|
||||||
|
assert result.type == LITERAL
|
||||||
|
assert result.value == "03"
|
||||||
|
|
||||||
|
def test_substr_null_start(self):
|
||||||
|
from rdflib.term import Variable
|
||||||
|
from rdflib import Literal
|
||||||
|
expr = self._make_builtin("SUBSTR",
|
||||||
|
arg=Variable("x"),
|
||||||
|
start=Variable("missing"),
|
||||||
|
length=None)
|
||||||
|
result = evaluate_expression(expr, {"x": lit("hello")})
|
||||||
|
assert result is None
|
||||||
|
|
||||||
|
def test_year(self):
|
||||||
|
from rdflib.term import Variable
|
||||||
|
expr = self._make_builtin("YEAR", arg=Variable("x"))
|
||||||
|
result = evaluate_expression(
|
||||||
|
expr, {"x": lit("2024-03-15", datatype=XSD + "date")}
|
||||||
|
)
|
||||||
|
assert result == 2024
|
||||||
|
|
||||||
|
def test_month(self):
|
||||||
|
from rdflib.term import Variable
|
||||||
|
expr = self._make_builtin("MONTH", arg=Variable("x"))
|
||||||
|
result = evaluate_expression(
|
||||||
|
expr, {"x": lit("2024-03-15", datatype=XSD + "date")}
|
||||||
|
)
|
||||||
|
assert result == 3
|
||||||
|
|
||||||
|
def test_day(self):
|
||||||
|
from rdflib.term import Variable
|
||||||
|
expr = self._make_builtin("DAY", arg=Variable("x"))
|
||||||
|
result = evaluate_expression(
|
||||||
|
expr, {"x": lit("2024-03-15", datatype=XSD + "date")}
|
||||||
|
)
|
||||||
|
assert result == 15
|
||||||
|
|
||||||
|
def test_hours(self):
|
||||||
|
from rdflib.term import Variable
|
||||||
|
expr = self._make_builtin("HOURS", arg=Variable("x"))
|
||||||
|
result = evaluate_expression(
|
||||||
|
expr, {"x": lit("2024-03-15T10:30:45", datatype=XSD + "dateTime")}
|
||||||
|
)
|
||||||
|
assert result == 10
|
||||||
|
|
||||||
|
def test_minutes(self):
|
||||||
|
from rdflib.term import Variable
|
||||||
|
expr = self._make_builtin("MINUTES", arg=Variable("x"))
|
||||||
|
result = evaluate_expression(
|
||||||
|
expr, {"x": lit("2024-03-15T10:30:45", datatype=XSD + "dateTime")}
|
||||||
|
)
|
||||||
|
assert result == 30
|
||||||
|
|
||||||
|
def test_seconds(self):
|
||||||
|
from rdflib.term import Variable
|
||||||
|
expr = self._make_builtin("SECONDS", arg=Variable("x"))
|
||||||
|
result = evaluate_expression(
|
||||||
|
expr, {"x": lit("2024-03-15T10:30:45", datatype=XSD + "dateTime")}
|
||||||
|
)
|
||||||
|
assert result == 45
|
||||||
|
|
||||||
|
def test_year_from_datetime(self):
|
||||||
|
from rdflib.term import Variable
|
||||||
|
expr = self._make_builtin("YEAR", arg=Variable("x"))
|
||||||
|
result = evaluate_expression(
|
||||||
|
expr, {"x": lit("2024-03-15T10:30:45", datatype=XSD + "dateTime")}
|
||||||
|
)
|
||||||
|
assert result == 2024
|
||||||
|
|
||||||
|
def test_hours_from_date_returns_zero(self):
|
||||||
|
from rdflib.term import Variable
|
||||||
|
expr = self._make_builtin("HOURS", arg=Variable("x"))
|
||||||
|
result = evaluate_expression(
|
||||||
|
expr, {"x": lit("2024-03-15", datatype=XSD + "date")}
|
||||||
|
)
|
||||||
|
assert result == 0
|
||||||
|
|
||||||
|
def test_year_invalid_date(self):
|
||||||
|
from rdflib.term import Variable
|
||||||
|
expr = self._make_builtin("YEAR", arg=Variable("x"))
|
||||||
|
result = evaluate_expression(
|
||||||
|
expr, {"x": lit("not-a-date")}
|
||||||
|
)
|
||||||
|
assert result is None
|
||||||
|
|
||||||
|
def test_floor(self):
|
||||||
|
from rdflib.term import Variable
|
||||||
|
expr = self._make_builtin("FLOOR", arg=Variable("x"))
|
||||||
|
assert evaluate_expression(expr, {"x": lit("3.7")}) == 3
|
||||||
|
|
||||||
|
def test_floor_negative(self):
|
||||||
|
from rdflib.term import Variable
|
||||||
|
expr = self._make_builtin("FLOOR", arg=Variable("x"))
|
||||||
|
assert evaluate_expression(expr, {"x": lit("-2.3")}) == -3
|
||||||
|
|
||||||
|
def test_floor_none(self):
|
||||||
|
from rdflib.term import Variable
|
||||||
|
expr = self._make_builtin("FLOOR", arg=Variable("x"))
|
||||||
|
assert evaluate_expression(expr, {"x": lit("abc")}) is None
|
||||||
|
|
||||||
|
def test_ceil(self):
|
||||||
|
from rdflib.term import Variable
|
||||||
|
expr = self._make_builtin("CEIL", arg=Variable("x"))
|
||||||
|
assert evaluate_expression(expr, {"x": lit("3.2")}) == 4
|
||||||
|
|
||||||
|
def test_ceil_negative(self):
|
||||||
|
from rdflib.term import Variable
|
||||||
|
expr = self._make_builtin("CEIL", arg=Variable("x"))
|
||||||
|
assert evaluate_expression(expr, {"x": lit("-2.7")}) == -2
|
||||||
|
|
||||||
|
def test_abs_positive(self):
|
||||||
|
from rdflib.term import Variable
|
||||||
|
expr = self._make_builtin("ABS", arg=Variable("x"))
|
||||||
|
assert evaluate_expression(expr, {"x": lit("42")}) == 42
|
||||||
|
|
||||||
|
def test_abs_negative(self):
|
||||||
|
from rdflib.term import Variable
|
||||||
|
expr = self._make_builtin("ABS", arg=Variable("x"))
|
||||||
|
assert evaluate_expression(expr, {"x": lit("-42")}) == 42
|
||||||
|
|
||||||
|
def test_abs_none(self):
|
||||||
|
from rdflib.term import Variable
|
||||||
|
expr = self._make_builtin("ABS", arg=Variable("x"))
|
||||||
|
assert evaluate_expression(expr, {"x": lit("abc")}) is None
|
||||||
|
|
||||||
|
def test_replace_simple(self):
|
||||||
|
from rdflib.term import Variable
|
||||||
|
from rdflib import Literal
|
||||||
|
expr = self._make_builtin("REPLACE",
|
||||||
|
arg=Variable("x"),
|
||||||
|
pattern=Literal(" BC"),
|
||||||
|
replacement=Literal(""),
|
||||||
|
flags=None)
|
||||||
|
result = evaluate_expression(expr, {"x": lit("500 BC")})
|
||||||
|
assert result.type == LITERAL
|
||||||
|
assert result.value == "500"
|
||||||
|
|
||||||
|
def test_replace_regex(self):
|
||||||
|
from rdflib.term import Variable
|
||||||
|
from rdflib import Literal
|
||||||
|
expr = self._make_builtin("REPLACE",
|
||||||
|
arg=Variable("x"),
|
||||||
|
pattern=Literal("[0-9]+"),
|
||||||
|
replacement=Literal("X"),
|
||||||
|
flags=None)
|
||||||
|
result = evaluate_expression(expr, {"x": lit("abc123def456")})
|
||||||
|
assert result.value == "abcXdefX"
|
||||||
|
|
||||||
|
def test_replace_case_insensitive(self):
|
||||||
|
from rdflib.term import Variable
|
||||||
|
from rdflib import Literal
|
||||||
|
expr = self._make_builtin("REPLACE",
|
||||||
|
arg=Variable("x"),
|
||||||
|
pattern=Literal("hello"),
|
||||||
|
replacement=Literal("world"),
|
||||||
|
flags=Literal("i"))
|
||||||
|
result = evaluate_expression(expr, {"x": lit("HELLO there")})
|
||||||
|
assert result.value == "world there"
|
||||||
|
|
||||||
|
def test_round_up(self):
|
||||||
|
from rdflib.term import Variable
|
||||||
|
expr = self._make_builtin("ROUND", arg=Variable("x"))
|
||||||
|
assert evaluate_expression(expr, {"x": lit("3.7")}) == 4
|
||||||
|
|
||||||
|
def test_round_down(self):
|
||||||
|
from rdflib.term import Variable
|
||||||
|
expr = self._make_builtin("ROUND", arg=Variable("x"))
|
||||||
|
assert evaluate_expression(expr, {"x": lit("3.2")}) == 3
|
||||||
|
|
||||||
|
def test_round_none(self):
|
||||||
|
from rdflib.term import Variable
|
||||||
|
expr = self._make_builtin("ROUND", arg=Variable("x"))
|
||||||
|
assert evaluate_expression(expr, {"x": lit("abc")}) is None
|
||||||
|
|
||||||
|
def test_strbefore(self):
|
||||||
|
from rdflib.term import Variable
|
||||||
|
from rdflib import Literal
|
||||||
|
expr = self._make_builtin("STRBEFORE",
|
||||||
|
arg1=Variable("x"), arg2=Literal("-"))
|
||||||
|
result = evaluate_expression(expr, {"x": lit("2024-03-15")})
|
||||||
|
assert result.value == "2024"
|
||||||
|
|
||||||
|
def test_strbefore_not_found(self):
|
||||||
|
from rdflib.term import Variable
|
||||||
|
from rdflib import Literal
|
||||||
|
expr = self._make_builtin("STRBEFORE",
|
||||||
|
arg1=Variable("x"), arg2=Literal("/"))
|
||||||
|
result = evaluate_expression(expr, {"x": lit("hello")})
|
||||||
|
assert result.value == ""
|
||||||
|
|
||||||
|
def test_strafter(self):
|
||||||
|
from rdflib.term import Variable
|
||||||
|
from rdflib import Literal
|
||||||
|
expr = self._make_builtin("STRAFTER",
|
||||||
|
arg1=Variable("x"), arg2=Literal("-"))
|
||||||
|
result = evaluate_expression(expr, {"x": lit("2024-03-15")})
|
||||||
|
assert result.value == "03-15"
|
||||||
|
|
||||||
|
def test_strafter_not_found(self):
|
||||||
|
from rdflib.term import Variable
|
||||||
|
from rdflib import Literal
|
||||||
|
expr = self._make_builtin("STRAFTER",
|
||||||
|
arg1=Variable("x"), arg2=Literal("/"))
|
||||||
|
result = evaluate_expression(expr, {"x": lit("hello")})
|
||||||
|
assert result.value == ""
|
||||||
|
|
||||||
|
def test_encode_for_uri(self):
|
||||||
|
from rdflib.term import Variable
|
||||||
|
expr = self._make_builtin("ENCODE_FOR_URI", arg=Variable("x"))
|
||||||
|
result = evaluate_expression(expr, {"x": lit("hello world")})
|
||||||
|
assert result.value == "hello%20world"
|
||||||
|
|
||||||
|
def test_encode_for_uri_special_chars(self):
|
||||||
|
from rdflib.term import Variable
|
||||||
|
expr = self._make_builtin("ENCODE_FOR_URI", arg=Variable("x"))
|
||||||
|
result = evaluate_expression(expr, {"x": lit("a/b?c=d&e")})
|
||||||
|
assert result.value == "a%2Fb%3Fc%3Dd%26e"
|
||||||
|
|
||||||
|
def test_langmatches_basic(self):
|
||||||
|
from rdflib.term import Variable
|
||||||
|
from rdflib import Literal
|
||||||
|
expr = self._make_builtin("LANGMATCHES",
|
||||||
|
arg1=Literal("en"), arg2=Literal("en"))
|
||||||
|
assert evaluate_expression(expr, {}) is True
|
||||||
|
|
||||||
|
def test_langmatches_subtag(self):
|
||||||
|
from rdflib.term import Variable
|
||||||
|
from rdflib import Literal
|
||||||
|
expr = self._make_builtin("LANGMATCHES",
|
||||||
|
arg1=Literal("en-US"), arg2=Literal("en"))
|
||||||
|
assert evaluate_expression(expr, {}) is True
|
||||||
|
|
||||||
|
def test_langmatches_wildcard(self):
|
||||||
|
from rdflib.term import Variable
|
||||||
|
from rdflib import Literal
|
||||||
|
expr = self._make_builtin("LANGMATCHES",
|
||||||
|
arg1=Literal("fr"), arg2=Literal("*"))
|
||||||
|
assert evaluate_expression(expr, {}) is True
|
||||||
|
|
||||||
|
def test_langmatches_wildcard_empty(self):
|
||||||
|
from rdflib.term import Variable
|
||||||
|
from rdflib import Literal
|
||||||
|
expr = self._make_builtin("LANGMATCHES",
|
||||||
|
arg1=Literal(""), arg2=Literal("*"))
|
||||||
|
assert evaluate_expression(expr, {}) is False
|
||||||
|
|
||||||
|
def test_langmatches_no_match(self):
|
||||||
|
from rdflib.term import Variable
|
||||||
|
from rdflib import Literal
|
||||||
|
expr = self._make_builtin("LANGMATCHES",
|
||||||
|
arg1=Literal("fr"), arg2=Literal("en"))
|
||||||
|
assert evaluate_expression(expr, {}) is False
|
||||||
|
|
||||||
|
def test_iri_constructor(self):
|
||||||
|
from rdflib.term import Variable
|
||||||
|
expr = self._make_builtin("IRI", arg=Variable("x"))
|
||||||
|
result = evaluate_expression(
|
||||||
|
expr, {"x": lit("http://example.com/test")}
|
||||||
|
)
|
||||||
|
assert result.type == IRI
|
||||||
|
assert result.iri == "http://example.com/test"
|
||||||
|
|
||||||
|
def test_uri_constructor(self):
|
||||||
|
from rdflib.term import Variable
|
||||||
|
expr = self._make_builtin("URI", arg=Variable("x"))
|
||||||
|
result = evaluate_expression(
|
||||||
|
expr, {"x": lit("http://example.com/test")}
|
||||||
|
)
|
||||||
|
assert result.type == IRI
|
||||||
|
assert result.iri == "http://example.com/test"
|
||||||
|
|
||||||
|
def test_bnode_no_arg(self):
|
||||||
|
expr = self._make_builtin("BNODE")
|
||||||
|
result = evaluate_expression(expr, {})
|
||||||
|
assert result.type == BLANK
|
||||||
|
assert len(result.id) > 0
|
||||||
|
|
||||||
|
def test_bnode_with_label(self):
|
||||||
|
from rdflib import Literal
|
||||||
|
expr = self._make_builtin("BNODE", arg=Literal("mynode"))
|
||||||
|
result = evaluate_expression(expr, {})
|
||||||
|
assert result.type == BLANK
|
||||||
|
assert result.id == "mynode"
|
||||||
|
|
||||||
|
def test_now(self):
|
||||||
|
import re as re_mod
|
||||||
|
expr = self._make_builtin("NOW")
|
||||||
|
result = evaluate_expression(expr, {})
|
||||||
|
assert result.type == LITERAL
|
||||||
|
assert result.datatype == XSD + "dateTime"
|
||||||
|
assert re_mod.match(r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}", result.value)
|
||||||
|
|
||||||
|
def test_tz_with_utc(self):
|
||||||
|
from rdflib.term import Variable
|
||||||
|
expr = self._make_builtin("TZ", arg=Variable("x"))
|
||||||
|
result = evaluate_expression(
|
||||||
|
expr, {"x": lit("2024-03-15T10:30:45+0000",
|
||||||
|
datatype=XSD + "dateTime")}
|
||||||
|
)
|
||||||
|
assert result.type == LITERAL
|
||||||
|
assert result.value == "+00:00"
|
||||||
|
|
||||||
|
def test_tz_no_timezone(self):
|
||||||
|
from rdflib.term import Variable
|
||||||
|
expr = self._make_builtin("TZ", arg=Variable("x"))
|
||||||
|
result = evaluate_expression(
|
||||||
|
expr, {"x": lit("2024-03-15T10:30:45",
|
||||||
|
datatype=XSD + "dateTime")}
|
||||||
|
)
|
||||||
|
assert result.value == ""
|
||||||
|
|
||||||
|
def test_rand(self):
|
||||||
|
expr = self._make_builtin("RAND")
|
||||||
|
result = evaluate_expression(expr, {})
|
||||||
|
assert isinstance(result, float)
|
||||||
|
assert 0.0 <= result < 1.0
|
||||||
|
|
||||||
|
def test_uuid(self):
|
||||||
|
import re as re_mod
|
||||||
|
expr = self._make_builtin("UUID")
|
||||||
|
result = evaluate_expression(expr, {})
|
||||||
|
assert result.type == IRI
|
||||||
|
assert result.iri.startswith("urn:uuid:")
|
||||||
|
uuid_part = result.iri[len("urn:uuid:"):]
|
||||||
|
assert re_mod.match(
|
||||||
|
r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}",
|
||||||
|
uuid_part
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_struuid(self):
|
||||||
|
import re as re_mod
|
||||||
|
expr = self._make_builtin("STRUUID")
|
||||||
|
result = evaluate_expression(expr, {})
|
||||||
|
assert result.type == LITERAL
|
||||||
|
assert re_mod.match(
|
||||||
|
r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}",
|
||||||
|
result.value
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_md5(self):
|
||||||
|
from rdflib.term import Variable
|
||||||
|
expr = self._make_builtin("MD5", arg=Variable("x"))
|
||||||
|
result = evaluate_expression(expr, {"x": lit("hello")})
|
||||||
|
assert result.type == LITERAL
|
||||||
|
assert result.value == "5d41402abc4b2a76b9719d911017c592"
|
||||||
|
|
||||||
|
def test_sha1(self):
|
||||||
|
from rdflib.term import Variable
|
||||||
|
expr = self._make_builtin("SHA1", arg=Variable("x"))
|
||||||
|
result = evaluate_expression(expr, {"x": lit("hello")})
|
||||||
|
assert result.type == LITERAL
|
||||||
|
assert result.value == "aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d"
|
||||||
|
|
||||||
|
def test_sha256(self):
|
||||||
|
from rdflib.term import Variable
|
||||||
|
expr = self._make_builtin("SHA256", arg=Variable("x"))
|
||||||
|
result = evaluate_expression(expr, {"x": lit("hello")})
|
||||||
|
assert result.type == LITERAL
|
||||||
|
assert result.value == (
|
||||||
|
"2cf24dba5fb0a30e26e83b2ac5b9e29e"
|
||||||
|
"1b161e5c1fa7425e73043362938b9824"
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_sha512(self):
|
||||||
|
from rdflib.term import Variable
|
||||||
|
expr = self._make_builtin("SHA512", arg=Variable("x"))
|
||||||
|
result = evaluate_expression(expr, {"x": lit("hello")})
|
||||||
|
assert result.type == LITERAL
|
||||||
|
assert len(result.value) == 128
|
||||||
|
|
||||||
|
def test_exists_with_callback(self):
|
||||||
|
from rdflib.plugins.sparql.parserutils import CompValue
|
||||||
|
graph = CompValue("BGP")
|
||||||
|
expr = self._make_builtin("EXISTS", graph=graph)
|
||||||
|
cb = lambda g, s: True
|
||||||
|
result = evaluate_expression(expr, {}, exists_cb=cb)
|
||||||
|
assert result is True
|
||||||
|
|
||||||
|
def test_exists_callback_false(self):
|
||||||
|
from rdflib.plugins.sparql.parserutils import CompValue
|
||||||
|
graph = CompValue("BGP")
|
||||||
|
expr = self._make_builtin("EXISTS", graph=graph)
|
||||||
|
cb = lambda g, s: False
|
||||||
|
result = evaluate_expression(expr, {}, exists_cb=cb)
|
||||||
|
assert result is False
|
||||||
|
|
||||||
|
def test_notexists_with_callback(self):
|
||||||
|
from rdflib.plugins.sparql.parserutils import CompValue
|
||||||
|
graph = CompValue("BGP")
|
||||||
|
expr = self._make_builtin("NOTEXISTS", graph=graph)
|
||||||
|
cb = lambda g, s: True
|
||||||
|
result = evaluate_expression(expr, {}, exists_cb=cb)
|
||||||
|
assert result is False
|
||||||
|
|
||||||
|
def test_notexists_callback_false(self):
|
||||||
|
from rdflib.plugins.sparql.parserutils import CompValue
|
||||||
|
graph = CompValue("BGP")
|
||||||
|
expr = self._make_builtin("NOTEXISTS", graph=graph)
|
||||||
|
cb = lambda g, s: False
|
||||||
|
result = evaluate_expression(expr, {}, exists_cb=cb)
|
||||||
|
assert result is True
|
||||||
|
|
||||||
|
|
||||||
class TestEffectiveBoolean:
|
class TestEffectiveBoolean:
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,7 @@ Tests for SPARQL solution sequence operations.
|
||||||
import pytest
|
import pytest
|
||||||
from trustgraph.schema import Term, IRI, LITERAL
|
from trustgraph.schema import Term, IRI, LITERAL
|
||||||
from trustgraph.query.sparql.solutions import (
|
from trustgraph.query.sparql.solutions import (
|
||||||
hash_join, left_join, union, project, distinct,
|
hash_join, left_join, minus, union, project, distinct,
|
||||||
order_by, slice_solutions, _terms_equal, _compatible,
|
order_by, slice_solutions, _terms_equal, _compatible,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -311,6 +311,30 @@ class TestOrderBy:
|
||||||
result = order_by(solutions, [])
|
result = order_by(solutions, [])
|
||||||
assert len(result) == 1
|
assert len(result) == 1
|
||||||
|
|
||||||
|
def test_order_by_numeric_literals(self):
|
||||||
|
solutions = [
|
||||||
|
{"year": lit("1950")},
|
||||||
|
{"year": lit("700")},
|
||||||
|
{"year": lit("2000")},
|
||||||
|
{"year": lit("450")},
|
||||||
|
{"year": lit("1200")},
|
||||||
|
]
|
||||||
|
key_fns = [(lambda sol: sol.get("year"), True)]
|
||||||
|
result = order_by(solutions, key_fns)
|
||||||
|
values = [s["year"].value for s in result]
|
||||||
|
assert values == ["450", "700", "1200", "1950", "2000"]
|
||||||
|
|
||||||
|
def test_order_by_numeric_descending(self):
|
||||||
|
solutions = [
|
||||||
|
{"year": lit("1950")},
|
||||||
|
{"year": lit("700")},
|
||||||
|
{"year": lit("2000")},
|
||||||
|
]
|
||||||
|
key_fns = [(lambda sol: sol.get("year"), False)]
|
||||||
|
result = order_by(solutions, key_fns)
|
||||||
|
values = [s["year"].value for s in result]
|
||||||
|
assert values == ["2000", "1950", "700"]
|
||||||
|
|
||||||
|
|
||||||
class TestSlice:
|
class TestSlice:
|
||||||
|
|
||||||
|
|
@ -343,3 +367,37 @@ class TestSlice:
|
||||||
solutions = [{"s": alice}, {"s": bob}]
|
solutions = [{"s": alice}, {"s": bob}]
|
||||||
result = slice_solutions(solutions)
|
result = slice_solutions(solutions)
|
||||||
assert len(result) == 2
|
assert len(result) == 2
|
||||||
|
|
||||||
|
|
||||||
|
class TestMinus:
|
||||||
|
|
||||||
|
def test_removes_compatible(self, alice, bob):
|
||||||
|
left = [{"s": alice}, {"s": bob}]
|
||||||
|
right = [{"s": alice}]
|
||||||
|
result = minus(left, right)
|
||||||
|
assert len(result) == 1
|
||||||
|
assert result[0]["s"].iri == "http://example.com/bob"
|
||||||
|
|
||||||
|
def test_empty_right_preserves_all(self, alice, bob):
|
||||||
|
left = [{"s": alice}, {"s": bob}]
|
||||||
|
result = minus(left, [])
|
||||||
|
assert len(result) == 2
|
||||||
|
|
||||||
|
def test_no_shared_variables_preserves_all(self, alice, bob):
|
||||||
|
left = [{"s": alice}]
|
||||||
|
right = [{"t": bob}]
|
||||||
|
result = minus(left, right)
|
||||||
|
assert len(result) == 1
|
||||||
|
|
||||||
|
def test_all_removed(self, alice):
|
||||||
|
left = [{"s": alice}]
|
||||||
|
right = [{"s": alice}]
|
||||||
|
result = minus(left, right)
|
||||||
|
assert len(result) == 0
|
||||||
|
|
||||||
|
def test_partial_shared_variables(self, alice, bob):
|
||||||
|
left = [{"s": alice, "p": lit("x")}, {"s": bob, "p": lit("y")}]
|
||||||
|
right = [{"s": alice}]
|
||||||
|
result = minus(left, right)
|
||||||
|
assert len(result) == 1
|
||||||
|
assert result[0]["s"].iri == "http://example.com/bob"
|
||||||
|
|
|
||||||
|
|
@ -17,7 +17,7 @@ from ... knowledge import Uri
|
||||||
from ... knowledge import Literal as KgLiteral
|
from ... knowledge import Literal as KgLiteral
|
||||||
from . parser import rdflib_term_to_term
|
from . parser import rdflib_term_to_term
|
||||||
from . solutions import (
|
from . solutions import (
|
||||||
hash_join, left_join, union, project, distinct,
|
hash_join, left_join, minus, union, project, distinct,
|
||||||
order_by, slice_solutions, _term_key,
|
order_by, slice_solutions, _term_key,
|
||||||
)
|
)
|
||||||
from . expressions import evaluate_expression, _effective_boolean
|
from . expressions import evaluate_expression, _effective_boolean
|
||||||
|
|
@ -159,14 +159,69 @@ async def _eval_union(node, tc, collection, limit):
|
||||||
return union(left, right)[:limit]
|
return union(left, right)[:limit]
|
||||||
|
|
||||||
|
|
||||||
|
async def _eval_minus(node, tc, collection, limit):
|
||||||
|
"""Evaluate a Minus node."""
|
||||||
|
left = await evaluate(node.p1, tc, collection, limit)
|
||||||
|
right = await evaluate(node.p2, tc, collection, limit)
|
||||||
|
return minus(left, right)
|
||||||
|
|
||||||
|
|
||||||
|
async def _check_exists(graph_node, sol, tc, collection, limit):
|
||||||
|
"""Evaluate an EXISTS graph pattern against a solution."""
|
||||||
|
results = await evaluate(graph_node, tc, collection, limit)
|
||||||
|
for r in results:
|
||||||
|
shared = set(sol.keys()) & set(r.keys())
|
||||||
|
if all(
|
||||||
|
_term_key(sol[v]) == _term_key(r[v])
|
||||||
|
for v in shared
|
||||||
|
if sol.get(v) is not None and r.get(v) is not None
|
||||||
|
):
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
async def _pre_eval_exists(expr, sol, tc, collection, limit, cache):
|
||||||
|
"""Walk an expression tree, pre-evaluate EXISTS/NOT EXISTS, cache results."""
|
||||||
|
if not isinstance(expr, CompValue):
|
||||||
|
return
|
||||||
|
if expr.name in ("Builtin_EXISTS", "Builtin_NOTEXISTS"):
|
||||||
|
key = id(expr.graph), id(sol)
|
||||||
|
if key not in cache:
|
||||||
|
cache[key] = await _check_exists(
|
||||||
|
expr.graph, sol, tc, collection, limit
|
||||||
|
)
|
||||||
|
return
|
||||||
|
for attr in ("expr", "other", "arg", "arg1", "arg2", "arg3"):
|
||||||
|
child = getattr(expr, attr, None)
|
||||||
|
if child is None:
|
||||||
|
continue
|
||||||
|
if isinstance(child, CompValue):
|
||||||
|
await _pre_eval_exists(child, sol, tc, collection, limit, cache)
|
||||||
|
elif isinstance(child, (list, tuple)):
|
||||||
|
for item in child:
|
||||||
|
if isinstance(item, CompValue):
|
||||||
|
await _pre_eval_exists(
|
||||||
|
item, sol, tc, collection, limit, cache
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
async def _eval_filter(node, tc, collection, limit):
|
async def _eval_filter(node, tc, collection, limit):
|
||||||
"""Evaluate a Filter node."""
|
"""Evaluate a Filter node."""
|
||||||
solutions = await evaluate(node.p, tc, collection, limit)
|
solutions = await evaluate(node.p, tc, collection, limit)
|
||||||
expr = node.expr
|
expr = node.expr
|
||||||
return [
|
exists_cache = {}
|
||||||
sol for sol in solutions
|
|
||||||
if _effective_boolean(evaluate_expression(expr, sol))
|
def exists_cb(graph_node, sol):
|
||||||
]
|
key = id(graph_node), id(sol)
|
||||||
|
return exists_cache.get(key, False)
|
||||||
|
|
||||||
|
result = []
|
||||||
|
for sol in solutions:
|
||||||
|
await _pre_eval_exists(expr, sol, tc, collection, limit, exists_cache)
|
||||||
|
if _effective_boolean(evaluate_expression(expr, sol, exists_cb=exists_cb)):
|
||||||
|
result.append(sol)
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
async def _eval_distinct(node, tc, collection, limit):
|
async def _eval_distinct(node, tc, collection, limit):
|
||||||
|
|
@ -222,10 +277,16 @@ async def _eval_extend(node, tc, collection, limit):
|
||||||
solutions = await evaluate(node.p, tc, collection, limit)
|
solutions = await evaluate(node.p, tc, collection, limit)
|
||||||
var_name = str(node.var)
|
var_name = str(node.var)
|
||||||
expr = node.expr
|
expr = node.expr
|
||||||
|
exists_cache = {}
|
||||||
|
|
||||||
|
def exists_cb(graph_node, sol):
|
||||||
|
key = id(graph_node), id(sol)
|
||||||
|
return exists_cache.get(key, False)
|
||||||
|
|
||||||
result = []
|
result = []
|
||||||
for sol in solutions:
|
for sol in solutions:
|
||||||
val = evaluate_expression(expr, sol)
|
await _pre_eval_exists(expr, sol, tc, collection, limit, exists_cache)
|
||||||
|
val = evaluate_expression(expr, sol, exists_cb=exists_cb)
|
||||||
new_sol = dict(sol)
|
new_sol = dict(sol)
|
||||||
if isinstance(val, Term):
|
if isinstance(val, Term):
|
||||||
new_sol[var_name] = val
|
new_sol[var_name] = val
|
||||||
|
|
@ -525,6 +586,7 @@ _HANDLERS = {
|
||||||
"Join": _eval_join,
|
"Join": _eval_join,
|
||||||
"LeftJoin": _eval_left_join,
|
"LeftJoin": _eval_left_join,
|
||||||
"Union": _eval_union,
|
"Union": _eval_union,
|
||||||
|
"Minus": _eval_minus,
|
||||||
"Filter": _eval_filter,
|
"Filter": _eval_filter,
|
||||||
"Distinct": _eval_distinct,
|
"Distinct": _eval_distinct,
|
||||||
"Reduced": _eval_reduced,
|
"Reduced": _eval_reduced,
|
||||||
|
|
|
||||||
|
|
@ -5,9 +5,15 @@ Evaluates rdflib algebra expression nodes against a solution (variable
|
||||||
binding) to produce a value or boolean result.
|
binding) to produce a value or boolean result.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
import math
|
||||||
|
import random
|
||||||
import re
|
import re
|
||||||
import logging
|
import logging
|
||||||
import operator
|
import operator
|
||||||
|
import uuid
|
||||||
|
from datetime import datetime, date, timezone
|
||||||
|
from urllib.parse import quote
|
||||||
|
|
||||||
from rdflib.term import Variable, URIRef, Literal, BNode
|
from rdflib.term import Variable, URIRef, Literal, BNode
|
||||||
from rdflib.plugins.sparql.parserutils import CompValue
|
from rdflib.plugins.sparql.parserutils import CompValue
|
||||||
|
|
@ -17,23 +23,31 @@ from . parser import rdflib_term_to_term
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
_exists_callback = None
|
||||||
|
|
||||||
|
|
||||||
class ExpressionError(Exception):
|
class ExpressionError(Exception):
|
||||||
"""Raised when a SPARQL expression cannot be evaluated."""
|
"""Raised when a SPARQL expression cannot be evaluated."""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
def evaluate_expression(expr, solution):
|
def evaluate_expression(expr, solution, exists_cb=None):
|
||||||
"""
|
"""
|
||||||
Evaluate a SPARQL expression against a solution binding.
|
Evaluate a SPARQL expression against a solution binding.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
expr: rdflib algebra expression node
|
expr: rdflib algebra expression node
|
||||||
solution: dict mapping variable names to Term values
|
solution: dict mapping variable names to Term values
|
||||||
|
exists_cb: optional callback(graph_node, solution) -> bool for
|
||||||
|
EXISTS/NOT EXISTS evaluation; provided by algebra.py
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
The result value (Term, bool, number, string, or None)
|
The result value (Term, bool, number, string, or None)
|
||||||
"""
|
"""
|
||||||
|
global _exists_callback
|
||||||
|
if exists_cb is not None:
|
||||||
|
_exists_callback = exists_cb
|
||||||
|
|
||||||
if expr is None:
|
if expr is None:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
@ -119,15 +133,7 @@ def _evaluate_comp_value(node, solution):
|
||||||
if name == "Function":
|
if name == "Function":
|
||||||
return _eval_function(node, solution)
|
return _eval_function(node, solution)
|
||||||
|
|
||||||
# Exists / NotExists
|
# Exists / NotExists — handled via _eval_builtin now
|
||||||
if name == "Builtin_EXISTS":
|
|
||||||
# EXISTS requires graph pattern evaluation - not handled here
|
|
||||||
logger.warning("EXISTS not supported in filter expressions")
|
|
||||||
return True
|
|
||||||
|
|
||||||
if name == "Builtin_NOTEXISTS":
|
|
||||||
logger.warning("NOT EXISTS not supported in filter expressions")
|
|
||||||
return True
|
|
||||||
|
|
||||||
# TrueFilter (used with OPTIONAL)
|
# TrueFilter (used with OPTIONAL)
|
||||||
if name == "TrueFilter":
|
if name == "TrueFilter":
|
||||||
|
|
@ -335,6 +341,197 @@ def _eval_builtin(name, node, solution):
|
||||||
return val
|
return val
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
if builtin == "YEAR":
|
||||||
|
dt = _to_datetime(evaluate_expression(node.arg, solution))
|
||||||
|
return dt.year if dt is not None else None
|
||||||
|
|
||||||
|
if builtin == "MONTH":
|
||||||
|
dt = _to_datetime(evaluate_expression(node.arg, solution))
|
||||||
|
return dt.month if dt is not None else None
|
||||||
|
|
||||||
|
if builtin == "DAY":
|
||||||
|
dt = _to_datetime(evaluate_expression(node.arg, solution))
|
||||||
|
return dt.day if dt is not None else None
|
||||||
|
|
||||||
|
if builtin == "HOURS":
|
||||||
|
dt = _to_datetime(evaluate_expression(node.arg, solution))
|
||||||
|
if dt is None:
|
||||||
|
return None
|
||||||
|
return dt.hour if isinstance(dt, datetime) else 0
|
||||||
|
|
||||||
|
if builtin == "MINUTES":
|
||||||
|
dt = _to_datetime(evaluate_expression(node.arg, solution))
|
||||||
|
if dt is None:
|
||||||
|
return None
|
||||||
|
return dt.minute if isinstance(dt, datetime) else 0
|
||||||
|
|
||||||
|
if builtin == "SECONDS":
|
||||||
|
dt = _to_datetime(evaluate_expression(node.arg, solution))
|
||||||
|
if dt is None:
|
||||||
|
return None
|
||||||
|
return dt.second if isinstance(dt, datetime) else 0
|
||||||
|
|
||||||
|
if builtin == "FLOOR":
|
||||||
|
val = _to_numeric(evaluate_expression(node.arg, solution))
|
||||||
|
if val is None:
|
||||||
|
return None
|
||||||
|
return int(math.floor(val))
|
||||||
|
|
||||||
|
if builtin == "CEIL":
|
||||||
|
val = _to_numeric(evaluate_expression(node.arg, solution))
|
||||||
|
if val is None:
|
||||||
|
return None
|
||||||
|
return int(math.ceil(val))
|
||||||
|
|
||||||
|
if builtin == "ABS":
|
||||||
|
val = _to_numeric(evaluate_expression(node.arg, solution))
|
||||||
|
if val is None:
|
||||||
|
return None
|
||||||
|
return abs(val)
|
||||||
|
|
||||||
|
if builtin == "ROUND":
|
||||||
|
val = _to_numeric(evaluate_expression(node.arg, solution))
|
||||||
|
if val is None:
|
||||||
|
return None
|
||||||
|
return round(val)
|
||||||
|
|
||||||
|
if builtin == "STRBEFORE":
|
||||||
|
string = _to_string(evaluate_expression(node.arg1, solution))
|
||||||
|
sep = _to_string(evaluate_expression(node.arg2, solution))
|
||||||
|
idx = string.find(sep)
|
||||||
|
if idx < 0:
|
||||||
|
return Term(type=LITERAL, value="")
|
||||||
|
return Term(type=LITERAL, value=string[:idx])
|
||||||
|
|
||||||
|
if builtin == "STRAFTER":
|
||||||
|
string = _to_string(evaluate_expression(node.arg1, solution))
|
||||||
|
sep = _to_string(evaluate_expression(node.arg2, solution))
|
||||||
|
idx = string.find(sep)
|
||||||
|
if idx < 0:
|
||||||
|
return Term(type=LITERAL, value="")
|
||||||
|
return Term(type=LITERAL, value=string[idx + len(sep):])
|
||||||
|
|
||||||
|
if builtin == "ENCODE_FOR_URI":
|
||||||
|
val = _to_string(evaluate_expression(node.arg, solution))
|
||||||
|
return Term(type=LITERAL, value=quote(val, safe=""))
|
||||||
|
|
||||||
|
if builtin == "REPLACE":
|
||||||
|
string = _to_string(evaluate_expression(node.arg, solution))
|
||||||
|
pattern = _to_string(evaluate_expression(node.pattern, solution))
|
||||||
|
replacement = _to_string(
|
||||||
|
evaluate_expression(node.replacement, solution)
|
||||||
|
)
|
||||||
|
flags_str = ""
|
||||||
|
if hasattr(node, "flags") and node.flags is not None:
|
||||||
|
flags_str = _to_string(evaluate_expression(node.flags, solution))
|
||||||
|
re_flags = 0
|
||||||
|
if "i" in flags_str:
|
||||||
|
re_flags |= re.IGNORECASE
|
||||||
|
if "m" in flags_str:
|
||||||
|
re_flags |= re.MULTILINE
|
||||||
|
if "s" in flags_str:
|
||||||
|
re_flags |= re.DOTALL
|
||||||
|
try:
|
||||||
|
result = re.sub(pattern, replacement, string, flags=re_flags)
|
||||||
|
return Term(type=LITERAL, value=result)
|
||||||
|
except re.error:
|
||||||
|
return None
|
||||||
|
|
||||||
|
if builtin == "SUBSTR":
|
||||||
|
string = _to_string(evaluate_expression(node.arg, solution))
|
||||||
|
start = _to_numeric(evaluate_expression(node.start, solution))
|
||||||
|
if start is None:
|
||||||
|
return None
|
||||||
|
start_idx = max(int(start) - 1, 0)
|
||||||
|
if hasattr(node, "length") and node.length is not None:
|
||||||
|
length = _to_numeric(evaluate_expression(node.length, solution))
|
||||||
|
if length is None:
|
||||||
|
return None
|
||||||
|
return Term(
|
||||||
|
type=LITERAL, value=string[start_idx:start_idx + int(length)]
|
||||||
|
)
|
||||||
|
return Term(type=LITERAL, value=string[start_idx:])
|
||||||
|
|
||||||
|
if builtin == "EXISTS":
|
||||||
|
if _exists_callback is not None:
|
||||||
|
return _exists_callback(node.graph, solution)
|
||||||
|
logger.warning("EXISTS requires an exists_cb; not available")
|
||||||
|
return True
|
||||||
|
|
||||||
|
if builtin == "NOTEXISTS":
|
||||||
|
if _exists_callback is not None:
|
||||||
|
return not _exists_callback(node.graph, solution)
|
||||||
|
logger.warning("NOT EXISTS requires an exists_cb; not available")
|
||||||
|
return True
|
||||||
|
|
||||||
|
if builtin == "LANGMATCHES":
|
||||||
|
tag = _to_string(evaluate_expression(node.arg1, solution))
|
||||||
|
rng = _to_string(evaluate_expression(node.arg2, solution))
|
||||||
|
if rng == "*":
|
||||||
|
return len(tag) > 0
|
||||||
|
return tag.lower().startswith(rng.lower())
|
||||||
|
|
||||||
|
if builtin == "IRI" or builtin == "URI":
|
||||||
|
val = _to_string(evaluate_expression(node.arg, solution))
|
||||||
|
return Term(type=IRI, iri=val)
|
||||||
|
|
||||||
|
if builtin == "BNODE":
|
||||||
|
if hasattr(node, "arg") and node.arg is not None:
|
||||||
|
label = _to_string(evaluate_expression(node.arg, solution))
|
||||||
|
return Term(type=BLANK, id=label)
|
||||||
|
return Term(type=BLANK, id=str(uuid.uuid4()))
|
||||||
|
|
||||||
|
if builtin == "NOW":
|
||||||
|
now = datetime.now(timezone.utc)
|
||||||
|
return Term(
|
||||||
|
type=LITERAL,
|
||||||
|
value=now.strftime("%Y-%m-%dT%H:%M:%S%z"),
|
||||||
|
datatype="http://www.w3.org/2001/XMLSchema#dateTime",
|
||||||
|
)
|
||||||
|
|
||||||
|
if builtin == "TZ":
|
||||||
|
dt = _to_datetime(evaluate_expression(node.arg, solution))
|
||||||
|
if dt is None:
|
||||||
|
return Term(type=LITERAL, value="")
|
||||||
|
if dt.tzinfo is not None:
|
||||||
|
offset = dt.strftime("%z")
|
||||||
|
if offset:
|
||||||
|
return Term(type=LITERAL, value=offset[:3] + ":" + offset[3:])
|
||||||
|
return Term(type=LITERAL, value="")
|
||||||
|
|
||||||
|
if builtin == "RAND":
|
||||||
|
return random.random()
|
||||||
|
|
||||||
|
if builtin == "UUID":
|
||||||
|
return Term(type=IRI, iri="urn:uuid:" + str(uuid.uuid4()))
|
||||||
|
|
||||||
|
if builtin == "STRUUID":
|
||||||
|
return Term(type=LITERAL, value=str(uuid.uuid4()))
|
||||||
|
|
||||||
|
if builtin == "MD5":
|
||||||
|
val = _to_string(evaluate_expression(node.arg, solution))
|
||||||
|
return Term(
|
||||||
|
type=LITERAL, value=hashlib.md5(val.encode()).hexdigest()
|
||||||
|
)
|
||||||
|
|
||||||
|
if builtin == "SHA1":
|
||||||
|
val = _to_string(evaluate_expression(node.arg, solution))
|
||||||
|
return Term(
|
||||||
|
type=LITERAL, value=hashlib.sha1(val.encode()).hexdigest()
|
||||||
|
)
|
||||||
|
|
||||||
|
if builtin == "SHA256":
|
||||||
|
val = _to_string(evaluate_expression(node.arg, solution))
|
||||||
|
return Term(
|
||||||
|
type=LITERAL, value=hashlib.sha256(val.encode()).hexdigest()
|
||||||
|
)
|
||||||
|
|
||||||
|
if builtin == "SHA512":
|
||||||
|
val = _to_string(evaluate_expression(node.arg, solution))
|
||||||
|
return Term(
|
||||||
|
type=LITERAL, value=hashlib.sha512(val.encode()).hexdigest()
|
||||||
|
)
|
||||||
|
|
||||||
if builtin == "sameTerm":
|
if builtin == "sameTerm":
|
||||||
left = evaluate_expression(node.arg1, solution)
|
left = evaluate_expression(node.arg1, solution)
|
||||||
right = evaluate_expression(node.arg2, solution)
|
right = evaluate_expression(node.arg2, solution)
|
||||||
|
|
@ -454,6 +651,27 @@ def _to_numeric(val):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _to_datetime(val):
|
||||||
|
"""Convert a value to a date or datetime object."""
|
||||||
|
if val is None:
|
||||||
|
return None
|
||||||
|
s = _to_string(val)
|
||||||
|
for fmt in (
|
||||||
|
"%Y-%m-%dT%H:%M:%S", "%Y-%m-%dT%H:%M:%S%z",
|
||||||
|
"%Y-%m-%dT%H:%M:%SZ", "%Y-%m-%dT%H:%M",
|
||||||
|
"%Y-%m-%d",
|
||||||
|
):
|
||||||
|
try:
|
||||||
|
return datetime.strptime(s, fmt)
|
||||||
|
except ValueError:
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
return datetime.fromisoformat(s)
|
||||||
|
except ValueError:
|
||||||
|
pass
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
def _comparable_value(val):
|
def _comparable_value(val):
|
||||||
"""
|
"""
|
||||||
Convert a value to a form suitable for comparison.
|
Convert a value to a form suitable for comparison.
|
||||||
|
|
|
||||||
|
|
@ -150,6 +150,30 @@ def left_join(left, right, filter_fn=None):
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
||||||
|
def minus(left, right):
|
||||||
|
"""
|
||||||
|
MINUS operation: remove left solutions that are compatible with any
|
||||||
|
right solution sharing at least one variable.
|
||||||
|
"""
|
||||||
|
if not right:
|
||||||
|
return list(left)
|
||||||
|
|
||||||
|
right_vars = set()
|
||||||
|
for sol in right:
|
||||||
|
right_vars.update(sol.keys())
|
||||||
|
|
||||||
|
results = []
|
||||||
|
for sol_l in left:
|
||||||
|
shared = set(sol_l.keys()) & right_vars
|
||||||
|
if not shared:
|
||||||
|
results.append(sol_l)
|
||||||
|
continue
|
||||||
|
if not any(_compatible(sol_l, sol_r) for sol_r in right):
|
||||||
|
results.append(sol_l)
|
||||||
|
|
||||||
|
return results
|
||||||
|
|
||||||
|
|
||||||
def union(left, right):
|
def union(left, right):
|
||||||
"""Union two solution sequences (concatenation)."""
|
"""Union two solution sequences (concatenation)."""
|
||||||
return list(left) + list(right)
|
return list(left) + list(right)
|
||||||
|
|
@ -177,6 +201,28 @@ def distinct(solutions):
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
||||||
|
def _sort_comparable(val):
|
||||||
|
"""Convert a value to a form suitable for sort ordering."""
|
||||||
|
if val is None:
|
||||||
|
return (0, "")
|
||||||
|
if isinstance(val, (int, float)):
|
||||||
|
return (2, val)
|
||||||
|
if isinstance(val, Term):
|
||||||
|
if val.type == LITERAL:
|
||||||
|
try:
|
||||||
|
if "." in val.value:
|
||||||
|
return (2, float(val.value))
|
||||||
|
return (2, int(val.value))
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
pass
|
||||||
|
return (3, val.value)
|
||||||
|
elif val.type == IRI:
|
||||||
|
return (4, val.iri)
|
||||||
|
elif val.type == BLANK:
|
||||||
|
return (5, val.id)
|
||||||
|
return (6, str(val))
|
||||||
|
|
||||||
|
|
||||||
def order_by(solutions, key_fns):
|
def order_by(solutions, key_fns):
|
||||||
"""
|
"""
|
||||||
Sort solutions by the given key functions.
|
Sort solutions by the given key functions.
|
||||||
|
|
@ -191,14 +237,7 @@ def order_by(solutions, key_fns):
|
||||||
keys = []
|
keys = []
|
||||||
for fn, ascending in key_fns:
|
for fn, ascending in key_fns:
|
||||||
val = fn(sol)
|
val = fn(sol)
|
||||||
# Convert to comparable form
|
keys.append(_sort_comparable(val))
|
||||||
if val is None:
|
|
||||||
comparable = ("", "")
|
|
||||||
elif isinstance(val, Term):
|
|
||||||
comparable = _term_key(val)
|
|
||||||
else:
|
|
||||||
comparable = ("v", str(val))
|
|
||||||
keys.append(comparable)
|
|
||||||
return keys
|
return keys
|
||||||
|
|
||||||
# Handle ascending/descending
|
# Handle ascending/descending
|
||||||
|
|
@ -224,10 +263,8 @@ def _mixed_sort(solutions, key_fns):
|
||||||
|
|
||||||
def compare(a, b):
|
def compare(a, b):
|
||||||
for fn, ascending in key_fns:
|
for fn, ascending in key_fns:
|
||||||
va = fn(a)
|
ka = _sort_comparable(fn(a))
|
||||||
vb = fn(b)
|
kb = _sort_comparable(fn(b))
|
||||||
ka = _term_key(va) if isinstance(va, Term) else ("v", str(va)) if va is not None else ("", "")
|
|
||||||
kb = _term_key(vb) if isinstance(vb, Term) else ("v", str(vb)) if vb is not None else ("", "")
|
|
||||||
|
|
||||||
if ka < kb:
|
if ka < kb:
|
||||||
return -1 if ascending else 1
|
return -1 if ascending else 1
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue