Add comprehensive tests from morphological matrix analysis

Unit tests covering gaps identified by systematic matrix of:
topology (fan-out, fan-in, cycle) × deferral × filter type × direction.

New unit tests:
- fan-out: one root fans to two deferred destinations via different edges
- fan-in: two sources converge on one destination via reverse expand
- cycle: deferred binding + genuine cycle-closing on return edge
- multiple filters on single deferred binding (name + age)
- param filter on deferred binding (IRExpr::Param in dst_filters)
- negation with inner binding (documents current NodeScan+cycle-close behavior)

New integration tests:
- fan-out projection (friend × company cross-product per source)
- deferred filter matching nothing (empty result propagation)
- negation with inner destination binding filter

Also: guard anti-join fast path against non-empty dst_filters. The bulk
CSR existence check only tests neighbor existence, not destination
properties — it must fall back to the slow path when dst_filters are
present to avoid false negatives.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Ragnor Comerford 2026-04-13 15:31:08 +02:00
parent 3461aa123d
commit 6e43ceac08
No known key found for this signature in database
3 changed files with 321 additions and 0 deletions

View file

@ -1019,4 +1019,221 @@ query q() {
if src_var == "p" && dst_var == "_"
));
}
/// Fan-out: one root fans to two deferred destinations via different edges.
#[test]
fn test_lower_fan_out_topology() {
let catalog = setup();
let qf = parse_query(
r#"
query q() {
match {
$p: Person { name: "Alice" }
$p knows $f
$f: Person { name: "Bob" }
$p worksAt $c
$c: Company { name: "Acme" }
}
return { $f.name, $c.name }
}
"#,
)
.unwrap();
let tc = typecheck_query(&catalog, &qf.queries[0]).unwrap();
let ir = lower_query(&catalog, &qf.queries[0], &tc).unwrap();
// Root: $p. Deferred: $f, $c (both reachable from $p).
assert_eq!(ir.pipeline.len(), 3);
assert!(matches!(&ir.pipeline[0], IROp::NodeScan { variable, .. } if variable == "p"));
assert!(matches!(
&ir.pipeline[1],
IROp::Expand { src_var, dst_var, dst_filters, .. }
if src_var == "p" && dst_var == "f" && dst_filters.len() == 1
));
assert!(matches!(
&ir.pipeline[2],
IROp::Expand { src_var, dst_var, dst_filters, .. }
if src_var == "p" && dst_var == "c" && dst_filters.len() == 1
));
}
/// Fan-in: two sources converge on one destination; second source is
/// introduced via reverse expand from the shared destination.
#[test]
fn test_lower_fan_in_topology() {
let catalog = setup();
let qf = parse_query(
r#"
query q() {
match {
$a: Person { name: "Alice" }
$a knows $c
$b: Person { name: "Bob" }
$b knows $c
$c: Person
}
return { $a.name, $b.name, $c.name }
}
"#,
)
.unwrap();
let tc = typecheck_query(&catalog, &qf.queries[0]).unwrap();
let ir = lower_query(&catalog, &qf.queries[0], &tc).unwrap();
// Root: $a (first in component {a,b,c}). Deferred: $b, $c.
// $a knows $c: expand(a→c). $b knows $c: reverse expand(c→b).
assert_eq!(ir.pipeline.len(), 3);
assert!(matches!(&ir.pipeline[0], IROp::NodeScan { variable, .. } if variable == "a"));
assert!(matches!(
&ir.pipeline[1],
IROp::Expand { src_var, dst_var, dst_filters, .. }
if src_var == "a" && dst_var == "c" && dst_filters.is_empty()
));
assert!(matches!(
&ir.pipeline[2],
IROp::Expand { src_var, dst_var, dst_filters, .. }
if src_var == "c" && dst_var == "b" && dst_filters.len() == 1
));
}
/// Genuine graph cycle: deferred binding is introduced by first traversal,
/// second traversal triggers cycle-closing.
#[test]
fn test_lower_cycle_with_deferred_binding() {
let catalog = setup();
let qf = parse_query(
r#"
query q() {
match {
$a: Person
$a knows $b
$b: Person { name: "Bob" }
$b knows $a
}
return { $a.name }
}
"#,
)
.unwrap();
let tc = typecheck_query(&catalog, &qf.queries[0]).unwrap();
let ir = lower_query(&catalog, &qf.queries[0], &tc).unwrap();
// $b is deferred, introduced by first expand.
// Second traversal ($b knows $a) is genuine cycle-closing.
assert_eq!(ir.pipeline.len(), 4);
assert!(matches!(&ir.pipeline[0], IROp::NodeScan { variable, .. } if variable == "a"));
assert!(matches!(
&ir.pipeline[1],
IROp::Expand { src_var, dst_var, dst_filters, .. }
if src_var == "a" && dst_var == "b" && dst_filters.len() == 1
));
// Cycle-closing expand to __temp_a
assert!(matches!(
&ir.pipeline[2],
IROp::Expand { src_var, dst_var, dst_filters, .. }
if src_var == "b" && dst_var.starts_with("__temp_") && dst_filters.is_empty()
));
// Cycle-closing filter: __temp_a.id == a.id
assert!(matches!(&ir.pipeline[3], IROp::Filter(_)));
}
/// Multiple filters on a single deferred binding.
#[test]
fn test_lower_multiple_filters_on_deferred_binding() {
let catalog = setup();
let qf = parse_query(
r#"
query q() {
match {
$p: Person
$p knows $f
$f: Person { name: "Bob", age: 25 }
}
return { $f.name }
}
"#,
)
.unwrap();
let tc = typecheck_query(&catalog, &qf.queries[0]).unwrap();
let ir = lower_query(&catalog, &qf.queries[0], &tc).unwrap();
// Two prop_matches → two dst_filters on the Expand.
assert_eq!(ir.pipeline.len(), 2);
assert!(matches!(
&ir.pipeline[1],
IROp::Expand { dst_filters, .. }
if dst_filters.len() == 2
));
}
/// Parameter in a deferred binding filter (unit test level).
#[test]
fn test_lower_param_filter_on_deferred_binding() {
let catalog = setup();
let qf = parse_query(
r#"
query q($company: String) {
match {
$p: Person
$p worksAt $c
$c: Company { name: $company }
}
return { $p.name }
}
"#,
)
.unwrap();
let tc = typecheck_query(&catalog, &qf.queries[0]).unwrap();
let ir = lower_query(&catalog, &qf.queries[0], &tc).unwrap();
assert_eq!(ir.pipeline.len(), 2);
assert!(matches!(
&ir.pipeline[1],
IROp::Expand { dst_filters, .. }
if dst_filters.len() == 1
));
// The filter's right-hand side should be a Param, not a Literal
if let IROp::Expand { dst_filters, .. } = &ir.pipeline[1] {
assert!(matches!(&dst_filters[0].right, IRExpr::Param(name) if name == "company"));
}
}
/// Negation with inner binding: inner binding is NOT deferred because
/// bound_vars (from outer scope) is not in binding_set for the inner call.
/// This documents current behavior — the inner pipeline uses a NodeScan +
/// cycle-closing, which is correct but less efficient than deferral.
#[test]
fn test_lower_negation_with_inner_binding() {
let catalog = setup();
let qf = parse_query(
r#"
query q() {
match {
$p: Person
not {
$p worksAt $c
$c: Company { name: "Acme" }
}
}
return { $p.name }
}
"#,
)
.unwrap();
let tc = typecheck_query(&catalog, &qf.queries[0]).unwrap();
let ir = lower_query(&catalog, &qf.queries[0], &tc).unwrap();
// Outer: NodeScan($p) + AntiJoin
assert_eq!(ir.pipeline.len(), 2);
assert!(matches!(&ir.pipeline[0], IROp::NodeScan { variable, .. } if variable == "p"));
let IROp::AntiJoin { inner, .. } = &ir.pipeline[1] else {
panic!("expected AntiJoin");
};
// Inner pipeline: $c is NOT deferred (it's the only binding in the
// inner scope), so it gets a NodeScan + cycle-closing (3 ops).
assert_eq!(inner.len(), 3);
assert!(matches!(&inner[0], IROp::NodeScan { variable, .. } if variable == "c"));
assert!(matches!(&inner[1], IROp::Expand { .. }));
assert!(matches!(&inner[2], IROp::Filter(_)));
}
}

View file

@ -901,6 +901,7 @@ fn try_bulk_anti_join_mask(
src_var,
edge_type,
direction,
dst_filters,
..
} = &inner_pipeline[0]
else {
@ -909,6 +910,11 @@ fn try_bulk_anti_join_mask(
if src_var != outer_var {
return None;
}
// Bulk CSR check only tests neighbor existence, not destination
// properties. Fall back to the slow path when dst_filters are present.
if !dst_filters.is_empty() {
return None;
}
let gi = graph_index?;
let edge_def = catalog.edge_types.get(edge_type.as_str())?;

View file

@ -613,3 +613,101 @@ query at_company($company: String) {
assert_eq!(person.value(0), "Bob");
assert_eq!(company.value(0), "Globex");
}
/// Fan-out: one source expanded to two different destination types.
/// Each (friend, company) pair should be a cross-product per source row.
#[tokio::test]
async fn fan_out_two_destinations() {
let dir = tempfile::tempdir().unwrap();
let mut db = init_and_load(&dir).await;
let queries = r#"
query fan_out($name: String) {
match {
$p: Person { name: $name }
$p knows $f
$p worksAt $c
}
return { $f.name, $c.name }
}
"#;
// Alice knows Bob and Charlie, works at Acme.
// Each friend paired with her company → 2 rows.
let result = query_main(
&mut db,
queries,
"fan_out",
&params(&[("$name", "Alice")]),
)
.await
.unwrap();
let batch = result.concat_batches().unwrap();
assert_eq!(batch.num_rows(), 2);
let friends = batch.column(0).as_any().downcast_ref::<StringArray>().unwrap();
let companies = batch.column(1).as_any().downcast_ref::<StringArray>().unwrap();
let mut pairs: Vec<(&str, &str)> = (0..batch.num_rows())
.map(|i| (friends.value(i), companies.value(i)))
.collect();
pairs.sort();
assert_eq!(pairs, vec![("Bob", "Acme"), ("Charlie", "Acme")]);
}
/// Deferred destination filter that matches nothing → empty result.
#[tokio::test]
async fn traversal_destination_filter_no_match() {
let dir = tempfile::tempdir().unwrap();
let mut db = init_and_load(&dir).await;
let queries = r#"
query at_phantom() {
match {
$p: Person
$p worksAt $c
$c: Company { name: "NonExistent" }
}
return { $p.name }
}
"#;
let result = query_main(&mut db, queries, "at_phantom", &ParamMap::new())
.await
.unwrap();
assert_eq!(result.num_rows(), 0);
}
/// Negation with inner destination binding filter.
/// "People who do NOT work at Acme" — uses binding syntax inside negation.
#[tokio::test]
async fn negation_with_inner_destination_binding() {
let dir = tempfile::tempdir().unwrap();
let mut db = init_and_load(&dir).await;
let queries = r#"
query not_at_acme_binding() {
match {
$p: Person
not {
$p worksAt $c
$c: Company { name: "Acme" }
}
}
return { $p.name }
}
"#;
// Alice→Acme. Everyone else should be returned.
let result = query_main(&mut db, queries, "not_at_acme_binding", &ParamMap::new())
.await
.unwrap();
let batch = result.concat_batches().unwrap();
let names = batch
.column(0)
.as_any()
.downcast_ref::<StringArray>()
.unwrap();
let mut names_vec: Vec<&str> = (0..names.len()).map(|i| names.value(i)).collect();
names_vec.sort();
assert_eq!(names_vec, vec!["Bob", "Charlie", "Diana"]);
}