test(engine): cover cycle/self-loop termination + nested anti-join (C5 edge cases)

- variable_hops_terminate_and_dedup_on_cycle: a 3-cycle a->b->c->a traversed with
  knows{1,5} (ceiling above the cycle length) terminates and emits each node once
  (the c->a back-edge hits the seeded source); both_modes confirms indexed == csr.
  Uses a bounded range deliberately — unbounded {1,} is a typecheck error, not a
  runtime path.
- variable_hops_handle_self_loop: a->a self-loop does not loop forever and does
  not re-emit the seeded source.
- nested_anti_join_double_negation: not { worksAt; not { name = Acme } } recurses
  through execute_pipeline, yielding [Alice,Charlie,Diana] (people with no non-Acme
  employer) — distinct from plain unemployed [Charlie,Diana].
This commit is contained in:
Ragnor Comerford 2026-06-09 14:06:37 +02:00
parent 052dbcb69a
commit 1348685ff4
No known key found for this signature in database
2 changed files with 93 additions and 0 deletions

View file

@ -46,6 +46,45 @@ query not_at_acme() {
assert_eq!(names_vec, vec!["Bob", "Charlie", "Diana"]);
}
// Double negation via nested anti-joins: `not { … not { … } }` has to recurse
// through execute_pipeline. The query reads as "people with NO employer that is
// NOT Acme": the inner `not { $c.name = "Acme" }` retains only non-Acme
// employers, and the outer `not` drops every person who has such an employer.
// Expected survivors: Alice (employed by Acme only) plus Charlie and Diana (no
// employer at all) — a different set from the plain unemployed {Charlie, Diana}.
#[tokio::test]
async fn nested_anti_join_double_negation() {
    // The query text is kept flush-left so the literal handed to the engine is
    // exactly the one under test.
    let query_src = r#"
query no_nonacme_employer() {
match {
$p: Person
not {
$p worksAt $c
not {
$c.name = "Acme"
}
}
}
return { $p.name }
}
"#;

    let workspace = tempfile::tempdir().unwrap();
    let mut db = init_and_load(&workspace).await;

    let no_params = ParamMap::new();
    let outcome = query_main(&mut db, query_src, "no_nonacme_employer", &no_params)
        .await
        .unwrap();

    // Flatten the result into one batch and view its first column as strings.
    let merged = outcome.concat_batches().unwrap();
    let first_column = merged.column(0);
    let names = first_column
        .as_any()
        .downcast_ref::<StringArray>()
        .unwrap();

    // Row order is not part of the contract, so sort before comparing.
    let mut sorted_names: Vec<&str> = Vec::with_capacity(names.len());
    for row in 0..names.len() {
        sorted_names.push(names.value(row));
    }
    sorted_names.sort_unstable();

    assert_eq!(sorted_names, ["Alice", "Charlie", "Diana"]);
}
// ─── Variable-length hops ───────────────────────────────────────────────────
const CHAIN_SCHEMA: &str = r#"

View file

@ -233,3 +233,57 @@ query reach($name: String) {
result means the id-string collision bled across types"
);
}
// Query source shared by the cycle and self-loop tests below. It declares a
// single query, `reach`, which takes a `$name: String` parameter, matches the
// `Person` whose `name` equals `$name`, follows `knows` over a bounded
// variable-length range of 1 to 5 hops, and returns the `name` of each node
// bound to `$f`.
const REACH_5: &str = r#"
query reach($name: String) {
match {
$p: Person { name: $name }
$p knows{1,5} $f
}
return { $f.name }
}
"#;
// Directed 3-cycle a->b->c->a, walked with a hop ceiling (5) that is larger than
// the cycle itself. The variable-length traversal has to stop on its own and
// report every node at most once: the source is seeded into `visited`, so the
// c->a back-edge must not bring `a` back into the output. The range is bounded
// on purpose — an unbounded `{1,}` is rejected at typecheck and never reaches
// the runtime. Running through `both_modes` additionally checks that the indexed
// and csr paths agree on the cycle.
#[tokio::test]
#[serial]
async fn variable_hops_terminate_and_dedup_on_cycle() {
    // Three people and the three `Knows` edges that close the loop.
    let cycle_jsonl = r#"{"type":"Person","data":{"name":"a"}}
{"type":"Person","data":{"name":"b"}}
{"type":"Person","data":{"name":"c"}}
{"edge":"Knows","from":"a","to":"b"}
{"edge":"Knows","from":"b","to":"c"}
{"edge":"Knows","from":"c","to":"a"}"#;

    let workspace = tempfile::tempdir().unwrap();
    let root = workspace.path().to_str().unwrap();

    let mut db = Omnigraph::init(root, TEST_SCHEMA).await.unwrap();
    load_jsonl(&mut db, cycle_jsonl, LoadMode::Overwrite)
        .await
        .unwrap();

    let start_at_a = params(&[("$name", "a")]);
    let got = both_modes(&mut db, REACH_5, "reach", &start_at_a).await;

    // Starting at a: b after one hop, c after two. The third hop lands back on
    // the seeded source and is dropped, so the walk ends with each node once.
    assert_eq!(got, vec!["b", "c"]);
}
// Graph with a self-loop a->a next to a regular edge a->b. The variable-length
// traversal must finish instead of spinning on the loop, and the seeded source
// must not show up in its own result set.
#[tokio::test]
#[serial]
async fn variable_hops_handle_self_loop() {
    // Two people; `a` knows itself and also knows `b`.
    let self_loop_jsonl = r#"{"type":"Person","data":{"name":"a"}}
{"type":"Person","data":{"name":"b"}}
{"edge":"Knows","from":"a","to":"a"}
{"edge":"Knows","from":"a","to":"b"}"#;

    let workspace = tempfile::tempdir().unwrap();
    let root = workspace.path().to_str().unwrap();

    let mut db = Omnigraph::init(root, TEST_SCHEMA).await.unwrap();
    load_jsonl(&mut db, self_loop_jsonl, LoadMode::Overwrite)
        .await
        .unwrap();

    let start_at_a = params(&[("$name", "a")]);
    let got = both_modes(&mut db, REACH_5, "reach", &start_at_a).await;

    // The a->a edge lands on the seeded source and is pruned; b is the only
    // node reached.
    assert_eq!(got, vec!["b"]);
}