test(engine): regression for cross-type id-collision in indexed traversal

A node id is unique only within a type, so a Person and a Company can share an
id string. A variable-length traversal over a cross-type edge (WorksAt) must
structurally stop after one hop. This test builds a graph where 'shared' is both
a Person and a Company id and asserts worksAt{1,2} returns only the one-hop
company. It fails today: the indexed path's single string interner de-interns
the hop-1 Company id back to the colliding Person id and runs a hop-2 scan that
matches that Person's edges, emitting a spurious second-hop company (indexed
["other","shared"] vs csr ["shared"]).
This commit is contained in:
Ragnor Comerford 2026-06-09 13:02:15 +02:00
parent f81f046543
commit ec38e2f9c7
No known key found for this signature in database

View file

@ -14,6 +14,7 @@ mod helpers;
use arrow_array::{Array, StringArray};
use omnigraph::db::Omnigraph;
use omnigraph::loader::{LoadMode, load_jsonl};
use omnigraph::table_store::{IndexCoverage, TableStore};
use omnigraph_compiler::ir::ParamMap;
use serial_test::serial;
@ -183,3 +184,52 @@ async fn indexed_finds_unindexed_appended_edge() {
"indexed traversal must see the freshly-appended, unindexed edge"
);
}
// Regression: a node `id` is unique only WITHIN a type, so a `Person` and a
// `Company` can share an id string. A variable-length traversal over a
// cross-type edge (`worksAt`, Person -> Company) must structurally stop after
// one hop — a Company is not a `worksAt` source — so `worksAt{1,2}` returns
// exactly the one-hop companies. Before the structural hop-cap, the indexed
// path's single string interner de-interned the hop-1 Company id back to the
// colliding Person id and ran a hop-2 `worksAt src IN (...)` scan that matched
// that same-string Person's edges, emitting a spurious second-hop company the
// CSR path never produces. `both_modes` (csr == indexed == auto) plus the
// golden assert catch both the divergence and an over-emitting shared bug.
#[tokio::test]
#[serial]
async fn cross_type_id_collision_does_not_bleed_into_second_hop() {
const SCHEMA: &str = r#"
node Person { name: String @key }
node Company { name: String @key }
edge WorksAt: Person -> Company
"#;
// `shared` is BOTH a Person id and a Company id. alice worksAt the Company
// `shared`; the Person `shared` worksAt the Company `other`.
const DATA: &str = r#"{"type":"Person","data":{"name":"alice"}}
{"type":"Person","data":{"name":"shared"}}
{"type":"Company","data":{"name":"shared"}}
{"type":"Company","data":{"name":"other"}}
{"edge":"WorksAt","from":"alice","to":"shared"}
{"edge":"WorksAt","from":"shared","to":"other"}"#;
const QUERY: &str = r#"
query reach($name: String) {
match {
$p: Person { name: $name }
$p worksAt{1,2} $c
}
return { $c.name }
}
"#;
let dir = tempfile::tempdir().unwrap();
let uri = dir.path().to_str().unwrap();
let mut db = Omnigraph::init(uri, SCHEMA).await.unwrap();
load_jsonl(&mut db, DATA, LoadMode::Overwrite).await.unwrap();
let got = both_modes(&mut db, QUERY, "reach", &params(&[("$name", "alice")])).await;
assert_eq!(
got,
vec!["shared"],
"cross-type worksAt{{1,2}} must return only the one-hop company; a hop-2 \
result means the id-string collision bled across types"
);
}