From ec38e2f9c776e12c3d3580df8e86c2763b0c9c2c Mon Sep 17 00:00:00 2001 From: Ragnor Comerford Date: Tue, 9 Jun 2026 13:02:15 +0200 Subject: [PATCH] test(engine): regression for cross-type id-collision in indexed traversal A node id is unique only within a type, so a Person and a Company can share an id string. A variable-length traversal over a cross-type edge (WorksAt) must structurally stop after one hop. This test builds a graph where 'shared' is both a Person and a Company id and asserts worksAt{1,2} returns only the one-hop company. It fails today: the indexed path's single string interner de-interns the hop-1 Company id back to the colliding Person id and runs a hop-2 scan that matches that Person's edges, emitting a spurious second-hop company (indexed ["other","shared"] vs csr ["shared"]). --- crates/omnigraph/tests/traversal_indexed.rs | 50 +++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/crates/omnigraph/tests/traversal_indexed.rs b/crates/omnigraph/tests/traversal_indexed.rs index ecfab3d..ab68ff1 100644 --- a/crates/omnigraph/tests/traversal_indexed.rs +++ b/crates/omnigraph/tests/traversal_indexed.rs @@ -14,6 +14,7 @@ mod helpers; use arrow_array::{Array, StringArray}; use omnigraph::db::Omnigraph; +use omnigraph::loader::{LoadMode, load_jsonl}; use omnigraph::table_store::{IndexCoverage, TableStore}; use omnigraph_compiler::ir::ParamMap; use serial_test::serial; @@ -183,3 +184,52 @@ async fn indexed_finds_unindexed_appended_edge() { "indexed traversal must see the freshly-appended, unindexed edge" ); } + +// Regression: a node `id` is unique only WITHIN a type, so a `Person` and a +// `Company` can share an id string. A variable-length traversal over a +// cross-type edge (`worksAt`, Person -> Company) must structurally stop after +// one hop — a Company is not a `worksAt` source — so `worksAt{1,2}` returns +// exactly the one-hop companies. Before the structural hop-cap, the indexed +// path's single string interner de-interned the hop-1 Company id back to the +// colliding Person id and ran a hop-2 `worksAt src IN (...)` scan that matched +// that same-string Person's edges, emitting a spurious second-hop company the +// CSR path never produces. `both_modes` (csr == indexed == auto) plus the +// golden assert catch both the divergence and an over-emitting shared bug. +#[tokio::test] +#[serial] +async fn cross_type_id_collision_does_not_bleed_into_second_hop() { + const SCHEMA: &str = r#" +node Person { name: String @key } +node Company { name: String @key } +edge WorksAt: Person -> Company +"#; + // `shared` is BOTH a Person id and a Company id. alice worksAt the Company + // `shared`; the Person `shared` worksAt the Company `other`. + const DATA: &str = r#"{"type":"Person","data":{"name":"alice"}} +{"type":"Person","data":{"name":"shared"}} +{"type":"Company","data":{"name":"shared"}} +{"type":"Company","data":{"name":"other"}} +{"edge":"WorksAt","from":"alice","to":"shared"} +{"edge":"WorksAt","from":"shared","to":"other"}"#; + const QUERY: &str = r#" +query reach($name: String) { + match { + $p: Person { name: $name } + $p worksAt{1,2} $c + } + return { $c.name } +} +"#; + let dir = tempfile::tempdir().unwrap(); + let uri = dir.path().to_str().unwrap(); + let mut db = Omnigraph::init(uri, SCHEMA).await.unwrap(); + load_jsonl(&mut db, DATA, LoadMode::Overwrite).await.unwrap(); + + let got = both_modes(&mut db, QUERY, "reach", ¶ms(&[("$name", "alice")])).await; + assert_eq!( + got, + vec!["shared"], + "cross-type worksAt{{1,2}} must return only the one-hop company; a hop-2 \ + result means the id-string collision bled across types" + ); +}