diff --git a/crates/omnigraph/tests/traversal_indexed.rs b/crates/omnigraph/tests/traversal_indexed.rs index ecfab3d..ab68ff1 100644 --- a/crates/omnigraph/tests/traversal_indexed.rs +++ b/crates/omnigraph/tests/traversal_indexed.rs @@ -14,6 +14,7 @@ mod helpers; use arrow_array::{Array, StringArray}; use omnigraph::db::Omnigraph; +use omnigraph::loader::{LoadMode, load_jsonl}; use omnigraph::table_store::{IndexCoverage, TableStore}; use omnigraph_compiler::ir::ParamMap; use serial_test::serial; @@ -183,3 +184,52 @@ async fn indexed_finds_unindexed_appended_edge() { "indexed traversal must see the freshly-appended, unindexed edge" ); } + +// Regression: a node `id` is unique only WITHIN a type, so a `Person` and a +// `Company` can share an id string. A variable-length traversal over a +// cross-type edge (`worksAt`, Person -> Company) must structurally stop after +// one hop — a Company is not a `worksAt` source — so `worksAt{1,2}` returns +// exactly the one-hop companies. Before the structural hop-cap, the indexed +// path's single string interner de-interned the hop-1 Company id back to the +// colliding Person id and ran a hop-2 `worksAt src IN (...)` scan that matched +// that same-string Person's edges, emitting a spurious second-hop company the +// CSR path never produces. `both_modes` (csr == indexed == auto) plus the +// golden assert catch both the divergence and an over-emitting shared bug. +#[tokio::test] +#[serial] +async fn cross_type_id_collision_does_not_bleed_into_second_hop() { + const SCHEMA: &str = r#" +node Person { name: String @key } +node Company { name: String @key } +edge WorksAt: Person -> Company +"#; + // `shared` is BOTH a Person id and a Company id. alice worksAt the Company + // `shared`; the Person `shared` worksAt the Company `other`. + const DATA: &str = r#"{"type":"Person","data":{"name":"alice"}} +{"type":"Person","data":{"name":"shared"}} +{"type":"Company","data":{"name":"shared"}} +{"type":"Company","data":{"name":"other"}} +{"edge":"WorksAt","from":"alice","to":"shared"} +{"edge":"WorksAt","from":"shared","to":"other"}"#; + const QUERY: &str = r#" +query reach($name: String) { + match { + $p: Person { name: $name } + $p worksAt{1,2} $c + } + return { $c.name } +} +"#; + let dir = tempfile::tempdir().unwrap(); + let uri = dir.path().to_str().unwrap(); + let mut db = Omnigraph::init(uri, SCHEMA).await.unwrap(); + load_jsonl(&mut db, DATA, LoadMode::Overwrite).await.unwrap(); + + let got = both_modes(&mut db, QUERY, "reach", ¶ms(&[("$name", "alice")])).await; + assert_eq!( + got, + vec!["shared"], + "cross-type worksAt{{1,2}} must return only the one-hop company; a hop-2 \ + result means the id-string collision bled across types" + ); +}