From f6a0e537370047425c7c0948f25c0735cd773ecb Mon Sep 17 00:00:00 2001 From: Ragnor Comerford Date: Tue, 9 Jun 2026 13:06:05 +0200 Subject: [PATCH] fix(engine): structurally cap cross-type Expand at one hop A cross-type edge cannot chain (e.g. a Company is not a WorksAt source), so a variable-length traversal over one is structurally single-hop. Both traversal paths now enforce this by capping max hops at 1 when from_type != to_type, instead of relying on the hop-2 scan returning empty. That reliance was a correctness hole on the indexed path: it interns every endpoint string into one dense id space, so a cross-type id-string collision (a Person and a Company sharing an id) let hop 2 de-intern a destination id back to the colliding source-type id and match its edges, emitting rows the CSR path never produces. With the cap the cross-type second-hop scan never runs, so the shared interner can no longer alias across types. Turns the regression test green (indexed == csr == ["shared"]). --- crates/omnigraph/src/exec/query.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/crates/omnigraph/src/exec/query.rs b/crates/omnigraph/src/exec/query.rs index 435fec0..d69c03f 100644 --- a/crates/omnigraph/src/exec/query.rs +++ b/crates/omnigraph/src/exec/query.rs @@ -1121,6 +1121,14 @@ async fn execute_expand_indexed( let (key_col, opp_col) = endpoint_columns(direction); let max = max_hops.unwrap_or(min_hops.max(1)); + // Cross-type edges cannot chain (a Company is not a `WorksAt` source), so a + // variable-length traversal over one is structurally single-hop. Enforce it + // here instead of relying on the hop-2 scan returning empty: this BFS interns + // every endpoint string into ONE dense id space, so a cross-type id-string + // collision (a Person and a Company sharing an id) would otherwise let hop 2 + // de-intern a destination id back to the colliding source-type id and match + // its edges, emitting rows the CSR path never produces. + let max = if same_type { max } else { max.min(1) }; // Per-source BFS state in DENSE id space: intern node ids to u32 once via a // per-traversal interner so visited/seen/frontier/neighbor-map avoid string @@ -1365,6 +1373,9 @@ async fn execute_expand_csr( let max = max_hops.unwrap_or(min_hops.max(1)); let same_type = src_type_name == dst_type_name; + // Cross-type edges cannot chain; a variable-length traversal over one is + // structurally single-hop (mirrors the indexed path's guarantee). + let max = if same_type { max } else { max.min(1) }; // BFS to collect (src_row_idx, dst_dense) pairs with per-source dedup. // Dense u32 ids stay in hand through BFS, dedup, and align — we only