fix(engine): cap cross-type hops in the Expand cost model

gather_cost_inputs fed the requested max_hops into choose_expand_mode even though execute_expand_indexed runs at most one hop for a cross-type edge. So a cross-type variable-length expand (e.g. worksAt{1,5}) had its indexed cost scaled by 5 while only one hop runs, skewing the chooser toward CSR (an unnecessary whole-graph build) near the crossover. Results were unaffected (modes are equivalent); this is a plan-accuracy fix. Add cost_effective_hops(requested, same_type) — caps to 1 for cross-type — and apply it in gather_cost_inputs so the estimate matches what executes. Unit test covers the cap and the crossover consequence (capped 1 hop stays indexed where the requested 5 would have flipped to CSR).
2026-06-21 02:28:07 +02:00 · 2026-06-09 15:12:06 +02:00 · 2026-06-09 15:12:06 +02:00 · bdf31afb4c
commit bdf31afb4c
parent 5cca914822
1 changed file with 33 additions and 0 deletions
--- a/crates/omnigraph/src/exec/query.rs
+++ b/crates/omnigraph/src/exec/query.rs
@@ -869,6 +869,18 @@ fn choose_expand_mode(i: &ExpandCostInputs) -> ExpandMode {
    }
 }

+/// Hops the indexed path will actually run, for cost-model purposes. Same-type
+/// edges keep `requested_max_hops`; a cross-type edge cannot chain, so it is
+/// capped at one hop to match `execute_expand_indexed` (a request of 0 stays 0).
+/// Uncapped, the cost model over-estimates cross-type indexed cost, skewing to CSR.
+fn cost_effective_hops(requested_max_hops: u32, same_type: bool) -> u32 {
+    if same_type {
+        requested_max_hops
+    } else {
+        requested_max_hops.min(1) // upper bound only: never raises a smaller request
+    }
+}
+
 /// Gather the cost-model inputs from cheap manifest counts. `None` when the
 /// edge type, its source node type, or their manifest entries are absent (e.g.
 /// a not-yet-materialized table) — the caller then falls back to the legacy
@@ -884,6 +896,10 @@ fn gather_cost_inputs(
 ) -> Option<ExpandCostInputs> {
    let edge_entry = snapshot.entry(&format!("edge:{}", edge_type))?;
    let edge_def = catalog.edge_types.get(edge_type)?;
+    // Match the indexed path's cross-type one-hop cap so the cost estimate
+    // reflects what actually runs (see `cost_effective_hops`).
+    let effective_max_hops =
+        cost_effective_hops(effective_max_hops, edge_def.from_type == edge_def.to_type);
    // The frontier source vertices are the keyed endpoint's type: `from` for an
    // Out traversal (keyed on `src`), `to` for In (keyed on `dst`).
    let src_type = match direction {
@@ -2226,4 +2242,21 @@ mod expand_chooser_tests {
        i.csr_cached = true;
        assert_eq!(choose_expand_mode(&i), ExpandMode::Csr);
    }
+
+    #[test]
+    fn cost_model_caps_cross_type_hops() {
+        // Same-type passes the requested range through; cross-type caps at 1,
+        // matching execute_expand_indexed.
+        assert_eq!(cost_effective_hops(5, true), 5);
+        assert_eq!(cost_effective_hops(5, false), 1);
+        assert_eq!(cost_effective_hops(1, false), 1); // already at the cap: unchanged
+
+        // Consequence: a selective frontier where the requested 5 hops would
+        // (wrongly) flip cross-type to CSR, but the capped 1 hop — what actually
+        // runs — keeps it indexed.
+        let mut i = inputs(50, 10_000, 100, cost_effective_hops(5, false), IndexCoverage::Indexed);
+        assert_eq!(choose_expand_mode(&i), ExpandMode::IndexedScan);
+        i.effective_max_hops = 5; // as if the cross-type cap were not applied
+        assert_eq!(choose_expand_mode(&i), ExpandMode::Csr); // same inputs, only the hop count differs
+    }
 }