From 1498ed821e6c381e07403e6d839b4db0ddf32686 Mon Sep 17 00:00:00 2001 From: aaltshuler Date: Tue, 16 Jun 2026 01:14:01 +0300 Subject: [PATCH] fix(embedding): from_parts honors an explicit model for the mock provider MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `EmbeddingConfig::from_parts(Some("mock"), _, Some(model), _)` short-circuited to `Self::mock()` and silently dropped the provided `model`, falling back to `OMNIGRAPH_EMBED_MODEL`. A cluster `embeddings` profile that sets `provider: mock, model: X` (RFC-012 Phase 5) would therefore not resolve to X — the query-time same-space check (`exec/query.rs`) compares the recorded model against the resolved one, so a dropped model makes that check fire on the wrong value (a spurious mismatch, or env-dependent). Mock vectors are model-independent so this is not silent cross-space ranking, but it breaks the contract `from_parts` documents ("model defaults exactly as from_env does") and the cluster path that Phase 5 wiring will feed it. Honor an explicit `model` for mock; fall back to `mock()`'s env-based model only when none is supplied. Regression: `from_parts_mock_honors_an_explicit_model` (red before this change). Addresses the bot-review finding on #248. Co-Authored-By: Claude Opus 4.8 --- crates/omnigraph/src/embedding.rs | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/crates/omnigraph/src/embedding.rs b/crates/omnigraph/src/embedding.rs index 0210fe2..2af6167 100644 --- a/crates/omnigraph/src/embedding.rs +++ b/crates/omnigraph/src/embedding.rs @@ -113,7 +113,16 @@ impl EmbeddingConfig { api_key: String, ) -> Result { if provider == Some("mock") { - return Ok(Self::mock()); + // An explicit `model` (e.g. a cluster `embeddings` profile) is + // authoritative — it is what the same-space check compares against — + // so honor it; fall back to `mock()`'s env-based model only when the + // caller supplied none. Without this, a profile's `model` is silently + // dropped and the same-space check resolves to OMNIGRAPH_EMBED_MODEL. + let mut config = Self::mock(); + if let Some(model) = model { + config.model = model; + } + return Ok(config); } let (provider, default_base, default_model, _key_envs) = provider_profile(provider)?; let base_url = base_url @@ -939,6 +948,25 @@ mod tests { ); } + #[test] + #[serial] + fn from_parts_mock_honors_an_explicit_model() { + // A cluster `embeddings` profile that sets `provider: mock, model: X` + // must resolve to model X — it is what the query-time same-space check + // compares against. Env cleared so the assertion isolates the arg. + let _guard = cleared_env(&[]); + let pinned = + EmbeddingConfig::from_parts(Some("mock"), None, Some("recorded-x".to_string()), String::new()) + .unwrap(); + assert_eq!(pinned.provider, Provider::Mock); + assert_eq!(pinned.model, "recorded-x"); + // With no explicit model, mock falls back to its env-based default (here + // empty, since the env is cleared). + let bare = EmbeddingConfig::from_parts(Some("mock"), None, None, String::new()).unwrap(); + assert_eq!(bare.provider, Provider::Mock); + assert_eq!(bare.model, ""); + } + #[test] #[serial] fn from_env_openai_compatible_prefers_openrouter_key() {