diff --git a/AGENTS.md b/AGENTS.md index 97ab51f..8894278 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -80,7 +80,7 @@ Full diagram and concurrency model: [docs/dev/architecture.md](docs/dev/architec | Mutations — insert/update/delete, D2, atomicity | [docs/user/mutations/index.md](docs/user/mutations/index.md) | | Search funcs (`nearest`/`bm25`/`rrf`), hybrid ranking | [docs/user/search/index.md](docs/user/search/index.md) | | Indexes (BTREE / inverted / vector / graph topology) | [docs/user/search/indexes.md](docs/user/search/indexes.md) | -| Embeddings (compiler + engine clients, env vars, `@embed`) | [docs/user/search/embeddings.md](docs/user/search/embeddings.md) | +| Embeddings (engine client, env vars, `@embed`) | [docs/user/search/embeddings.md](docs/user/search/embeddings.md) | | Concepts — what OmniGraph is, L1/L2 framing | [docs/user/concepts/index.md](docs/user/concepts/index.md) | | Quickstart — init → load → query → branch | [docs/user/quickstart.md](docs/user/quickstart.md) | | Branches, commit graph, system branches | [docs/user/branching/index.md](docs/user/branching/index.md) | diff --git a/Cargo.lock b/Cargo.lock index 33dd652..2419e9f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4915,12 +4915,10 @@ dependencies = [ "arrow-select", "pest", "pest_derive", - "reqwest 0.12.28", "serde", "serde_json", "sha2 0.10.9", "thiserror", - "tokio", ] [[package]] diff --git a/crates/omnigraph-cli/src/embed.rs b/crates/omnigraph-cli/src/embed.rs index 2e1c6d9..a0603b7 100644 --- a/crates/omnigraph-cli/src/embed.rs +++ b/crates/omnigraph-cli/src/embed.rs @@ -9,8 +9,6 @@ use omnigraph::embedding::EmbeddingClient; use serde::{Deserialize, Serialize}; use serde_json::{Map, Value, json}; -const DEFAULT_EMBED_MODEL: &str = "gemini-embedding-2-preview"; - #[derive(Debug, Args, Clone)] pub(crate) struct EmbedArgs { /// Seed manifest path @@ -85,8 +83,6 @@ impl EmbedMode { #[derive(Debug, Clone, Deserialize)] struct EmbedSpec { - #[serde(default = "default_embed_model")] - model: String, dimension: usize, types: BTreeMap, } @@ -180,13 +176,6 @@ pub(crate) fn resolve_embed_job(args: &EmbedArgs) -> Result { (input, output, spec) }; - if spec.model != DEFAULT_EMBED_MODEL { - bail!( - "only {} is supported for explicit seed embeddings right now", - DEFAULT_EMBED_MODEL - ); - } - Ok(EmbedJob { input, output, @@ -305,7 +294,14 @@ pub(crate) async fn run_embed_job(job: &EmbedJob) -> Result { cleaned_rows, mode: job.mode.as_str(!job.selectors.is_empty()), dimension: job.spec.dimension, - model: job.spec.model.clone(), + // The embedding model is resolved solely from the provider config; the + // spec carries no model field, so there is no second source of truth to + // silently disagree with the API. Report what was actually used (empty + // for `--clean`, which builds no client). + model: client + .as_ref() + .map(|c| c.config().model.clone()) + .unwrap_or_default(), }) } @@ -315,10 +311,6 @@ fn temp_output_path(output: &Path) -> PathBuf { PathBuf::from(temp) } -fn default_embed_model() -> String { - DEFAULT_EMBED_MODEL.to_string() -} - fn load_embed_spec(path: &Path) -> Result { Ok(serde_json::from_str(&fs::read_to_string(path)?)?) } diff --git a/crates/omnigraph-cli/src/output.rs b/crates/omnigraph-cli/src/output.rs index d0f5add..d6903f4 100644 --- a/crates/omnigraph-cli/src/output.rs +++ b/crates/omnigraph-cli/src/output.rs @@ -696,9 +696,24 @@ pub(crate) fn render_constraint(constraint: &omnigraph_compiler::schema::ast::Co pub(crate) fn render_annotations(annotations: &[omnigraph_compiler::schema::ast::Annotation]) -> String { annotations .iter() - .map(|annotation| match &annotation.value { - Some(value) => format!("@{}({})", annotation.name, value), - None => format!("@{}", annotation.name), + .map(|annotation| { + let mut args: Vec = Vec::new(); + // Values are parsed via `decode_string_literal` (quotes stripped), so + // re-quote them as string literals on render — otherwise a value with + // non-ident chars (e.g. `model=openai/text-embedding-3-large`) fails to + // round-trip back through the schema parser (`annotation_kwarg` wants a + // quoted `literal`, not a bare token). + if let Some(value) = &annotation.value { + args.push(format!("\"{}\"", value)); + } + for (key, val) in &annotation.kwargs { + args.push(format!("{}=\"{}\"", key, val)); + } + if args.is_empty() { + format!("@{}", annotation.name) + } else { + format!("@{}({})", annotation.name, args.join(", ")) + } }) .collect::>() .join(", ") @@ -970,3 +985,43 @@ pub(crate) fn resolve_table_render_options() -> ReadRenderOptions { cell_layout: operator.defaults.table_cell_layout.unwrap_or_default(), } } + +#[cfg(test)] +mod tests { + use omnigraph_compiler::schema::ast::Annotation; + use omnigraph_compiler::schema::parser::parse_schema; + use std::collections::BTreeMap; + + use super::render_annotations; + + #[test] + fn render_annotations_quotes_values_so_embed_round_trips() { + let mut kwargs = BTreeMap::new(); + kwargs.insert( + "model".to_string(), + "openai/text-embedding-3-large".to_string(), + ); + let embed = Annotation { + name: "embed".to_string(), + value: Some("title".to_string()), + kwargs, + }; + + let rendered = render_annotations(std::slice::from_ref(&embed)); + assert_eq!( + rendered, + r#"@embed("title", model="openai/text-embedding-3-large")"# + ); + + // The bug: an unquoted `model=openai/text-embedding-3-large` is not a + // valid `annotation_kwarg` literal, so `schema show` output did not + // re-parse. The rendered form must round-trip through the grammar. + let schema = format!("node Doc {{\ntitle: String\nembedding: Vector(3) {rendered}\n}}\n"); + let parsed = parse_schema(&schema); + assert!( + parsed.is_ok(), + "rendered @embed must re-parse: {:?}", + parsed.err() + ); + } +} diff --git a/crates/omnigraph-cli/tests/system_local.rs b/crates/omnigraph-cli/tests/system_local.rs index e971076..e9e4b2f 100644 --- a/crates/omnigraph-cli/tests/system_local.rs +++ b/crates/omnigraph-cli/tests/system_local.rs @@ -1036,6 +1036,11 @@ query vector_search($q: String) { let result = parse_stdout_json(&output_success( cli() + // Stored vectors above were produced with gemini-embedding-2-preview; + // pin the query-time embedder to the same provider/model so the + // auto-embedded `$q` lands in the same vector space. + .env("OMNIGRAPH_EMBED_PROVIDER", "gemini") + .env("OMNIGRAPH_EMBED_MODEL", "gemini-embedding-2-preview") .arg("read") .arg("--store") .arg(&graph) diff --git a/crates/omnigraph-cluster/src/config.rs b/crates/omnigraph-cluster/src/config.rs index acc954d..d0e0edd 100644 --- a/crates/omnigraph-cluster/src/config.rs +++ b/crates/omnigraph-cluster/src/config.rs @@ -42,7 +42,12 @@ pub(crate) fn resolve_query_decls( return ( map.iter() .map(|(name, config)| { - (name.clone(), QueryConfig { file: config.file.clone() }) + ( + name.clone(), + QueryConfig { + file: config.file.clone(), + }, + ) }) .collect(), BTreeMap::new(), @@ -66,7 +71,10 @@ pub(crate) fn resolve_query_decls( diagnostics.push(Diagnostic::error( "query_dir_unreadable", format!("graphs.{graph_id}.queries"), - format!("could not list query directory '{}': {err}", resolved.display()), + format!( + "could not list query directory '{}': {err}", + resolved.display() + ), )); continue; } @@ -76,7 +84,10 @@ pub(crate) fn resolve_query_decls( diagnostics.push(Diagnostic::warning( "query_dir_empty", format!("graphs.{graph_id}.queries"), - format!("query directory '{}' contains no .gq files", resolved.display()), + format!( + "query directory '{}' contains no .gq files", + resolved.display() + ), )); } for path in entries { @@ -132,7 +143,12 @@ pub(crate) fn resolve_query_decls( continue; } origin.insert(name.clone(), declared.clone()); - registry.insert(name, QueryConfig { file: declared.clone() }); + registry.insert( + name, + QueryConfig { + file: declared.clone(), + }, + ); } contents.insert(declared, source); } @@ -269,8 +285,6 @@ pub(crate) fn validate_cluster_header( } } - - pub(crate) fn state_resource_digests(state: &ClusterState) -> BTreeMap { state .applied_revision @@ -295,7 +309,6 @@ pub(crate) fn initial_import_state(desired: &DesiredCluster) -> ClusterState { } } - pub(crate) async fn observe_declared_graphs( desired: &DesiredCluster, backend: &ClusterStore, @@ -350,19 +363,28 @@ pub(crate) async fn observe_declared_graphs( StateResource { digest: observation.schema_digest.clone(), applies_to: None, + embedding_provider: None, + embedding_profile: None, }, ); let query_digests = state_query_digests_for_graph(state, &graph.id); + let embedding_provider = state_graph_embedding_provider(state, &graph.id); + let embedding_provider_digest = + state_embedding_provider_digest(state, embedding_provider.as_deref()); let graph_digest_value = graph_digest( &graph.id, Some(&observation.schema_digest), Some(&query_digests), + embedding_provider.as_deref(), + embedding_provider_digest.as_ref(), ); state.applied_revision.resources.insert( graph_address.clone(), StateResource { digest: graph_digest_value, applies_to: None, + embedding_provider, + embedding_profile: None, }, ); state.observations.insert( @@ -499,7 +521,6 @@ pub(crate) fn graph_observation_json(observation: GraphObservationJson<'_>) -> s }) } - pub(crate) fn load_desired(config_dir: &Path) -> LoadOutcome { let parsed = parse_cluster_config(config_dir); let config_dir = parsed.config_dir; @@ -519,6 +540,35 @@ pub(crate) fn load_desired(config_dir: &Path) -> LoadOutcome { let mut dependencies = BTreeSet::new(); let mut graph_query_digests: BTreeMap> = BTreeMap::new(); let mut graph_schema_digests: BTreeMap = BTreeMap::new(); + let mut graph_embedding_providers: BTreeMap = BTreeMap::new(); + let mut embedding_provider_digests: BTreeMap = BTreeMap::new(); + let mut embedding_providers: BTreeMap = BTreeMap::new(); + + for (provider_name, profile) in &raw.providers.embedding { + validate_id( + "embedding provider name", + &format!("providers.embedding.{provider_name}"), + provider_name, + &mut diagnostics, + ); + let address = embedding_provider_address(provider_name); + profile.validate( + format!("providers.embedding.{provider_name}"), + &mut diagnostics, + ); + let digest = embedding_provider_digest(profile); + embedding_provider_digests.insert(address.clone(), digest.clone()); + embedding_providers.insert(address.clone(), profile.clone()); + resources.insert( + address.clone(), + ResourceSummary { + address, + kind: "embedding_provider".to_string(), + digest, + path: None, + }, + ); + } for (graph_id, graph) in &raw.graphs { validate_id( @@ -533,6 +583,35 @@ pub(crate) fn load_desired(config_dir: &Path) -> LoadOutcome { from: schema_address.clone(), to: graph_address.clone(), }); + if let Some(provider_ref) = graph.embedding_provider.as_deref() { + match normalize_embedding_provider_target(provider_ref) { + EmbeddingProviderTarget::Provider(provider_name) => { + let provider_address = embedding_provider_address(&provider_name); + if raw.providers.embedding.contains_key(&provider_name) { + dependencies.insert(Dependency { + from: graph_address.clone(), + to: provider_address.clone(), + }); + graph_embedding_providers.insert(graph_id.clone(), provider_address); + } else { + diagnostics.push(Diagnostic::error( + "dangling_embedding_provider_reference", + format!("graphs.{graph_id}.embedding_provider"), + format!( + "graph references embedding provider `{provider_name}`, but no providers.embedding.{provider_name} profile is declared" + ), + )); + } + } + EmbeddingProviderTarget::WrongKind(kind) => diagnostics.push(Diagnostic::error( + "wrong_kind_reference", + format!("graphs.{graph_id}.embedding_provider"), + format!( + "embedding_provider expects a providers.embedding ref or bare provider name, got `{kind}`" + ), + )), + } + } let schema_path = resolve_config_path(&config_dir, &graph.schema); let schema_source = match fs::read_to_string(&schema_path) { @@ -646,10 +725,15 @@ pub(crate) fn load_desired(config_dir: &Path) -> LoadOutcome { } for graph_id in raw.graphs.keys() { + let embedding_provider = graph_embedding_providers.get(graph_id); + let embedding_provider_digest = + embedding_provider.and_then(|address| embedding_provider_digests.get(address)); let digest = graph_digest( graph_id, graph_schema_digests.get(graph_id), graph_query_digests.get(graph_id), + embedding_provider.map(String::as_str), + embedding_provider_digest, ); resources.insert( graph_address(graph_id), @@ -754,6 +838,7 @@ pub(crate) fn load_desired(config_dir: &Path) -> LoadOutcome { .get(graph_id) .cloned() .unwrap_or_default(), + embedding_provider: graph_embedding_providers.get(graph_id).cloned(), }) .collect(); let config_digest = desired_config_digest(&raw, &resource_digests); @@ -769,6 +854,7 @@ pub(crate) fn load_desired(config_dir: &Path) -> LoadOutcome { resources: resource_list, dependencies, policy_bindings, + embedding_providers, }), diagnostics, config_dir, @@ -828,7 +914,6 @@ pub(crate) fn future_field_diagnostics(text: &str) -> Vec { let future_fields = [ "apply", "env_file", - "providers", "pipelines", "embeddings", "ui", @@ -882,6 +967,21 @@ pub(crate) fn normalize_policy_target(value: &str) -> PolicyTarget { } } +enum EmbeddingProviderTarget { + Provider(String), + WrongKind(String), +} + +fn normalize_embedding_provider_target(value: &str) -> EmbeddingProviderTarget { + if let Some(name) = value.strip_prefix("provider.embedding.") { + EmbeddingProviderTarget::Provider(name.to_string()) + } else if value.contains('.') { + EmbeddingProviderTarget::WrongKind(value.to_string()) + } else { + EmbeddingProviderTarget::Provider(value.to_string()) + } +} + pub(crate) fn graph_address(graph_id: &str) -> String { format!("graph.{graph_id}") } @@ -898,6 +998,10 @@ pub(crate) fn policy_address(policy_name: &str) -> String { format!("policy.{policy_name}") } +pub(crate) fn embedding_provider_address(provider_name: &str) -> String { + format!("provider.embedding.{provider_name}") +} + pub(crate) fn resolve_config_path(config_dir: &Path, path: &Path) -> PathBuf { if path.is_absolute() { path.to_path_buf() diff --git a/crates/omnigraph-cluster/src/diff.rs b/crates/omnigraph-cluster/src/diff.rs index 593b2fa..516a86e 100644 --- a/crates/omnigraph-cluster/src/diff.rs +++ b/crates/omnigraph-cluster/src/diff.rs @@ -152,7 +152,9 @@ pub(crate) fn approved_resources( let candidates: Vec<&ApprovalArtifact> = artifacts .iter() .map(|(_, artifact)| artifact) - .filter(|artifact| artifact.consumed_at.is_none() && artifact.resource == change.resource) + .filter(|artifact| { + artifact.consumed_at.is_none() && artifact.resource == change.resource + }) .collect(); if candidates.is_empty() { continue; @@ -181,6 +183,7 @@ pub(crate) enum ResourceKind { Schema(String), Query { graph: String, name: String }, Policy(String), + EmbeddingProvider(String), Unknown, } @@ -199,6 +202,8 @@ pub(crate) fn resource_kind(address: &str) -> ResourceKind { } } else if let Some(name) = address.strip_prefix("policy.") { ResourceKind::Policy(name.to_string()) + } else if let Some(name) = address.strip_prefix("provider.embedding.") { + ResourceKind::EmbeddingProvider(name.to_string()) } else { ResourceKind::Unknown } @@ -261,8 +266,7 @@ pub(crate) fn classify_changes( let (disposition, reason) = match resource_kind(&change.resource) { ResourceKind::Schema(graph) => match change.operation { PlanOperation::Create - if graph_creates.contains(&graph) - && !pending_recovery.contains(&graph) => + if graph_creates.contains(&graph) && !pending_recovery.contains(&graph) => { // Applied with the graph create — the init carries it. (ApplyDisposition::Applied, None) @@ -325,10 +329,7 @@ pub(crate) fn classify_changes( if pending_recovery.contains(&graph) { (ApplyDisposition::Blocked, Some("cluster_recovery_pending")) } else if schema_pending.contains(&graph) { - ( - ApplyDisposition::Blocked, - Some("dependency_not_applied"), - ) + (ApplyDisposition::Blocked, Some("dependency_not_applied")) } else { // A graph create in the same plan no longer blocks: // creates execute first in the same apply run. @@ -353,9 +354,8 @@ pub(crate) fn classify_changes( } } }, - ResourceKind::Unknown => { - (ApplyDisposition::Deferred, Some("apply_unsupported_kind")) - } + ResourceKind::EmbeddingProvider(_) => (ApplyDisposition::Applied, None), + ResourceKind::Unknown => (ApplyDisposition::Deferred, Some("apply_unsupported_kind")), }; change.disposition = Some(disposition); change.reason = reason.map(str::to_string); diff --git a/crates/omnigraph-cluster/src/lib.rs b/crates/omnigraph-cluster/src/lib.rs index 32abe66..1c4e4fc 100644 --- a/crates/omnigraph-cluster/src/lib.rs +++ b/crates/omnigraph-cluster/src/lib.rs @@ -20,18 +20,35 @@ use ulid::Ulid; pub mod failpoints; mod config; -mod types; mod diff; mod serve; -mod sweep; mod store; +mod sweep; +mod types; +use config::{ + QueriesDecl, future_field_diagnostics, graph_address, initial_import_state, load_desired, + normalize_policy_target, observe_declared_graphs, observe_live_graph, parse_cluster_config, + policy_address, preview_schema_migration, query_address, resolve_config_path, + resolve_query_decls, schema_address, state_resource_digests, validate_cluster_header, + validate_id, validate_query_source, +}; +use diff::{ + FailedGraphOrigin, ResourceKind, append_policy_binding_changes, approved_resources, + classify_changes, compute_approvals, compute_blast_radius, demote_dependents_of_failed_graphs, + diff_resources, resource_kind, +}; +pub use serve::{ + ServingGraph, ServingPolicy, ServingQuery, ServingSnapshot, cluster_graph_ids, + cluster_root_for_graph_uri, read_serving_snapshot, read_serving_snapshot_from_storage, + resolve_graph_storage_uri, +}; use store::{ClusterStore, StateLockGuard, StateSnapshot}; +use sweep::{ + mark_approvals_consumed, record_approval_consumed, sweep_recovery_sidecars, + tombstone_graph_subtree, warn_pending_recovery_sidecars, +}; pub use types::*; use types::*; -pub use serve::{ServingGraph, ServingPolicy, ServingQuery, ServingSnapshot, cluster_graph_ids, cluster_root_for_graph_uri, read_serving_snapshot, read_serving_snapshot_from_storage, resolve_graph_storage_uri}; -use config::{QueriesDecl, observe_declared_graphs, validate_cluster_header, future_field_diagnostics, initial_import_state, observe_live_graph, preview_schema_migration, state_resource_digests, graph_address, policy_address, query_address, schema_address, load_desired, normalize_policy_target, parse_cluster_config, resolve_config_path, resolve_query_decls, validate_id, validate_query_source}; -use diff::{FailedGraphOrigin, ResourceKind, append_policy_binding_changes, approved_resources, classify_changes, compute_approvals, compute_blast_radius, demote_dependents_of_failed_graphs, diff_resources, resource_kind}; -use sweep::{mark_approvals_consumed, record_approval_consumed, sweep_recovery_sidecars, tombstone_graph_subtree, warn_pending_recovery_sidecars}; pub const CLUSTER_CONFIG_FILE: &str = "cluster.yaml"; pub const CLUSTER_GRAPHS_DIR: &str = "graphs"; @@ -44,10 +61,7 @@ pub const CLUSTER_APPROVALS_DIR: &str = "__cluster/approvals"; /// The store for a load outcome: the declared `storage:` root when present, /// the config directory itself otherwise. A bad root is a loud error. -fn store_for( - config_dir: &Path, - storage_root: Option<&str>, -) -> Result { +fn store_for(config_dir: &Path, storage_root: Option<&str>) -> Result { match storage_root { Some(root) => ClusterStore::for_storage_root(root), None => Ok(ClusterStore::for_config_dir(config_dir)), @@ -179,7 +193,12 @@ pub async fn plan_config_dir(config_dir: impl AsRef) -> PlanOutput { &desired.config_digest, &mut diagnostics, ); - classify_changes(&mut changes, &desired.dependencies, &BTreeSet::new(), &approved); + classify_changes( + &mut changes, + &desired.dependencies, + &BTreeSet::new(), + &approved, + ); // Embed real migration steps for schema updates so plan is a data-aware // preview; failures degrade to the digest diff with a warning. @@ -282,9 +301,7 @@ pub async fn apply_config_dir_with_options( ok: !has_errors(&diagnostics), config_dir, actor: actor_for_output.clone(), - desired_revision: DesiredRevision { - config_digest, - }, + desired_revision: DesiredRevision { config_digest }, state_observations: observations, changes, applied_count: 0, @@ -464,8 +481,7 @@ pub async fn apply_config_dir_with_options( failed_graphs.insert(graph_id.clone(), FailedGraphOrigin::GraphCreate); continue; } - let Some(desired_graph) = desired.graphs.iter().find(|graph| &graph.id == graph_id) - else { + let Some(desired_graph) = desired.graphs.iter().find(|graph| &graph.id == graph_id) else { continue; }; let graph_uri = backend.graph_root(graph_id); @@ -604,8 +620,7 @@ pub async fn apply_config_dir_with_options( failed_graphs.insert(graph_id.clone(), FailedGraphOrigin::SchemaApply); continue; } - let Some(desired_graph) = desired.graphs.iter().find(|graph| &graph.id == graph_id) - else { + let Some(desired_graph) = desired.graphs.iter().find(|graph| &graph.id == graph_id) else { continue; }; let graph_uri = backend.graph_root(graph_id); @@ -955,8 +970,10 @@ pub async fn apply_config_dir_with_options( .expect("create/update always carries an after digest"), // Policies record their applied bindings so the // ledger is serving-sufficient (RFC-005 §D3). - applies_to: desired - .policy_bindings + applies_to: desired.policy_bindings.get(&change.resource).cloned(), + embedding_provider: None, + embedding_profile: desired + .embedding_providers .get(&change.resource) .cloned(), }, @@ -964,7 +981,10 @@ pub async fn apply_config_dir_with_options( set_resource_status_applied(&mut new_state, &change.resource); } PlanOperation::Delete => { - new_state.applied_revision.resources.remove(&change.resource); + new_state + .applied_revision + .resources + .remove(&change.resource); new_state.resource_statuses.remove(&change.resource); } }, @@ -1219,7 +1239,6 @@ pub async fn approve_config_dir( } } - pub async fn status_config_dir(config_dir: impl AsRef) -> StatusOutput { let parsed = parse_cluster_config(config_dir.as_ref()); let mut diagnostics = parsed.diagnostics; @@ -1238,7 +1257,9 @@ pub async fn status_config_dir(config_dir: impl AsRef) -> StatusOutput { } }; let mut observations = backend.observations(); - backend.observe_lock(&mut observations, &mut diagnostics).await; + backend + .observe_lock(&mut observations, &mut diagnostics) + .await; warn_pending_recovery_sidecars(&parsed.config_dir, &mut diagnostics); let mut resource_digests = BTreeMap::new(); @@ -1254,9 +1275,7 @@ pub async fn status_config_dir(config_dir: impl AsRef) -> StatusOutput { // Read-only point-in-time catalog check: report the // findings as diagnostics; persisting Drifted statuses // is refresh's job. Status never writes state. - for (address, finding) in - verify_catalog_payloads(&backend, &state).await - { + for (address, finding) in verify_catalog_payloads(&backend, &state).await { diagnostics.push(payload_finding_diagnostic(&address, &finding)); } resource_digests = state_resource_digests(&state); @@ -1312,7 +1331,10 @@ pub async fn force_unlock_config_dir( if let Some(raw) = parsed.raw.as_ref() { let _settings = validate_cluster_header(raw, &mut diagnostics); if !has_errors(&diagnostics) { - match backend.force_unlock(lock_id.as_ref(), &mut observations).await { + match backend + .force_unlock(lock_id.as_ref(), &mut observations) + .await + { Ok(()) => lock_removed = true, Err(diagnostic) => diagnostics.push(diagnostic), } @@ -1380,7 +1402,10 @@ async fn sync_config_dir(config_dir: &Path, operation: StateSyncOperation) -> St let operation_label = state_sync_operation_label(operation); let _lock_guard = if desired.state_lock { - match backend.acquire_lock(operation_label, &mut observations).await { + match backend + .acquire_lock(operation_label, &mut observations) + .await + { Ok(guard) => Some(guard), Err(diagnostic) => { diagnostics.push(diagnostic); @@ -1542,7 +1567,10 @@ async fn sync_config_dir(config_dir: &Path, operation: StateSyncOperation) -> St state.state_revision = state.state_revision.saturating_add(1); } - match backend.write_state(&state, expected_cas.as_deref(), &mut observations).await { + match backend + .write_state(&state, expected_cas.as_deref(), &mut observations) + .await + { Ok(()) => { // Completed sweep sidecars are deleted only after their outcome // is durably recorded; on failure they stay and re-sweep. @@ -1569,9 +1597,6 @@ async fn sync_config_dir(config_dir: &Path, operation: StateSyncOperation) -> St } } - - - #[derive(Debug, PartialEq, Eq)] enum PayloadFinding { Missing, @@ -1650,7 +1675,10 @@ async fn write_resource_payload( Diagnostic::error( "resource_payload_write_error", resource, - format!("could not read resource source '{}': {err}", source.display()), + format!( + "could not read resource source '{}': {err}", + source.display() + ), ) })?; if sha256_hex(&bytes) != expected_digest { @@ -1692,7 +1720,11 @@ async fn write_resource_payload( fn recompute_state_graph_digests(state: &mut ClusterState, desired: &DesiredCluster) { for graph in &desired.graphs { let graph_address = graph_address(&graph.id); - if !state.applied_revision.resources.contains_key(&graph_address) { + if !state + .applied_revision + .resources + .contains_key(&graph_address) + { continue; } let schema_digest = state @@ -1701,11 +1733,26 @@ fn recompute_state_graph_digests(state: &mut ClusterState, desired: &DesiredClus .get(&schema_address(&graph.id)) .map(|resource| resource.digest.clone()); let query_digests = state_query_digests_for_graph(state, &graph.id); - let digest = graph_digest(&graph.id, schema_digest.as_ref(), Some(&query_digests)); - state - .applied_revision - .resources - .insert(graph_address, StateResource { digest, applies_to: None }); + let embedding_provider = graph.embedding_provider.as_deref(); + let embedding_provider_digest = embedding_provider + .and_then(|address| state.applied_revision.resources.get(address)) + .map(|resource| resource.digest.clone()); + let digest = graph_digest( + &graph.id, + schema_digest.as_ref(), + Some(&query_digests), + embedding_provider, + embedding_provider_digest.as_ref(), + ); + state.applied_revision.resources.insert( + graph_address, + StateResource { + digest, + applies_to: None, + embedding_provider: graph.embedding_provider.clone(), + embedding_profile: None, + }, + ); } } @@ -1773,7 +1820,6 @@ fn duplicate_key_diagnostics(text: &str) -> Vec { diagnostics } - fn strip_comment(line: &str) -> String { let mut in_single_quote = false; let mut in_double_quote = false; @@ -1796,7 +1842,6 @@ fn strip_comment(line: &str) -> String { line.to_string() } - fn state_query_digests_for_graph(state: &ClusterState, graph_id: &str) -> BTreeMap { let prefix = format!("query.{graph_id}."); state @@ -1811,6 +1856,23 @@ fn state_query_digests_for_graph(state: &ClusterState, graph_id: &str) -> BTreeM .collect() } +fn state_graph_embedding_provider(state: &ClusterState, graph_id: &str) -> Option { + state + .applied_revision + .resources + .get(&graph_address(graph_id)) + .and_then(|resource| resource.embedding_provider.clone()) +} + +fn state_embedding_provider_digest( + state: &ClusterState, + embedding_provider: Option<&str>, +) -> Option { + embedding_provider + .and_then(|address| state.applied_revision.resources.get(address)) + .map(|resource| resource.digest.clone()) +} + fn set_resource_status_applied(state: &mut ClusterState, address: &str) { state.resource_statuses.insert( address.to_string(), @@ -1843,6 +1905,8 @@ fn graph_digest( graph_id: &str, schema_digest: Option<&String>, query_digests: Option<&BTreeMap>, + embedding_provider: Option<&str>, + embedding_provider_digest: Option<&String>, ) -> String { let mut input = format!( "graph\0{graph_id}\0schema\0{}\0", @@ -1857,6 +1921,21 @@ fn graph_digest( input.push('\0'); } } + if let Some(provider) = embedding_provider { + input.push_str("embedding_provider\0"); + input.push_str(provider); + input.push('\0'); + input.push_str(embedding_provider_digest.map_or("", String::as_str)); + input.push('\0'); + } + sha256_hex(input.as_bytes()) +} + +fn embedding_provider_digest(profile: &EmbeddingProviderConfig) -> String { + let mut input = String::from("embedding-provider\0"); + let config_semantics = + serde_json::to_string(profile).expect("embedding provider config must serialize"); + input.push_str(&config_semantics); sha256_hex(input.as_bytes()) } @@ -1930,7 +2009,6 @@ fn display_path(path: &Path) -> String { path.display().to_string() } - #[cfg(test)] #[path = "tests.rs"] mod tests; diff --git a/crates/omnigraph-cluster/src/serve.rs b/crates/omnigraph-cluster/src/serve.rs index d0c67b4..6f89e2d 100644 --- a/crates/omnigraph-cluster/src/serve.rs +++ b/crates/omnigraph-cluster/src/serve.rs @@ -8,6 +8,7 @@ use super::*; pub struct ServingGraph { pub graph_id: String, pub root: PathBuf, + pub embedding: Option, } /// One stored query: its graph binding, registry name, and verified source. @@ -225,15 +226,73 @@ async fn read_snapshot_with_store( return Err(diagnostics); }; + let mut embedding_profiles: BTreeMap = BTreeMap::new(); + for (address, entry) in &state.applied_revision.resources { + if !matches!(resource_kind(address), ResourceKind::EmbeddingProvider(_)) { + continue; + } + let Some(profile) = entry.embedding_profile.clone() else { + diagnostics.push(Diagnostic::error( + "embedding_provider_profile_missing", + address.clone(), + "no applied embedding provider profile recorded; re-run `cluster apply` to backfill", + )); + continue; + }; + let actual_digest = embedding_provider_digest(&profile); + if actual_digest != entry.digest { + diagnostics.push(Diagnostic::error( + "embedding_provider_digest_mismatch", + address.clone(), + format!( + "applied embedding provider profile does not match its recorded digest (actual sha256:{actual_digest}); run `cluster refresh` then `cluster apply`, and restart" + ), + )); + continue; + } + embedding_profiles.insert(address.clone(), profile); + } + let mut graphs = Vec::new(); let mut queries = Vec::new(); let mut policies = Vec::new(); for (address, entry) in &state.applied_revision.resources { match resource_kind(address) { ResourceKind::Graph(graph_id) => { + let embedding = match entry.embedding_provider.as_deref() { + Some(provider_address) => match resource_kind(provider_address) { + ResourceKind::EmbeddingProvider(_) => { + match embedding_profiles.get(provider_address) { + Some(profile) => Some(profile.clone()), + None => { + diagnostics.push(Diagnostic::error( + "embedding_provider_missing", + address.clone(), + format!( + "graph references `{provider_address}`, but no applied embedding provider profile is available; re-run `cluster apply`" + ), + )); + None + } + } + } + _ => { + diagnostics.push(Diagnostic::error( + "wrong_kind_reference", + address.clone(), + format!( + "graph embedding_provider expects `provider.embedding.`, got `{provider_address}`" + ), + )); + None + } + }, + None => None, + }; graphs.push(ServingGraph { root: PathBuf::from(backend.graph_root(&graph_id)), graph_id, + embedding, }); } ResourceKind::Schema(_) => {} @@ -241,7 +300,10 @@ async fn read_snapshot_with_store( let ResourceKind::Query { graph, name } = &kind else { unreachable!() }; - match backend.read_verified_payload(&kind, &entry.digest, address).await { + match backend + .read_verified_payload(&kind, &entry.digest, address) + .await + { Ok(source) => queries.push(ServingQuery { graph_id: graph.clone(), name: name.clone(), @@ -262,7 +324,10 @@ async fn read_snapshot_with_store( )); continue; }; - match backend.read_verified_payload(&kind, &entry.digest, address).await { + match backend + .read_verified_payload(&kind, &entry.digest, address) + .await + { Ok(source) => policies.push(ServingPolicy { name: name.clone(), source, @@ -271,6 +336,7 @@ async fn read_snapshot_with_store( Err(diagnostic) => diagnostics.push(diagnostic), } } + ResourceKind::EmbeddingProvider(_) => {} ResourceKind::Unknown => {} } } @@ -338,4 +404,3 @@ mod tests { ); } } - diff --git a/crates/omnigraph-cluster/src/sweep.rs b/crates/omnigraph-cluster/src/sweep.rs index 7aecb01..6539cae 100644 --- a/crates/omnigraph-cluster/src/sweep.rs +++ b/crates/omnigraph-cluster/src/sweep.rs @@ -19,13 +19,29 @@ pub(crate) async fn sweep_recovery_sidecars( for (path, sidecar) in backend.list_recovery_sidecars(diagnostics).await { match sidecar.kind { RecoverySidecarKind::GraphCreate => { - sweep_graph_create_sidecar(backend, path, sidecar, state, diagnostics, &mut outcome).await; + sweep_graph_create_sidecar( + backend, + path, + sidecar, + state, + diagnostics, + &mut outcome, + ) + .await; } RecoverySidecarKind::SchemaApply => { sweep_schema_apply_sidecar(path, sidecar, state, diagnostics, &mut outcome).await; } RecoverySidecarKind::GraphDelete => { - sweep_graph_delete_sidecar(backend, path, sidecar, state, diagnostics, &mut outcome).await; + sweep_graph_delete_sidecar( + backend, + path, + sidecar, + state, + diagnostics, + &mut outcome, + ) + .await; } } } @@ -71,15 +87,30 @@ pub(crate) async fn sweep_graph_create_sidecar( StateResource { digest: live_digest.clone(), applies_to: None, + embedding_provider: None, + embedding_profile: None, }, ); let query_digests = state_query_digests_for_graph(state, &sidecar.graph_id); - let composite = - graph_digest(&sidecar.graph_id, Some(&live_digest), Some(&query_digests)); - state - .applied_revision - .resources - .insert(graph_address.clone(), StateResource { digest: composite, applies_to: None }); + let embedding_provider = state_graph_embedding_provider(state, &sidecar.graph_id); + let embedding_provider_digest = + state_embedding_provider_digest(state, embedding_provider.as_deref()); + let composite = graph_digest( + &sidecar.graph_id, + Some(&live_digest), + Some(&query_digests), + embedding_provider.as_deref(), + embedding_provider_digest.as_ref(), + ); + state.applied_revision.resources.insert( + graph_address.clone(), + StateResource { + digest: composite, + applies_to: None, + embedding_provider, + embedding_profile: None, + }, + ); set_resource_status_applied(state, &graph_address); set_resource_status_applied(state, &schema_addr); state.recovery_records.insert( @@ -200,14 +231,30 @@ pub(crate) async fn sweep_schema_apply_sidecar( StateResource { digest: live_digest.clone(), applies_to: None, + embedding_provider: None, + embedding_profile: None, }, ); let query_digests = state_query_digests_for_graph(state, &sidecar.graph_id); - let composite = graph_digest(&sidecar.graph_id, Some(&live_digest), Some(&query_digests)); - state - .applied_revision - .resources - .insert(graph_address.clone(), StateResource { digest: composite, applies_to: None }); + let embedding_provider = state_graph_embedding_provider(state, &sidecar.graph_id); + let embedding_provider_digest = + state_embedding_provider_digest(state, embedding_provider.as_deref()); + let composite = graph_digest( + &sidecar.graph_id, + Some(&live_digest), + Some(&query_digests), + embedding_provider.as_deref(), + embedding_provider_digest.as_ref(), + ); + state.applied_revision.resources.insert( + graph_address.clone(), + StateResource { + digest: composite, + applies_to: None, + embedding_provider, + embedding_profile: None, + }, + ); set_resource_status_applied(state, &graph_address); set_resource_status_applied(state, &schema_addr); state.recovery_records.insert( @@ -274,7 +321,11 @@ pub(crate) async fn sweep_graph_delete_sidecar( return; } - if !state.applied_revision.resources.contains_key(&graph_address) { + if !state + .applied_revision + .resources + .contains_key(&graph_address) + { // Row 7: already tombstoned (or never recorded); crash fell between // the state CAS and sidecar delete. outcome.completed_sidecars.push(path); @@ -283,7 +334,12 @@ pub(crate) async fn sweep_graph_delete_sidecar( // Row 7b: the root is gone, the ledger is stale — roll forward the // tombstone, consume the approval the sidecar carries, audit. - tombstone_graph_subtree(state, &sidecar.graph_id, sidecar.approval_id.as_deref(), sidecar.actor.as_deref()); + tombstone_graph_subtree( + state, + &sidecar.graph_id, + sidecar.approval_id.as_deref(), + sidecar.actor.as_deref(), + ); state.recovery_records.insert( sidecar.operation_id.clone(), json!({ @@ -342,7 +398,11 @@ pub(crate) fn tombstone_graph_subtree( /// Record approval consumption in the state ledger. The artifact FILE is /// rewritten with consumed_at only after the state write lands, so a failed /// CAS leaves the approval valid for the retry. -pub(crate) fn record_approval_consumed(state: &mut ClusterState, approval_id: &str, operation_id: &str) { +pub(crate) fn record_approval_consumed( + state: &mut ClusterState, + approval_id: &str, + operation_id: &str, +) { state.approval_records.insert( approval_id.to_string(), json!({ diff --git a/crates/omnigraph-cluster/src/tests.rs b/crates/omnigraph-cluster/src/tests.rs index 805ecda..ac448cf 100644 --- a/crates/omnigraph-cluster/src/tests.rs +++ b/crates/omnigraph-cluster/src/tests.rs @@ -56,6 +56,39 @@ policies: dir } + fn write_mock_embedding_cluster(config_dir: &Path, model: &str) { + fs::write( + config_dir.join(CLUSTER_CONFIG_FILE), + format!( + r#" +version: 1 +metadata: + name: test +state: + backend: cluster + lock: true +providers: + embedding: + default: + kind: mock + model: {model} +graphs: + knowledge: + schema: ./people.pg + embedding_provider: default + queries: + find_person: + file: ./people.gq +policies: + base: + file: ./base.policy.yaml + applies_to: [knowledge] +"# + ), + ) + .unwrap(); + } + async fn init_derived_graph(root: &Path) { let graph_dir = root.join(CLUSTER_GRAPHS_DIR); fs::create_dir_all(&graph_dir).unwrap(); @@ -194,6 +227,95 @@ policies: assert!(codes.contains("dangling_graph_reference")); } + #[test] + fn embedding_provider_config_accepts_provider_resources_and_graph_refs() { + let dir = fixture(); + write_mock_embedding_cluster(dir.path(), "recorded-x"); + + let out = validate_config_dir(dir.path()); + assert!(out.ok, "{:?}", out.diagnostics); + let provider_digest = out + .resource_digests + .get("provider.embedding.default") + .expect("provider resource digest"); + assert!( + out.resources + .iter() + .any(|resource| resource.address == "provider.embedding.default" + && resource.kind == "embedding_provider" + && resource.path.is_none()) + ); + assert!( + out.dependencies + .iter() + .any(|dep| dep.from == "graph.knowledge" && dep.to == "provider.embedding.default"), + "{:?}", + out.dependencies + ); + let schema_digest = out.resource_digests.get("schema.knowledge").unwrap(); + let query_digest = out + .resource_digests + .get("query.knowledge.find_person") + .unwrap(); + let expected_graph_digest = graph_digest( + "knowledge", + Some(schema_digest), + Some( + &[("find_person".to_string(), query_digest.clone())] + .into_iter() + .collect(), + ), + Some("provider.embedding.default"), + Some(provider_digest), + ); + assert_eq!( + out.resource_digests["graph.knowledge"], + expected_graph_digest + ); + } + + #[test] + fn embedding_provider_config_rejects_bad_refs_and_inline_secrets() { + let dir = fixture(); + fs::write( + dir.path().join(CLUSTER_CONFIG_FILE), + r#" +version: 1 +providers: + embedding: + default: + kind: openai-compatible + api_key: sk-inline +graphs: + knowledge: + schema: ./people.pg + embedding_provider: provider.policy.default + missing_provider: + schema: ./people.pg + embedding_provider: absent +"#, + ) + .unwrap(); + let out = validate_config_dir(dir.path()); + assert!(!out.ok); + let codes: BTreeSet<_> = out.diagnostics.iter().map(|d| d.code.as_str()).collect(); + assert!( + codes.contains("embedding_api_key_inline"), + "{:?}", + out.diagnostics + ); + assert!( + codes.contains("wrong_kind_reference"), + "{:?}", + out.diagnostics + ); + assert!( + codes.contains("dangling_embedding_provider_reference"), + "{:?}", + out.diagnostics + ); + } + #[test] fn query_key_mismatch_fails() { let dir = fixture(); @@ -1012,8 +1134,13 @@ graphs: let out = validate_config_dir(config_dir); assert!(out.ok, "{:?}", out.diagnostics); let schema_digest = out.resource_digests.get("schema.knowledge").unwrap().clone(); - let graph_composite = - graph_digest("knowledge", Some(&schema_digest), Some(&BTreeMap::new())); + let graph_composite = graph_digest( + "knowledge", + Some(&schema_digest), + Some(&BTreeMap::new()), + None, + None, + ); write_state_resources( config_dir, &[ @@ -1122,6 +1249,8 @@ graphs: .into_iter() .collect(), ), + None, + None, ); assert_eq!(resources["graph.knowledge"]["digest"], expected_composite); assert_eq!( @@ -1136,6 +1265,117 @@ graphs: assert!(!dir.path().join(CLUSTER_LOCK_FILE).exists()); } + #[tokio::test] + async fn apply_records_embedding_provider_profile_and_graph_binding() { + let dir = fixture(); + write_mock_embedding_cluster(dir.path(), "recorded-x"); + write_applyable_state(dir.path()); + let desired = validate_config_dir(dir.path()); + let query_digest = desired + .resource_digests + .get("query.knowledge.find_person") + .unwrap() + .clone(); + let schema_digest = desired + .resource_digests + .get("schema.knowledge") + .unwrap() + .clone(); + let provider_digest = desired + .resource_digests + .get("provider.embedding.default") + .unwrap() + .clone(); + + let out = apply_config_dir(dir.path()).await; + assert!(out.ok, "{:?}", out.diagnostics); + assert!(out.converged, "{out:?}"); + + let state = read_state_json(dir.path()); + let resources = &state["applied_revision"]["resources"]; + let provider = resources["provider.embedding.default"] + .as_object() + .expect("provider resource"); + assert_eq!(provider["digest"], provider_digest); + assert_eq!(provider["embedding_profile"]["kind"], "mock"); + assert_eq!(provider["embedding_profile"]["model"], "recorded-x"); + assert!(provider["embedding_profile"].get("api_key").is_none()); + assert_eq!( + resources["graph.knowledge"]["embedding_provider"], + "provider.embedding.default" + ); + let expected_graph_digest = graph_digest( + "knowledge", + Some(&schema_digest), + Some( + &[("find_person".to_string(), query_digest)] + .into_iter() + .collect(), + ), + Some("provider.embedding.default"), + Some(&provider_digest), + ); + assert_eq!(resources["graph.knowledge"]["digest"], expected_graph_digest); + } + + #[tokio::test] + async fn embedding_provider_changes_update_provider_and_graph_plan() { + let dir = fixture(); + write_mock_embedding_cluster(dir.path(), "recorded-x"); + write_applyable_state(dir.path()); + let first = apply_config_dir(dir.path()).await; + assert!(first.ok && first.converged, "{first:?}"); + + write_mock_embedding_cluster(dir.path(), "recorded-y"); + let plan = plan_config_dir(dir.path()).await; + assert!(plan.ok, "{:?}", plan.diagnostics); + let by_resource: BTreeMap<&str, &PlanChange> = plan + .changes + .iter() + .map(|change| (change.resource.as_str(), change)) + .collect(); + assert_eq!( + by_resource["provider.embedding.default"].operation, + PlanOperation::Update + ); + assert_eq!( + by_resource["provider.embedding.default"].disposition, + Some(ApplyDisposition::Applied) + ); + assert_eq!( + by_resource["graph.knowledge"].operation, + PlanOperation::Update + ); + assert_eq!( + by_resource["graph.knowledge"].disposition, + Some(ApplyDisposition::Derived) + ); + } + + #[tokio::test] + async fn embedding_binding_survives_refresh() { + let dir = fixture(); + init_derived_graph(dir.path()).await; + write_mock_embedding_cluster(dir.path(), "recorded-x"); + write_applyable_state(dir.path()); + let apply = apply_config_dir(dir.path()).await; + assert!(apply.ok && apply.converged, "{apply:?}"); + + let refresh = refresh_config_dir(dir.path()).await; + assert!(refresh.ok, "{:?}", refresh.diagnostics); + + let state = read_state_json(dir.path()); + let resources = &state["applied_revision"]["resources"]; + assert_eq!( + resources["graph.knowledge"]["embedding_provider"], + "provider.embedding.default" + ); + assert_eq!( + resources["provider.embedding.default"]["embedding_profile"]["model"], + "recorded-x" + ); + } + fn desired_revision_digest(out: &ApplyOutput) -> String { out.desired_revision.config_digest.clone().unwrap() } @@ -1150,8 +1390,13 @@ graphs: .unwrap() .clone(); let old_digest = "0".repeat(64); - let graph_composite = - graph_digest("knowledge", Some(&schema_digest), Some(&BTreeMap::new())); + let graph_composite = graph_digest( + "knowledge", + Some(&schema_digest), + Some(&BTreeMap::new()), + None, + None, + ); write_state_resources( dir.path(), &[ @@ -1190,8 +1435,13 @@ graphs: .clone(); let stale_query_digest = "1".repeat(64); let stale_policy_digest = "2".repeat(64); - let graph_composite = - graph_digest("knowledge", Some(&schema_digest), Some(&BTreeMap::new())); + let graph_composite = graph_digest( + "knowledge", + Some(&schema_digest), + Some(&BTreeMap::new()), + None, + None, + ); write_state_resources( dir.path(), &[ @@ -1234,6 +1484,8 @@ graphs: "knowledge", Some(&schema_digest), Some(&[("find_person".to_string(), query_digest)].into_iter().collect()), + None, + None, ); assert_eq!(resources["graph.knowledge"]["digest"], expected_composite); } @@ -1494,8 +1746,13 @@ graphs: .get("schema.knowledge") .unwrap() .clone(); - let graph_composite = - graph_digest("knowledge", Some(&schema_digest), Some(&BTreeMap::new())); + let graph_composite = graph_digest( + "knowledge", + Some(&schema_digest), + Some(&BTreeMap::new()), + None, + None, + ); write_state_resources( dir.path(), &[ @@ -2864,6 +3121,54 @@ policies: assert!(snapshot.policies[0].source.contains("rules:")); } + #[tokio::test] + async fn serving_snapshot_uses_applied_embedding_provider_profile() { + let dir = fixture(); + init_derived_graph(dir.path()).await; + write_mock_embedding_cluster(dir.path(), "recorded-x"); + write_applyable_state(dir.path()); + let converge = apply_config_dir(dir.path()).await; + assert!(converge.converged, "{converge:?}"); + + let snapshot = read_serving_snapshot(dir.path()).await.unwrap(); + let profile = snapshot.graphs[0].embedding.as_ref().unwrap(); + assert_eq!(profile.kind.as_deref(), Some("mock")); + assert_eq!(profile.model.as_deref(), Some("recorded-x")); + } + + #[tokio::test] + async fn serving_snapshot_refuses_missing_embedding_provider_metadata() { + let dir = fixture(); + init_derived_graph(dir.path()).await; + write_mock_embedding_cluster(dir.path(), "recorded-x"); + write_applyable_state(dir.path()); + let converge = apply_config_dir(dir.path()).await; + assert!(converge.converged, "{converge:?}"); + + let mut state = read_state_json(dir.path()); + state["applied_revision"]["resources"]["provider.embedding.default"] + .as_object_mut() + .unwrap() + .remove("embedding_profile"); + fs::write( + dir.path().join(CLUSTER_STATE_FILE), + serde_json::to_string_pretty(&state).unwrap(), + ) + .unwrap(); + + let err = read_serving_snapshot(dir.path()).await.unwrap_err(); + assert!( + err.iter() + .any(|diagnostic| diagnostic.code == "embedding_provider_profile_missing"), + "{err:?}" + ); + assert!( + err.iter() + .any(|diagnostic| diagnostic.code == "embedding_provider_missing"), + "{err:?}" + ); + } + #[tokio::test] async fn serving_snapshot_refuses_missing_state() { let dir = fixture(); diff --git a/crates/omnigraph-cluster/src/types.rs b/crates/omnigraph-cluster/src/types.rs index e44e2f4..97ad406 100644 --- a/crates/omnigraph-cluster/src/types.rs +++ b/crates/omnigraph-cluster/src/types.rs @@ -325,6 +325,7 @@ pub(crate) struct DesiredCluster { /// The declared `storage:` root, if any (None ⇒ the config dir itself). pub(crate) storage_root: Option, pub(crate) state_lock: bool, + pub(crate) embedding_providers: BTreeMap, pub(crate) graphs: Vec, pub(crate) resource_digests: BTreeMap, pub(crate) resources: Vec, @@ -337,6 +338,7 @@ pub(crate) struct DesiredCluster { pub(crate) struct DesiredGraph { pub(crate) id: String, pub(crate) schema_digest: String, + pub(crate) embedding_provider: Option, } #[derive(Debug)] @@ -376,6 +378,8 @@ pub(crate) struct RawClusterConfig { #[serde(default)] pub(crate) state: StateConfig, #[serde(default)] + pub(crate) providers: ProvidersConfig, + #[serde(default)] pub(crate) graphs: BTreeMap, #[serde(default)] pub(crate) policies: BTreeMap, @@ -394,12 +398,123 @@ pub(crate) struct StateConfig { pub(crate) lock: Option, } +#[derive(Debug, Default, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub(crate) struct ProvidersConfig { + #[serde(default)] + pub(crate) embedding: BTreeMap, +} + #[derive(Debug, Serialize, Deserialize)] #[serde(deny_unknown_fields)] pub(crate) struct GraphConfig { pub(crate) schema: PathBuf, #[serde(default)] pub(crate) queries: QueriesDecl, + /// Optional reference to a top-level `providers.embedding.` profile. + #[serde(default)] + pub(crate) embedding_provider: Option, +} + +/// A named cluster embedding provider profile (RFC-012 Phase 5). `kind`/`base_url`/ +/// `model` default exactly as the engine's `EmbeddingConfig::from_env` does. +/// `api_key`, when required, must be a `${NAME}` env reference resolved at +/// serving boot, never an inline secret. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[serde(deny_unknown_fields)] +pub struct EmbeddingProviderConfig { + #[serde(default, alias = "provider", skip_serializing_if = "Option::is_none")] + pub kind: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub base_url: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub model: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub api_key: Option, +} + +impl EmbeddingProviderConfig { + pub(crate) fn validate(&self, path: String, diagnostics: &mut Vec) { + if let Err(error) = omnigraph::embedding::EmbeddingConfig::from_parts( + self.kind.as_deref(), + self.base_url.clone(), + self.model.clone(), + "validation-placeholder".to_string(), + ) { + diagnostics.push(Diagnostic::error( + "invalid_embedding_provider", + path.clone(), + error.to_string(), + )); + } + + if self.kind.as_deref() == Some("mock") { + if let Some(api_key) = self.api_key.as_deref() { + if secret_ref_name(api_key).is_err() { + diagnostics.push(Diagnostic::error( + "embedding_api_key_inline", + format!("{path}.api_key"), + "embedding api_key must be a ${NAME} env reference, not an inline secret", + )); + } + } + return; + } + + match self.api_key.as_deref() { + Some(api_key) if secret_ref_name(api_key).is_err() => diagnostics.push( + Diagnostic::error( + "embedding_api_key_inline", + format!("{path}.api_key"), + "embedding api_key must be a ${NAME} env reference, not an inline secret", + ), + ), + Some(_) => {} + None => diagnostics.push(Diagnostic::error( + "embedding_api_key_required", + format!("{path}.api_key"), + "non-mock embedding providers must set api_key to a ${NAME} env reference", + )), + } + } + + /// Resolve into an engine `EmbeddingConfig`, reading the `${NAME}` api-key + /// reference from process env. Mock profiles do not read env and may omit + /// `api_key`; real providers error if the reference is missing or unset. + pub fn resolve(&self) -> Result { + let api_key = if self.kind.as_deref() == Some("mock") { + String::new() + } else { + resolve_secret_ref(self.api_key.as_deref().ok_or_else(|| { + "embedding api_key is required for non-mock providers".to_string() + })?)? + }; + omnigraph::embedding::EmbeddingConfig::from_parts( + self.kind.as_deref(), + self.base_url.clone(), + self.model.clone(), + api_key, + ) + .map_err(|e| e.to_string()) + } +} + +fn secret_ref_name(value: &str) -> Result<&str, String> { + value + .trim() + .strip_prefix("${") + .and_then(|s| s.strip_suffix('}')) + .filter(|name| !name.trim().is_empty()) + .ok_or_else(|| { + format!("embedding api_key must be a ${{NAME}} env reference, got '{}'", value.trim()) + }) +} + +/// Resolve a `${NAME}` secret reference from process env. Rejects an inline value +/// (anything not wrapped in `${…}`) so secrets never sit in the cluster config. +fn resolve_secret_ref(value: &str) -> Result { + let name = secret_ref_name(value)?; + std::env::var(name).map_err(|_| format!("embedding api_key env var '{name}' is not set")) } /// How a graph declares its stored queries. Terraform-style: the `.gq` @@ -457,6 +572,16 @@ pub(crate) struct StateResource { /// non-policy resources. #[serde(default, skip_serializing_if = "Option::is_none")] pub(crate) applies_to: Option>, + /// Graph resources only: the applied `provider.embedding.` binding. + /// The provider profile itself is stored on the provider resource so + /// serving can boot without re-reading mutable desired config. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub(crate) embedding_provider: Option, + /// Embedding provider resources only: the applied profile with unresolved + /// `${ENV}` references. The server resolves the referenced env var exactly + /// once at boot and injects the resulting engine config into the graph. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub(crate) embedding_profile: Option, } #[derive(Debug, Serialize, Deserialize)] @@ -518,3 +643,74 @@ pub(crate) struct SweepOutcome { /// files are rewritten with consumed_at only after the state write lands. pub(crate) consumed_approvals: Vec, } + +#[cfg(test)] +mod embedding_provider_config_tests { + use super::EmbeddingProviderConfig; + + #[test] + fn resolves_secret_from_env_and_applies_defaults() { + // SAFETY: a unique var name, no concurrent reader. + unsafe { std::env::set_var("OG_TEST_EMBED_KEY_A", "secret-x") }; + let profile = EmbeddingProviderConfig { + kind: Some("openai-compatible".to_string()), + base_url: None, + model: Some("m".to_string()), + api_key: Some("${OG_TEST_EMBED_KEY_A}".to_string()), + }; + let config = profile.resolve().unwrap(); + assert_eq!(config.api_key, "secret-x"); + assert_eq!(config.model, "m"); + unsafe { std::env::remove_var("OG_TEST_EMBED_KEY_A") }; + } + + #[test] + fn rejects_inline_api_key() { + let profile = EmbeddingProviderConfig { + kind: None, + base_url: None, + model: None, + api_key: Some("sk-inline".to_string()), + }; + let err = profile.resolve().unwrap_err(); + assert!(err.contains("${NAME}"), "got: {err}"); + } + + #[test] + fn errors_on_unset_secret() { + let profile = EmbeddingProviderConfig { + kind: None, + base_url: None, + model: None, + api_key: Some("${OG_TEST_DEFINITELY_UNSET_VAR}".to_string()), + }; + let err = profile.resolve().unwrap_err(); + assert!(err.contains("not set"), "got: {err}"); + } + + #[test] + fn rejects_unknown_provider() { + unsafe { std::env::set_var("OG_TEST_EMBED_KEY_B", "x") }; + let profile = EmbeddingProviderConfig { + kind: Some("cohere".to_string()), + base_url: None, + model: None, + api_key: Some("${OG_TEST_EMBED_KEY_B}".to_string()), + }; + let err = profile.resolve().unwrap_err(); + assert!(err.contains("unknown embedding provider"), "got: {err}"); + unsafe { std::env::remove_var("OG_TEST_EMBED_KEY_B") }; + } + + #[test] + fn mock_does_not_require_secret_env() { + let profile = EmbeddingProviderConfig { + kind: Some("mock".to_string()), + base_url: None, + model: Some("cluster-mock".to_string()), + api_key: None, + }; + let config = profile.resolve().unwrap(); + assert_eq!(config.model, "cluster-mock"); + } +} diff --git a/crates/omnigraph-compiler/Cargo.toml b/crates/omnigraph-compiler/Cargo.toml index bbf03f1..4645b81 100644 --- a/crates/omnigraph-compiler/Cargo.toml +++ b/crates/omnigraph-compiler/Cargo.toml @@ -20,10 +20,5 @@ pest_derive = { workspace = true } thiserror = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } -reqwest = { workspace = true } ahash = { workspace = true } -tokio = { workspace = true } sha2 = { workspace = true } - -[dev-dependencies] -tokio = { workspace = true } diff --git a/crates/omnigraph-compiler/src/catalog/mod.rs b/crates/omnigraph-compiler/src/catalog/mod.rs index 0bb536d..93f8d89 100644 --- a/crates/omnigraph-compiler/src/catalog/mod.rs +++ b/crates/omnigraph-compiler/src/catalog/mod.rs @@ -26,6 +26,15 @@ pub struct InterfaceType { pub properties: HashMap, } +/// The `@embed` binding for a vector property: its source text property and, +/// optionally, the embedding model recorded by `@embed("source", model="…")`. +/// The model is what the query-time same-space check validates against. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct EmbedSource { + pub source: String, + pub model: Option, +} + #[derive(Debug, Clone)] pub struct NodeType { pub name: String, @@ -42,8 +51,8 @@ pub struct NodeType { pub range_constraints: Vec, /// Regex check constraints pub check_constraints: Vec, - /// Maps @embed target property -> source text property - pub embed_sources: HashMap, + /// Maps @embed target property -> its source text property + recorded model. + pub embed_sources: HashMap, pub blob_properties: HashSet, pub arrow_schema: SchemaRef, } @@ -156,14 +165,18 @@ pub fn build_catalog(schema: &SchemaFile) -> Result { if matches!(prop.prop_type.scalar, ScalarType::Blob) { blob_properties.insert(prop.name.clone()); } - // Extract @embed from property annotations (stays as annotation) - if let Some(source_prop) = prop - .annotations - .iter() - .find(|ann| ann.name == "embed") - .and_then(|ann| ann.value.clone()) - { - embed_sources.insert(prop.name.clone(), source_prop); + // Extract @embed: the source text property (positional) and the + // optional recorded model (the `model` kwarg). + if let Some(ann) = prop.annotations.iter().find(|ann| ann.name == "embed") { + if let Some(source) = ann.value.clone() { + embed_sources.insert( + prop.name.clone(), + EmbedSource { + source, + model: ann.kwargs.get("model").cloned(), + }, + ); + } } } diff --git a/crates/omnigraph-compiler/src/catalog/schema_plan.rs b/crates/omnigraph-compiler/src/catalog/schema_plan.rs index a9e26b2..dc9d466 100644 --- a/crates/omnigraph-compiler/src/catalog/schema_plan.rs +++ b/crates/omnigraph-compiler/src/catalog/schema_plan.rs @@ -1137,6 +1137,7 @@ node Person @description("new") { annotations: vec![Annotation { name: "description".to_string(), value: Some("new".to_string()), + kwargs: Default::default(), }], })); } diff --git a/crates/omnigraph-compiler/src/catalog/tests.rs b/crates/omnigraph-compiler/src/catalog/tests.rs index 883b4a9..4ab3956 100644 --- a/crates/omnigraph-compiler/src/catalog/tests.rs +++ b/crates/omnigraph-compiler/src/catalog/tests.rs @@ -31,6 +31,33 @@ fn test_build_catalog() { assert!(catalog.node_types.contains_key("Company")); } +#[test] +fn test_embed_source_records_model_kwarg() { + let schema = parse_schema( + r#" +node Doc { +title: String +embedding: Vector(3) @embed("title", model="openai/text-embedding-3-large") +plain: Vector(3) @embed("title") +} +"#, + ) + .unwrap(); + let catalog = build_catalog(&schema).unwrap(); + let doc = catalog.node_types.get("Doc").unwrap(); + + let embedding = doc.embed_sources.get("embedding").unwrap(); + assert_eq!(embedding.source, "title"); + assert_eq!( + embedding.model.as_deref(), + Some("openai/text-embedding-3-large") + ); + + let plain = doc.embed_sources.get("plain").unwrap(); + assert_eq!(plain.source, "title"); + assert_eq!(plain.model, None); +} + #[test] fn test_edge_lookup() { let schema = parse_schema(test_schema()).unwrap(); diff --git a/crates/omnigraph-compiler/src/embedding.rs b/crates/omnigraph-compiler/src/embedding.rs deleted file mode 100644 index 6c9e6f3..0000000 --- a/crates/omnigraph-compiler/src/embedding.rs +++ /dev/null @@ -1,379 +0,0 @@ -#![allow(dead_code)] - -use std::time::Duration; - -use reqwest::Client; -use serde::Deserialize; -use tokio::time::sleep; - -use crate::error::{NanoError, Result}; - -const DEFAULT_EMBED_MODEL: &str = "text-embedding-3-small"; -const DEFAULT_OPENAI_BASE_URL: &str = "https://api.openai.com/v1"; -const DEFAULT_TIMEOUT_MS: u64 = 30_000; -const DEFAULT_RETRY_ATTEMPTS: usize = 4; -const DEFAULT_RETRY_BACKOFF_MS: u64 = 200; - -#[derive(Clone)] -enum EmbeddingTransport { - Mock, - OpenAi { - api_key: String, - base_url: String, - http: Client, - }, -} - -#[derive(Clone)] -pub(crate) struct EmbeddingClient { - model: String, - retry_attempts: usize, - retry_backoff_ms: u64, - transport: EmbeddingTransport, -} - -struct EmbedCallError { - message: String, - retryable: bool, -} - -#[derive(Debug, Deserialize)] -struct OpenAiEmbeddingResponse { - data: Vec, -} - -#[derive(Debug, Deserialize)] -struct OpenAiEmbeddingDatum { - index: usize, - embedding: Vec, -} - -#[derive(Debug, Deserialize)] -struct OpenAiErrorEnvelope { - error: OpenAiErrorBody, -} - -#[derive(Debug, Deserialize)] -struct OpenAiErrorBody { - message: String, -} - -impl EmbeddingClient { - pub(crate) fn from_env() -> Result { - let model = std::env::var("NANOGRAPH_EMBED_MODEL") - .ok() - .map(|v| v.trim().to_string()) - .filter(|v| !v.is_empty()) - .unwrap_or_else(|| DEFAULT_EMBED_MODEL.to_string()); - let retry_attempts = - parse_env_usize("NANOGRAPH_EMBED_RETRY_ATTEMPTS", DEFAULT_RETRY_ATTEMPTS); - let retry_backoff_ms = - parse_env_u64("NANOGRAPH_EMBED_RETRY_BACKOFF_MS", DEFAULT_RETRY_BACKOFF_MS); - - if env_flag("NANOGRAPH_EMBEDDINGS_MOCK") { - return Ok(Self { - model, - retry_attempts, - retry_backoff_ms, - transport: EmbeddingTransport::Mock, - }); - } - - let api_key = std::env::var("OPENAI_API_KEY") - .ok() - .map(|v| v.trim().to_string()) - .filter(|v| !v.is_empty()) - .ok_or_else(|| { - NanoError::Execution( - "OPENAI_API_KEY is required when an embedding call is needed".to_string(), - ) - })?; - let base_url = std::env::var("OPENAI_BASE_URL") - .ok() - .map(|v| v.trim_end_matches('/').to_string()) - .filter(|v| !v.is_empty()) - .unwrap_or_else(|| DEFAULT_OPENAI_BASE_URL.to_string()); - let timeout_ms = parse_env_u64("NANOGRAPH_EMBED_TIMEOUT_MS", DEFAULT_TIMEOUT_MS); - let http = Client::builder() - .timeout(Duration::from_millis(timeout_ms)) - .build() - .map_err(|e| { - NanoError::Execution(format!("failed to initialize HTTP client: {}", e)) - })?; - - Ok(Self { - model, - retry_attempts, - retry_backoff_ms, - transport: EmbeddingTransport::OpenAi { - api_key, - base_url, - http, - }, - }) - } - - #[cfg(test)] - pub(crate) fn mock_for_tests() -> Self { - Self { - model: DEFAULT_EMBED_MODEL.to_string(), - retry_attempts: DEFAULT_RETRY_ATTEMPTS, - retry_backoff_ms: DEFAULT_RETRY_BACKOFF_MS, - transport: EmbeddingTransport::Mock, - } - } - - pub(crate) fn model(&self) -> &str { - &self.model - } - - pub(crate) async fn embed_text(&self, input: &str, expected_dim: usize) -> Result> { - let mut vectors = self.embed_texts(&[input.to_string()], expected_dim).await?; - vectors.pop().ok_or_else(|| { - NanoError::Execution("embedding provider returned no vector".to_string()) - }) - } - - pub(crate) async fn embed_texts( - &self, - inputs: &[String], - expected_dim: usize, - ) -> Result>> { - if expected_dim == 0 { - return Err(NanoError::Execution( - "embedding dimension must be greater than zero".to_string(), - )); - } - if inputs.is_empty() { - return Ok(Vec::new()); - } - - match &self.transport { - EmbeddingTransport::Mock => Ok(inputs - .iter() - .map(|input| mock_embedding(input, expected_dim)) - .collect()), - EmbeddingTransport::OpenAi { .. } => { - self.embed_texts_openai_with_retry(inputs, expected_dim) - .await - } - } - } - - async fn embed_texts_openai_with_retry( - &self, - inputs: &[String], - expected_dim: usize, - ) -> Result>> { - let max_attempt = self.retry_attempts.max(1); - let mut attempt = 0usize; - loop { - attempt += 1; - match self.embed_texts_openai_once(inputs, expected_dim).await { - Ok(vectors) => return Ok(vectors), - Err(err) => { - if !err.retryable || attempt >= max_attempt { - return Err(NanoError::Execution(err.message)); - } - let shift = (attempt - 1).min(10) as u32; - let delay = self.retry_backoff_ms.saturating_mul(1u64 << shift); - sleep(Duration::from_millis(delay)).await; - } - } - } - } - - async fn embed_texts_openai_once( - &self, - inputs: &[String], - expected_dim: usize, - ) -> std::result::Result>, EmbedCallError> { - let (api_key, base_url, http) = match &self.transport { - EmbeddingTransport::OpenAi { - api_key, - base_url, - http, - } => (api_key, base_url, http), - EmbeddingTransport::Mock => unreachable!("mock transport should not call OpenAI"), - }; - - let request = serde_json::json!({ - "model": self.model, - "input": inputs, - "dimensions": expected_dim, - }); - let url = format!("{}/embeddings", base_url); - let response = http - .post(&url) - .bearer_auth(api_key) - .json(&request) - .send() - .await; - - let response = match response { - Ok(resp) => resp, - Err(err) => { - let retryable = err.is_timeout() || err.is_connect() || err.is_request(); - return Err(EmbedCallError { - message: format!("embedding request failed: {}", err), - retryable, - }); - } - }; - - let status = response.status(); - let body = match response.text().await { - Ok(body) => body, - Err(err) => { - return Err(EmbedCallError { - message: format!( - "embedding response read failed (status {}): {}", - status, err - ), - retryable: status.is_server_error() || status.as_u16() == 429, - }); - } - }; - - if !status.is_success() { - let message = parse_openai_error_message(&body).unwrap_or_else(|| body.clone()); - return Err(EmbedCallError { - message: format!( - "embedding request failed with status {}: {}", - status, message - ), - retryable: status.is_server_error() || status.as_u16() == 429, - }); - } - - let mut parsed: OpenAiEmbeddingResponse = - serde_json::from_str(&body).map_err(|err| EmbedCallError { - message: format!("embedding response decode failed: {}", err), - retryable: false, - })?; - - if parsed.data.len() != inputs.len() { - return Err(EmbedCallError { - message: format!( - "embedding response size mismatch: expected {}, got {}", - inputs.len(), - parsed.data.len() - ), - retryable: false, - }); - } - - parsed.data.sort_by_key(|item| item.index); - let mut vectors = Vec::with_capacity(parsed.data.len()); - for (idx, item) in parsed.data.into_iter().enumerate() { - if item.index != idx { - return Err(EmbedCallError { - message: format!( - "embedding response index mismatch at position {}: got {}", - idx, item.index - ), - retryable: false, - }); - } - if item.embedding.len() != expected_dim { - return Err(EmbedCallError { - message: format!( - "embedding dimension mismatch: expected {}, got {}", - expected_dim, - item.embedding.len() - ), - retryable: false, - }); - } - vectors.push(item.embedding); - } - Ok(vectors) - } -} - -fn parse_openai_error_message(body: &str) -> Option { - serde_json::from_str::(body) - .ok() - .map(|e| e.error.message) - .filter(|msg| !msg.trim().is_empty()) -} - -fn parse_env_usize(name: &str, default: usize) -> usize { - std::env::var(name) - .ok() - .and_then(|v| v.parse::().ok()) - .filter(|v| *v > 0) - .unwrap_or(default) -} - -fn parse_env_u64(name: &str, default: u64) -> u64 { - std::env::var(name) - .ok() - .and_then(|v| v.parse::().ok()) - .filter(|v| *v > 0) - .unwrap_or(default) -} - -fn env_flag(name: &str) -> bool { - std::env::var(name) - .ok() - .map(|v| { - let s = v.trim().to_ascii_lowercase(); - s == "1" || s == "true" || s == "yes" || s == "on" - }) - .unwrap_or(false) -} - -fn mock_embedding(input: &str, dim: usize) -> Vec { - let mut seed = fnv1a64(input.as_bytes()); - let mut out = Vec::with_capacity(dim); - for _ in 0..dim { - seed = xorshift64(seed); - let ratio = (seed as f64 / u64::MAX as f64) as f32; - out.push((ratio * 2.0) - 1.0); - } - - let norm = out - .iter() - .map(|v| (*v as f64) * (*v as f64)) - .sum::() - .sqrt() as f32; - if norm > f32::EPSILON { - for value in &mut out { - *value /= norm; - } - } - out -} - -fn fnv1a64(bytes: &[u8]) -> u64 { - let mut hash = 14695981039346656037u64; - for byte in bytes { - hash ^= *byte as u64; - hash = hash.wrapping_mul(1099511628211u64); - } - hash -} - -fn xorshift64(mut x: u64) -> u64 { - x ^= x << 13; - x ^= x >> 7; - x ^= x << 17; - x -} - -#[cfg(test)] -mod tests { - use super::*; - - #[tokio::test] - async fn mock_embeddings_are_deterministic() { - let client = EmbeddingClient::mock_for_tests(); - let a = client.embed_text("alpha", 8).await.unwrap(); - let b = client.embed_text("alpha", 8).await.unwrap(); - let c = client.embed_text("beta", 8).await.unwrap(); - assert_eq!(a, b); - assert_ne!(a, c); - assert_eq!(a.len(), 8); - } -} diff --git a/crates/omnigraph-compiler/src/lib.rs b/crates/omnigraph-compiler/src/lib.rs index ba1aba2..4f85c08 100644 --- a/crates/omnigraph-compiler/src/lib.rs +++ b/crates/omnigraph-compiler/src/lib.rs @@ -1,5 +1,4 @@ pub mod catalog; -pub mod embedding; pub mod error; pub mod ir; pub mod json_output; diff --git a/crates/omnigraph-compiler/src/query/typecheck.rs b/crates/omnigraph-compiler/src/query/typecheck.rs index 658f083..b2c235a 100644 --- a/crates/omnigraph-compiler/src/query/typecheck.rs +++ b/crates/omnigraph-compiler/src/query/typecheck.rs @@ -261,13 +261,13 @@ fn typecheck_mutation(catalog: &Catalog, mutation: &Mutation, params: &[Param]) continue; } - if let Some(source_prop) = node_type.embed_sources.get(prop_name) { - if assigned_props.contains(source_prop.as_str()) { + if let Some(embed) = node_type.embed_sources.get(prop_name) { + if assigned_props.contains(embed.source.as_str()) { continue; } return Err(NanoError::Type(format!( "T12: insert for `{}` must provide non-nullable property `{}` or @embed source `{}`", - insert.type_name, prop_name, source_prop + insert.type_name, prop_name, embed.source ))); } diff --git a/crates/omnigraph-compiler/src/schema/ast.rs b/crates/omnigraph-compiler/src/schema/ast.rs index f8ed18a..9be0e56 100644 --- a/crates/omnigraph-compiler/src/schema/ast.rs +++ b/crates/omnigraph-compiler/src/schema/ast.rs @@ -1,3 +1,5 @@ +use std::collections::BTreeMap; + use crate::types::PropType; use serde::{Deserialize, Serialize}; @@ -50,6 +52,11 @@ pub struct PropDecl { pub struct Annotation { pub name: String, pub value: Option, + /// Keyword arguments, e.g. `model="…"` on `@embed("source", model="…")`. + /// Empty is skipped in serialization so existing schemas' IR JSON (and + /// hash) stay byte-identical; `BTreeMap` keeps the order deterministic. + #[serde(default, skip_serializing_if = "BTreeMap::is_empty")] + pub kwargs: BTreeMap, } /// A typed constraint declared in a node or edge body. diff --git a/crates/omnigraph-compiler/src/schema/parser.rs b/crates/omnigraph-compiler/src/schema/parser.rs index 43e11ed..c5f4355 100644 --- a/crates/omnigraph-compiler/src/schema/parser.rs +++ b/crates/omnigraph-compiler/src/schema/parser.rs @@ -556,12 +556,32 @@ fn parse_type_ref(pair: pest::iterators::Pair) -> Result { fn parse_annotation(pair: pest::iterators::Pair) -> Result { let mut inner = pair.into_inner(); let name = inner.next().unwrap().as_str().to_string(); - let value = inner - .next() - .map(|p| decode_string_literal(p.as_str())) - .transpose()?; + let mut value = None; + let mut kwargs = std::collections::BTreeMap::new(); + if let Some(args) = inner.next() { + // `annotation_args`: one positional arg followed by zero or more + // `key = literal` kwargs (e.g. `@embed("source", model="…")`). + for arg in args.into_inner() { + match arg.as_rule() { + Rule::annotation_arg => { + value = Some(decode_string_literal(arg.as_str())?); + } + Rule::annotation_kwarg => { + let mut kw = arg.into_inner(); + let key = kw.next().unwrap().as_str().to_string(); + let raw = kw.next().unwrap().as_str(); + kwargs.insert(key, decode_string_literal(raw)?); + } + _ => {} + } + } + } - Ok(Annotation { name, value }) + Ok(Annotation { + name, + value, + kwargs, + }) } fn validate_string_annotation( @@ -823,6 +843,17 @@ fn validate_property_annotations( type_name, source_prop ))); } + + // `model` is the only supported kwarg; reject the rest loudly so + // a typo can't be silently ignored (it would never validate). + for key in ann.kwargs.keys() { + if key != "model" { + return Err(NanoError::Parse(format!( + "@embed on {}.{} has unknown argument '{}=' (only 'model' is supported)", + type_name, prop.name, key + ))); + } + } } _ => {} } diff --git a/crates/omnigraph-compiler/src/schema/parser_tests.rs b/crates/omnigraph-compiler/src/schema/parser_tests.rs index 2302cfb..9a2e1ba 100644 --- a/crates/omnigraph-compiler/src/schema/parser_tests.rs +++ b/crates/omnigraph-compiler/src/schema/parser_tests.rs @@ -508,6 +508,66 @@ embedding: Vector(3) @embed(title) } } +#[test] +fn test_parse_embed_annotation_with_model_kwarg() { + let input = r#" +node Doc { +title: String +embedding: Vector(3) @embed("title", model="openai/text-embedding-3-large") +} +"#; + let schema = parse_schema(input).unwrap(); + match &schema.declarations[0] { + SchemaDecl::Node(n) => { + let ann = &n.properties[1].annotations[0]; + assert_eq!(ann.name, "embed"); + assert_eq!(ann.value.as_deref(), Some("title")); + assert_eq!( + ann.kwargs.get("model").map(String::as_str), + Some("openai/text-embedding-3-large") + ); + } + _ => panic!("expected Node"), + } +} + +#[test] +fn test_parse_embed_annotation_without_model_has_empty_kwargs() { + let input = r#" +node Doc { +title: String +embedding: Vector(3) @embed("title") +} +"#; + let schema = parse_schema(input).unwrap(); + match &schema.declarations[0] { + SchemaDecl::Node(n) => { + let ann = &n.properties[1].annotations[0]; + assert!(ann.kwargs.is_empty()); + // Empty kwargs must NOT serialize, so existing schemas' IR JSON (and + // thus the schema hash) stay byte-identical after this field is added. + let json = serde_json::to_string(ann).unwrap(); + assert!(!json.contains("kwargs"), "unexpected kwargs in {json}"); + } + _ => panic!("expected Node"), + } +} + +#[test] +fn test_parse_embed_annotation_rejects_unknown_kwarg() { + let input = r#" +node Doc { +title: String +embedding: Vector(3) @embed("title", provider="openai") +} +"#; + let err = parse_schema(input).unwrap_err(); + assert!( + err.to_string().contains("only 'model' is supported"), + "got: {err}" + ); +} + #[test] fn test_parse_edge_no_body() { let input = "edge WorksAt: Person -> Company\n"; diff --git a/crates/omnigraph-compiler/src/schema/schema.pest b/crates/omnigraph-compiler/src/schema/schema.pest index 395c516..b02724e 100644 --- a/crates/omnigraph-compiler/src/schema/schema.pest +++ b/crates/omnigraph-compiler/src/schema/schema.pest @@ -42,8 +42,10 @@ enum_value = @{ (ASCII_ALPHANUMERIC | "_" | "-")+ } base_type = { "String" | "Blob" | "Bool" | "I32" | "I64" | "U32" | "U64" | "F32" | "F64" | "DateTime" | "Date" } // Annotation rule excludes constraint keywords followed by "(" — those are body_constraints -annotation = { "@" ~ !(constraint_name ~ "(") ~ ident ~ ("(" ~ annotation_arg ~ ")")? } +annotation = { "@" ~ !(constraint_name ~ "(") ~ ident ~ ("(" ~ annotation_args ~ ")")? } +annotation_args = { annotation_arg ~ ("," ~ annotation_kwarg)* } annotation_arg = { literal | ident } +annotation_kwarg = { ident ~ "=" ~ literal } literal = { string_lit | float_lit | integer | bool_lit } diff --git a/crates/omnigraph-server/src/lib.rs b/crates/omnigraph-server/src/lib.rs index b83a166..5451b05 100644 --- a/crates/omnigraph-server/src/lib.rs +++ b/crates/omnigraph-server/src/lib.rs @@ -209,6 +209,9 @@ pub struct GraphStartupConfig { pub graph_id: String, pub uri: String, pub policy: Option, + /// Pre-resolved embedding config from an applied cluster provider profile. + /// Legacy config paths leave this unset and continue to use env resolution. + pub embedding: Option, /// Per-graph stored-query registry, loaded and identity-checked at /// settings-build time; type-checked against the schema when this /// graph's engine opens. @@ -1088,6 +1091,11 @@ async fn open_single_graph(cfg: GraphStartupConfig) -> Result> let db = Omnigraph::open(&uri) .await .map_err(|err| color_eyre::eyre::eyre!("open graph '{}' at {}: {err}", graph_id, uri))?; + let db = if let Some(embedding) = cfg.embedding { + db.with_embedding_config(Arc::new(embedding)) + } else { + db + }; // Validate this graph's stored queries against the live schema and // resolve them to an attachable handle (refuse boot on breakage). diff --git a/crates/omnigraph-server/src/settings.rs b/crates/omnigraph-server/src/settings.rs index 34a76bd..bb6febd 100644 --- a/crates/omnigraph-server/src/settings.rs +++ b/crates/omnigraph-server/src/settings.rs @@ -98,6 +98,15 @@ pub(crate) async fn load_cluster_settings( graph_id: graph.graph_id.clone(), uri: graph.root.to_string_lossy().to_string(), policy: graph_policies.get(&graph.graph_id).cloned(), + embedding: graph + .embedding + .as_ref() + .map(|profile| { + profile.resolve().map_err(|err| { + eyre!("embedding provider for graph '{}': {err}", graph.graph_id) + }) + }) + .transpose()?, queries: registry, }); } @@ -517,6 +526,7 @@ mod tests { .to_string_lossy() .into_owned(), policy: None, + embedding: None, queries: crate::queries::QueryRegistry::default(), }], config_path: temp.path().join("omnigraph.yaml"), @@ -568,6 +578,7 @@ mod tests { .to_string_lossy() .into_owned(), policy: None, + embedding: None, queries: crate::queries::QueryRegistry::default(), }], config_path: temp.path().join("cluster"), diff --git a/crates/omnigraph-server/tests/multi_graph.rs b/crates/omnigraph-server/tests/multi_graph.rs index 1d3905d..617cc66 100644 --- a/crates/omnigraph-server/tests/multi_graph.rs +++ b/crates/omnigraph-server/tests/multi_graph.rs @@ -5,9 +5,12 @@ use std::fs; use axum::body::{Body, to_bytes}; use axum::http::{Method, Request, StatusCode}; -use omnigraph_server::api::ErrorOutput; +use omnigraph::db::Omnigraph; +use omnigraph::loader::{LoadMode, load_jsonl}; +use omnigraph_server::api::{ErrorOutput, ReadRequest}; use omnigraph_server::{AppState, build_app}; use serde_json::Value; +use serial_test::serial; use tower::ServiceExt; @@ -457,6 +460,180 @@ async fn cluster_boot_serves_applied_state() { assert_eq!(status, StatusCode::OK, "{body}"); } +#[tokio::test(flavor = "multi_thread")] +#[serial] +async fn cluster_boot_injects_embedding_provider_config() { + const EMBED_SCHEMA: &str = r#" +node Doc { + slug: String @key + title: String @index + embedding: Vector(4) @embed("title", model="cluster-mock") @index +} +"#; + const EMBED_QUERY: &str = r#" +query vector_search_string($q: String) { + match { $d: Doc } + return { $d.slug, $d.title } + order { nearest($d.embedding, $q) } + limit 3 +} +"#; + + let alpha = mock_embedding("alpha", 4); + let beta = mock_embedding("beta", 4); + let gamma = mock_embedding("gamma", 4); + let data = format!( + concat!( + r#"{{"type":"Doc","data":{{"slug":"alpha-doc","title":"alpha guide","embedding":[{}]}}}}"#, + "\n", + r#"{{"type":"Doc","data":{{"slug":"beta-doc","title":"beta guide","embedding":[{}]}}}}"#, + "\n", + r#"{{"type":"Doc","data":{{"slug":"gamma-doc","title":"gamma handbook","embedding":[{}]}}}}"# + ), + format_vector(&alpha), + format_vector(&beta), + format_vector(&gamma), + ); + + let temp = tempfile::tempdir().unwrap(); + fs::write(temp.path().join("docs.pg"), EMBED_SCHEMA).unwrap(); + fs::write(temp.path().join("search.gq"), EMBED_QUERY).unwrap(); + fs::write( + temp.path().join("cluster.yaml"), + r#" +version: 1 +providers: + embedding: + default: + kind: mock + model: cluster-mock +graphs: + knowledge: + schema: ./docs.pg + embedding_provider: default + queries: + vector_search_string: + file: ./search.gq +"#, + ) + .unwrap(); + let import = omnigraph_cluster::import_config_dir(temp.path()).await; + assert!(import.ok, "{:?}", import.diagnostics); + let apply = omnigraph_cluster::apply_config_dir(temp.path()).await; + assert!(apply.ok && apply.converged, "{:?}", apply.diagnostics); + + let graph_uri = temp + .path() + .join("graphs/knowledge.omni") + .to_string_lossy() + .to_string(); + let mut db = Omnigraph::open(&graph_uri).await.unwrap(); + load_jsonl(&mut db, &data, LoadMode::Overwrite) + .await + .unwrap(); + + let _guard = EnvGuard::set(&[ + ("OMNIGRAPH_EMBEDDINGS_MOCK", None), + ("OMNIGRAPH_EMBED_PROVIDER", None), + ("OMNIGRAPH_EMBED_BASE_URL", None), + ("OMNIGRAPH_EMBED_MODEL", None), + ("OPENROUTER_API_KEY", None), + ("OPENAI_API_KEY", None), + ("GEMINI_API_KEY", None), + ]); + let settings = cluster_settings(temp.path()).await.unwrap(); + let omnigraph_server::ServerConfigMode::Multi { + graphs, + config_path, + server_policy, + } = settings.mode + else { + panic!("cluster boot must select multi-graph routing"); + }; + let state = omnigraph_server::open_multi_graph_state( + graphs, + Vec::new(), + server_policy.as_ref(), + config_path, + ) + .await + .unwrap(); + let app = build_app(state); + + let read = ReadRequest { + query_source: EMBED_QUERY.to_string(), + query_name: Some("vector_search_string".to_string()), + params: Some(serde_json::json!({ "q": "alpha" })), + branch: Some("main".to_string()), + snapshot: None, + }; + let (status, body) = json_response( + &app, + Request::builder() + .uri("/graphs/knowledge/read") + .method(Method::POST) + .header("content-type", "application/json") + .body(Body::from(serde_json::to_vec(&read).unwrap())) + .unwrap(), + ) + .await; + + assert_eq!(status, StatusCode::OK, "{body}"); + assert_eq!(body["row_count"], 3); + assert_eq!(body["rows"][0]["d.slug"], "alpha-doc"); +} + +#[tokio::test(flavor = "multi_thread")] +#[serial] +async fn cluster_boot_refuses_missing_embedding_secret_env() { + let temp = tempfile::tempdir().unwrap(); + fs::write( + temp.path().join("people.pg"), + "\nnode Person {\n name: String @key\n}\n", + ) + .unwrap(); + fs::write( + temp.path().join("people.gq"), + "\nquery find_person($name: String) {\n match { $p: Person { name: $name } }\n return { $p.name }\n}\n", + ) + .unwrap(); + fs::write( + temp.path().join("cluster.yaml"), + r#" +version: 1 +providers: + embedding: + default: + kind: openai-compatible + api_key: ${OG_TEST_MISSING_EMBED_KEY} +graphs: + knowledge: + schema: ./people.pg + embedding_provider: default + queries: + find_person: + file: ./people.gq +"#, + ) + .unwrap(); + let import = omnigraph_cluster::import_config_dir(temp.path()).await; + assert!(import.ok, "{:?}", import.diagnostics); + let apply = omnigraph_cluster::apply_config_dir(temp.path()).await; + assert!(apply.ok && apply.converged, "{:?}", apply.diagnostics); + + let _guard = EnvGuard::set(&[ + ("OG_TEST_MISSING_EMBED_KEY", None), + ("OMNIGRAPH_EMBEDDINGS_MOCK", None), + ]); + let err = cluster_settings(temp.path()).await.unwrap_err(); + let message = err.to_string(); + assert!( + message.contains("embedding provider for graph 'knowledge'"), + "{message}" + ); + assert!(message.contains("OG_TEST_MISSING_EMBED_KEY"), "{message}"); +} + #[tokio::test] async fn cluster_boot_wires_policy_bindings_into_cedar_slots() { let temp = tempfile::tempdir().unwrap(); diff --git a/crates/omnigraph/src/db/omnigraph.rs b/crates/omnigraph/src/db/omnigraph.rs index 3d68c3d..48be274 100644 --- a/crates/omnigraph/src/db/omnigraph.rs +++ b/crates/omnigraph/src/db/omnigraph.rs @@ -158,6 +158,17 @@ pub struct Omnigraph { /// `apply_schema_as` consults this field (PR #2 proof-of-concept); /// PR #3 fans the `enforce()` call out to the remaining writers. policy: Option>, + /// Lazily-built, reused-across-queries embedding client. Built on the first + /// `nearest($v, "string")` that needs server-side embedding (so a graph that + /// never embeds needs no provider key), then shared by every later query — + /// avoids the per-query `from_env()` rebuild and keeps the provider HTTP + /// connection pool warm. `OnceCell` guarantees a single initialization. + embedding: Arc>, + /// Optional pre-resolved embedding config (RFC-012 Phase 5), injected from an + /// applied cluster `providers.embedding` profile via [`Omnigraph::with_embedding_config`]. + /// When set, the embedding cell builds its client from this instead of + /// `EmbeddingClient::from_env()`; `None` keeps the env fallback. + embedding_config: Option>, } /// Whether [`Omnigraph::open`] runs the open-time recovery sweep. @@ -321,6 +332,8 @@ impl Omnigraph { write_queue: Arc::new(crate::db::write_queue::WriteQueueManager::new()), merge_exclusive: Arc::new(tokio::sync::Mutex::new(())), policy: None, + embedding: Arc::new(tokio::sync::OnceCell::new()), + embedding_config: None, }) } @@ -420,6 +433,8 @@ impl Omnigraph { write_queue: Arc::new(crate::db::write_queue::WriteQueueManager::new()), merge_exclusive: Arc::new(tokio::sync::Mutex::new(())), policy: None, + embedding: Arc::new(tokio::sync::OnceCell::new()), + embedding_config: None, }) } @@ -467,6 +482,29 @@ impl Omnigraph { self } + /// The lazily-initialized, reused-across-queries embedding client cell + /// (see the `embedding` field doc). The query executor resolves the client + /// through this on the first `nearest($v, "string")` that needs embedding. + pub(crate) fn embedding_cell( + &self, + ) -> &tokio::sync::OnceCell { + &self.embedding + } + + /// Install a pre-resolved embedding config (RFC-012 Phase 5). Builder-style, + /// mirroring [`Omnigraph::with_policy`]: a graph served from a cluster + /// embedding provider profile injects it here; an embedded/CLI caller that doesn't + /// call this keeps the `EmbeddingClient::from_env()` fallback. + pub fn with_embedding_config(mut self, config: Arc) -> Self { + self.embedding_config = Some(config); + self + } + + /// The injected embedding config, if any (see the `embedding_config` field). + pub(crate) fn embedding_config_ref(&self) -> Option<&crate::embedding::EmbeddingConfig> { + self.embedding_config.as_deref() + } + /// Engine-layer policy enforcement gate (MR-722 chassis core). /// /// * If no policy is installed → no-op (returns `Ok(())`). diff --git a/crates/omnigraph/src/embedding.rs b/crates/omnigraph/src/embedding.rs index cfd4071..246836c 100644 --- a/crates/omnigraph/src/embedding.rs +++ b/crates/omnigraph/src/embedding.rs @@ -8,29 +8,157 @@ use tokio::time::sleep; use crate::error::{OmniError, Result}; -const GEMINI_EMBED_MODEL: &str = "gemini-embedding-2-preview"; +const DEFAULT_OPENROUTER_BASE_URL: &str = "https://openrouter.ai/api/v1"; +const DEFAULT_OPENROUTER_MODEL: &str = "openai/text-embedding-3-large"; +const DEFAULT_OPENAI_BASE_URL: &str = "https://api.openai.com/v1"; +const DEFAULT_OPENAI_MODEL: &str = "text-embedding-3-large"; const DEFAULT_GEMINI_BASE_URL: &str = "https://generativelanguage.googleapis.com/v1beta"; +const DEFAULT_GEMINI_MODEL: &str = "gemini-embedding-2"; const DEFAULT_TIMEOUT_MS: u64 = 30_000; const DEFAULT_RETRY_ATTEMPTS: usize = 4; const DEFAULT_RETRY_BACKOFF_MS: u64 = 200; -const QUERY_TASK_TYPE: &str = "RETRIEVAL_QUERY"; -const DOCUMENT_TASK_TYPE: &str = "RETRIEVAL_DOCUMENT"; +const DEFAULT_DEADLINE_MS: u64 = 60_000; +const GEMINI_QUERY_TASK_TYPE: &str = "RETRIEVAL_QUERY"; +const GEMINI_DOCUMENT_TASK_TYPE: &str = "RETRIEVAL_DOCUMENT"; -#[derive(Clone, Debug)] -enum EmbeddingTransport { +/// Which embedding API a client speaks. Each variant owns its request shape, +/// auth, and response parsing; everything else (retry, deadline, normalization, +/// tracing) is provider-independent. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum Provider { + /// OpenAI-compatible (`POST {base}/embeddings`, bearer auth, + /// `{model, input, dimensions}`). Covers OpenRouter (the default gateway), + /// OpenAI direct, and self-hosted endpoints (vLLM/Ollama/LM Studio). + OpenAiCompatible, + /// Google Gemini `generativelanguage` (`POST {base}/models/{model}:embedContent`, + /// `x-goog-api-key`), with `RETRIEVAL_QUERY` / `RETRIEVAL_DOCUMENT` task types. + Gemini, + /// Deterministic, offline. No network, no key. Mock, - Gemini { +} + +/// Whether the text being embedded is a search query or a stored document. +/// Only Gemini distinguishes these (`RETRIEVAL_QUERY` vs `RETRIEVAL_DOCUMENT`); +/// OpenAI-compatible providers and Mock produce the identical request for both, +/// which is also the same-space property a query relies on. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +enum EmbedRole { + Query, + Document, +} + +/// The single source of truth for how embedding text becomes a vector: +/// provider + model + endpoint + key. Resolved once (from env for direct +/// engine/CLI callers, or from an applied cluster `providers.embedding` profile +/// at server boot) and shared by the query path and the offline CLI so stored +/// and query vectors stay same-space by construction. +#[derive(Clone, Debug)] +pub struct EmbeddingConfig { + pub provider: Provider, + pub model: String, + pub base_url: String, + pub api_key: String, +} + +impl EmbeddingConfig { + /// Resolve from the environment. Precedence: + /// 1. `OMNIGRAPH_EMBEDDINGS_MOCK` → Mock. + /// 2. `OMNIGRAPH_EMBED_PROVIDER` (`openai-compatible`|`openai`|`gemini`|`mock`); + /// unset defaults to `openai-compatible` (OpenRouter). + /// 3. `OMNIGRAPH_EMBED_BASE_URL` else the provider default. + /// 4. `OMNIGRAPH_EMBED_MODEL` else the provider default. + /// 5. provider api-key env (`OPENROUTER_API_KEY`/`OPENAI_API_KEY`, or `GEMINI_API_KEY`). + pub fn from_env() -> Result { + if env_flag("OMNIGRAPH_EMBEDDINGS_MOCK") { + return Ok(Self::mock()); + } + + let alias = env_string("OMNIGRAPH_EMBED_PROVIDER"); + if alias.as_deref() == Some("mock") { + return Ok(Self::mock()); + } + + let (provider, default_base, default_model, key_envs) = provider_profile(alias.as_deref())?; + let base_url = env_string("OMNIGRAPH_EMBED_BASE_URL") + .unwrap_or_else(|| default_base.to_string()) + .trim_end_matches('/') + .to_string(); + let model = + env_string("OMNIGRAPH_EMBED_MODEL").unwrap_or_else(|| default_model.to_string()); + + let api_key = key_envs.iter().copied().find_map(env_string).ok_or_else(|| { + OmniError::manifest_internal(format!( + "{} is required for the {} embedding provider", + key_envs.join(" or "), + alias.as_deref().unwrap_or("openai-compatible") + )) + })?; + + Ok(Self { + provider, + model, + base_url, + api_key, + }) + } + + /// Build a config from explicit parts — the cluster `providers.embedding` profile path + /// (RFC-012 Phase 5). `provider`/`base_url`/`model` default exactly as + /// `from_env` does (shared `provider_profile`); `api_key` is already resolved + /// (the cluster path resolves a `${NAME}` ref before calling this). + pub fn from_parts( + provider: Option<&str>, + base_url: Option, + model: Option, api_key: String, - base_url: String, - http: Client, - }, + ) -> Result { + if provider == Some("mock") { + // An explicit `model` (e.g. a cluster `providers.embedding` profile) is + // authoritative — it is what the same-space check compares against — + // so honor it; fall back to `mock()`'s env-based model only when the + // caller supplied none. Without this, a profile's `model` is silently + // dropped and the same-space check resolves to OMNIGRAPH_EMBED_MODEL. + let mut config = Self::mock(); + if let Some(model) = model { + config.model = model; + } + return Ok(config); + } + let (provider, default_base, default_model, _key_envs) = provider_profile(provider)?; + let base_url = base_url + .unwrap_or_else(|| default_base.to_string()) + .trim_end_matches('/') + .to_string(); + let model = model.unwrap_or_else(|| default_model.to_string()); + Ok(Self { + provider, + model, + base_url, + api_key, + }) + } + + fn mock() -> Self { + Self { + provider: Provider::Mock, + // Honor OMNIGRAPH_EMBED_MODEL so the same-space check is exercisable + // under mock; the mock vectors themselves don't depend on the model. + model: env_string("OMNIGRAPH_EMBED_MODEL").unwrap_or_default(), + base_url: String::new(), + api_key: String::new(), + } + } } #[derive(Clone, Debug)] pub struct EmbeddingClient { + config: EmbeddingConfig, + http: Client, retry_attempts: usize, retry_backoff_ms: u64, - transport: EmbeddingTransport, + /// Total wall-clock budget for one embed call, across all retries + /// (`OMNIGRAPH_EMBED_DEADLINE_MS`). `0` = unbounded. + deadline_ms: u64, } struct EmbedCallError { @@ -58,35 +186,39 @@ struct GoogleErrorBody { message: String, } +#[derive(Debug, Deserialize)] +struct OpenAiEmbeddingResponse { + data: Vec, +} + +#[derive(Debug, Deserialize)] +struct OpenAiEmbeddingDatum { + index: usize, + embedding: Vec, +} + +#[derive(Debug, Deserialize)] +struct OpenAiErrorEnvelope { + error: OpenAiErrorBody, +} + +#[derive(Debug, Deserialize)] +struct OpenAiErrorBody { + message: String, +} + impl EmbeddingClient { pub fn from_env() -> Result { + Self::new(EmbeddingConfig::from_env()?) + } + + pub fn new(config: EmbeddingConfig) -> Result { let retry_attempts = parse_env_usize("OMNIGRAPH_EMBED_RETRY_ATTEMPTS", DEFAULT_RETRY_ATTEMPTS); let retry_backoff_ms = parse_env_u64("OMNIGRAPH_EMBED_RETRY_BACKOFF_MS", DEFAULT_RETRY_BACKOFF_MS); - - if env_flag("OMNIGRAPH_EMBEDDINGS_MOCK") { - return Ok(Self { - retry_attempts, - retry_backoff_ms, - transport: EmbeddingTransport::Mock, - }); - } - - let api_key = std::env::var("GEMINI_API_KEY") - .ok() - .map(|v| v.trim().to_string()) - .filter(|v| !v.is_empty()) - .ok_or_else(|| { - OmniError::manifest_internal( - "GEMINI_API_KEY is required when nearest() needs a string embedding", - ) - })?; - let base_url = std::env::var("OMNIGRAPH_GEMINI_BASE_URL") - .ok() - .map(|v| v.trim_end_matches('/').to_string()) - .filter(|v| !v.is_empty()) - .unwrap_or_else(|| DEFAULT_GEMINI_BASE_URL.to_string()); + let deadline_ms = + parse_env_u64_allow_zero("OMNIGRAPH_EMBED_DEADLINE_MS", DEFAULT_DEADLINE_MS); let timeout_ms = parse_env_u64("OMNIGRAPH_EMBED_TIMEOUT_MS", DEFAULT_TIMEOUT_MS); let http = Client::builder() .timeout(Duration::from_millis(timeout_ms)) @@ -96,39 +228,36 @@ impl EmbeddingClient { })?; Ok(Self { + config, + http, retry_attempts, retry_backoff_ms, - transport: EmbeddingTransport::Gemini { - api_key, - base_url, - http, - }, + deadline_ms, }) } + pub fn config(&self) -> &EmbeddingConfig { + &self.config + } + #[cfg(test)] fn mock_for_tests() -> Self { - Self { - retry_attempts: DEFAULT_RETRY_ATTEMPTS, - retry_backoff_ms: DEFAULT_RETRY_BACKOFF_MS, - transport: EmbeddingTransport::Mock, - } + Self::new(EmbeddingConfig::mock()).expect("mock client builds") } pub async fn embed_query_text(&self, input: &str, expected_dim: usize) -> Result> { - self.embed_text(input, expected_dim, QUERY_TASK_TYPE).await + self.embed_text(input, expected_dim, EmbedRole::Query).await } pub async fn embed_document_text(&self, input: &str, expected_dim: usize) -> Result> { - self.embed_text(input, expected_dim, DOCUMENT_TASK_TYPE) - .await + self.embed_text(input, expected_dim, EmbedRole::Document).await } async fn embed_text( &self, input: &str, expected_dim: usize, - task_type: &'static str, + role: EmbedRole, ) -> Result> { if expected_dim == 0 { return Err(OmniError::manifest_internal( @@ -136,10 +265,71 @@ impl EmbeddingClient { )); } - match &self.transport { - EmbeddingTransport::Mock => Ok(mock_embedding(input, expected_dim)), - EmbeddingTransport::Gemini { .. } => { - self.with_retry(|| self.embed_text_gemini_once(input, expected_dim, task_type)) + let started = std::time::Instant::now(); + let result = self + .run_with_deadline(self.embed_text_inner(input, expected_dim, role)) + .await; + let elapsed_ms = started.elapsed().as_millis() as u64; + + match &result { + Ok(_) => tracing::info!( + target: "omnigraph::embedding", + provider = ?self.config.provider, + model = %self.config.model, + dim = expected_dim, + elapsed_ms, + outcome = "ok", + "embedding succeeded" + ), + Err(err) => tracing::warn!( + target: "omnigraph::embedding", + provider = ?self.config.provider, + model = %self.config.model, + dim = expected_dim, + elapsed_ms, + outcome = "error", + error = %err, + "embedding failed" + ), + } + result + } + + /// Bound the whole embed operation (all retries + backoff) by `deadline_ms`, + /// so a degraded provider can never hang the caller for the full retry + /// envelope. Applies to every embed call (query and document). `0` = + /// unbounded. Embedding has no Lance/manifest side effects, so cancelling the + /// in-flight request future on elapse is safe. + async fn run_with_deadline(&self, fut: F) -> Result> + where + F: Future>>, + { + if self.deadline_ms == 0 { + return fut.await; + } + match tokio::time::timeout(Duration::from_millis(self.deadline_ms), fut).await { + Ok(res) => res, + Err(_elapsed) => Err(OmniError::manifest_internal(format!( + "embedding deadline exceeded after {} ms (provider={:?}, model={})", + self.deadline_ms, self.config.provider, self.config.model + ))), + } + } + + async fn embed_text_inner( + &self, + input: &str, + expected_dim: usize, + role: EmbedRole, + ) -> Result> { + match self.config.provider { + Provider::Mock => Ok(mock_embedding(input, expected_dim)), + Provider::Gemini => { + self.with_retry(|| self.embed_gemini_once(input, expected_dim, role)) + .await + } + Provider::OpenAiCompatible => { + self.with_retry(|| self.embed_openai_once(input, expected_dim)) .await } } @@ -160,6 +350,14 @@ impl EmbeddingClient { if !err.retryable || attempt >= max_attempt { return Err(OmniError::manifest_internal(err.message)); } + tracing::warn!( + target: "omnigraph::embedding", + provider = ?self.config.provider, + model = %self.config.model, + attempt, + error = %err.message, + "embedding attempt failed, retrying" + ); let shift = (attempt - 1).min(10) as u32; let delay = self.retry_backoff_ms.saturating_mul(1u64 << shift); sleep(Duration::from_millis(delay)).await; @@ -168,25 +366,27 @@ impl EmbeddingClient { } } - async fn embed_text_gemini_once( + async fn embed_gemini_once( &self, input: &str, expected_dim: usize, - task_type: &'static str, + role: EmbedRole, ) -> std::result::Result, EmbedCallError> { - let (api_key, base_url, http) = match &self.transport { - EmbeddingTransport::Gemini { - api_key, - base_url, - http, - } => (api_key, base_url, http), - EmbeddingTransport::Mock => unreachable!("mock transport should not call Gemini"), + let task_type = match role { + EmbedRole::Query => GEMINI_QUERY_TASK_TYPE, + EmbedRole::Document => GEMINI_DOCUMENT_TASK_TYPE, }; - let response = http - .post(gemini_endpoint(base_url)) - .header("x-goog-api-key", api_key) - .json(&build_gemini_request(input, expected_dim, task_type)) + let response = self + .http + .post(gemini_endpoint(&self.config.base_url, &self.config.model)) + .header("x-goog-api-key", &self.config.api_key) + .json(&build_gemini_request( + &self.config.model, + input, + expected_dim, + task_type, + )) .send() .await; let response = match response { @@ -205,10 +405,7 @@ impl EmbeddingClient { Ok(body) => body, Err(err) => { return Err(EmbedCallError { - message: format!( - "embedding response read failed (status {}): {}", - status, err - ), + message: format!("embedding response read failed (status {}): {}", status, err), retryable: status.is_server_error() || status.as_u16() == 429, }); } @@ -217,10 +414,7 @@ impl EmbeddingClient { if !status.is_success() { let message = parse_google_error_message(&body).unwrap_or(body); return Err(EmbedCallError { - message: format!( - "embedding request failed with status {}: {}", - status, message - ), + message: format!("embedding request failed with status {}: {}", status, message), retryable: status.is_server_error() || status.as_u16() == 429, }); } @@ -238,19 +432,85 @@ impl EmbeddingClient { } }) } + + async fn embed_openai_once( + &self, + input: &str, + expected_dim: usize, + ) -> std::result::Result, EmbedCallError> { + let response = self + .http + .post(format!("{}/embeddings", self.config.base_url)) + .bearer_auth(&self.config.api_key) + .json(&build_openai_request(&self.config.model, input, expected_dim)) + .send() + .await; + let response = match response { + Ok(response) => response, + Err(err) => { + let retryable = err.is_timeout() || err.is_connect() || err.is_request(); + return Err(EmbedCallError { + message: format!("embedding request failed: {}", err), + retryable, + }); + } + }; + + let status = response.status(); + let body = match response.text().await { + Ok(body) => body, + Err(err) => { + return Err(EmbedCallError { + message: format!("embedding response read failed (status {}): {}", status, err), + retryable: status.is_server_error() || status.as_u16() == 429, + }); + } + }; + + if !status.is_success() { + let message = parse_openai_error_message(&body).unwrap_or(body); + return Err(EmbedCallError { + message: format!("embedding request failed with status {}: {}", status, message), + retryable: status.is_server_error() || status.as_u16() == 429, + }); + } + + let parsed: OpenAiEmbeddingResponse = + serde_json::from_str(&body).map_err(|err| EmbedCallError { + message: format!("embedding response decode failed: {}", err), + retryable: false, + })?; + + // The query path embeds exactly one string, so expect one datum at index 0. + let datum = parsed + .data + .into_iter() + .find(|d| d.index == 0) + .ok_or_else(|| EmbedCallError { + message: "embedding response missing data[0]".to_string(), + retryable: false, + })?; + + validate_and_normalize_embedding(datum.embedding, expected_dim).map_err(|message| { + EmbedCallError { + message, + retryable: false, + } + }) + } } -fn gemini_endpoint(base_url: &str) -> String { +fn gemini_endpoint(base_url: &str, model: &str) -> String { format!( "{}/models/{}:embedContent", base_url.trim_end_matches('/'), - GEMINI_EMBED_MODEL + model ) } -fn build_gemini_request(input: &str, expected_dim: usize, task_type: &'static str) -> Value { +fn build_gemini_request(model: &str, input: &str, expected_dim: usize, task_type: &str) -> Value { json!({ - "model": format!("models/{}", GEMINI_EMBED_MODEL), + "model": format!("models/{}", model), "content": { "parts": [ { @@ -263,6 +523,14 @@ fn build_gemini_request(input: &str, expected_dim: usize, task_type: &'static st }) } +fn build_openai_request(model: &str, input: &str, expected_dim: usize) -> Value { + json!({ + "model": model, + "input": [input], + "dimensions": expected_dim, + }) +} + fn validate_and_normalize_embedding( values: Vec, expected_dim: usize, @@ -298,6 +566,57 @@ fn parse_google_error_message(body: &str) -> Option { .filter(|msg| !msg.trim().is_empty()) } +fn parse_openai_error_message(body: &str) -> Option { + serde_json::from_str::(body) + .ok() + .map(|e| e.error.message) + .filter(|msg| !msg.trim().is_empty()) +} + +/// Map a provider alias to `(provider, default base URL, default model, ordered +/// api-key envs)`. Shared by `from_env` and `from_parts` so both apply identical +/// defaults: `openai-compatible`/unset → the OpenRouter gateway, `openai` → +/// OpenAI's own host. `mock` is handled by callers before this is reached. The +/// `Provider` enum alone would collapse the two openai aliases, so the alias +/// (not the enum) determines the key-env order here. +fn provider_profile( + alias: Option<&str>, +) -> Result<(Provider, &'static str, &'static str, &'static [&'static str])> { + Ok(match alias { + None | Some("openai-compatible") => ( + Provider::OpenAiCompatible, + DEFAULT_OPENROUTER_BASE_URL, + DEFAULT_OPENROUTER_MODEL, + &["OPENROUTER_API_KEY", "OPENAI_API_KEY"], + ), + Some("openai") => ( + Provider::OpenAiCompatible, + DEFAULT_OPENAI_BASE_URL, + DEFAULT_OPENAI_MODEL, + &["OPENAI_API_KEY"], + ), + Some("gemini") => ( + Provider::Gemini, + DEFAULT_GEMINI_BASE_URL, + DEFAULT_GEMINI_MODEL, + &["GEMINI_API_KEY"], + ), + Some(other) => { + return Err(OmniError::manifest_internal(format!( + "unknown embedding provider '{}' (expected openai-compatible|openai|gemini|mock)", + other + ))); + } + }) +} + +fn env_string(name: &str) -> Option { + std::env::var(name) + .ok() + .map(|v| v.trim().to_string()) + .filter(|v| !v.is_empty()) +} + fn parse_env_usize(name: &str, default: usize) -> usize { std::env::var(name) .ok() @@ -314,6 +633,15 @@ fn parse_env_u64(name: &str, default: u64) -> u64 { .unwrap_or(default) } +/// Like [`parse_env_u64`] but accepts `0` as a meaningful value (the deadline +/// uses `0` for "unbounded"). +fn parse_env_u64_allow_zero(name: &str, default: u64) -> u64 { + std::env::var(name) + .ok() + .and_then(|v| v.trim().parse::().ok()) + .unwrap_or(default) +} + fn env_flag(name: &str) -> bool { std::env::var(name) .ok() @@ -395,6 +723,25 @@ mod tests { } } + // Every test that calls `EmbeddingConfig::from_env` clears the full set of + // embedding env vars first so the host environment can't leak in. + const EMBED_ENV: &[&str] = &[ + "OMNIGRAPH_EMBEDDINGS_MOCK", + "OMNIGRAPH_EMBED_PROVIDER", + "OMNIGRAPH_EMBED_BASE_URL", + "OMNIGRAPH_EMBED_MODEL", + "OPENROUTER_API_KEY", + "OPENAI_API_KEY", + "GEMINI_API_KEY", + ]; + + fn cleared_env(extra: &[(&'static str, Option<&str>)]) -> EnvGuard { + let mut vars: Vec<(&'static str, Option<&str>)> = + EMBED_ENV.iter().map(|n| (*n, None)).collect(); + vars.extend_from_slice(extra); + EnvGuard::set(&vars) + } + #[tokio::test] async fn mock_embeddings_are_deterministic() { let client = EmbeddingClient::mock_for_tests(); @@ -407,18 +754,30 @@ mod tests { } #[test] - fn gemini_request_uses_preview_model_retrieval_query_and_dimension() { - let request = build_gemini_request("alpha", 4, QUERY_TASK_TYPE); - assert_eq!(request["model"], "models/gemini-embedding-2-preview"); - assert_eq!(request["taskType"], QUERY_TASK_TYPE); + fn gemini_request_uses_model_retrieval_query_and_dimension() { + let request = + build_gemini_request("gemini-embedding-2", "alpha", 4, GEMINI_QUERY_TASK_TYPE); + assert_eq!(request["model"], "models/gemini-embedding-2"); + assert_eq!(request["taskType"], GEMINI_QUERY_TASK_TYPE); assert_eq!(request["outputDimensionality"], 4); assert_eq!(request["content"]["parts"][0]["text"], "alpha"); } #[test] fn gemini_document_request_uses_retrieval_document_task_type() { - let request = build_gemini_request("alpha", 4, DOCUMENT_TASK_TYPE); - assert_eq!(request["taskType"], DOCUMENT_TASK_TYPE); + let request = + build_gemini_request("gemini-embedding-2", "alpha", 4, GEMINI_DOCUMENT_TASK_TYPE); + assert_eq!(request["taskType"], GEMINI_DOCUMENT_TASK_TYPE); + } + + #[test] + fn openai_request_uses_model_input_array_and_dimensions() { + let request = build_openai_request("openai/text-embedding-3-large", "alpha", 4); + assert_eq!(request["model"], "openai/text-embedding-3-large"); + assert_eq!(request["input"][0], "alpha"); + assert!(request["input"].is_array()); + assert_eq!(request["dimensions"], 4); + assert!(request.get("taskType").is_none()); } #[test] @@ -475,15 +834,202 @@ mod tests { assert!(err.to_string().contains("do not retry")); } + #[tokio::test] + async fn run_with_deadline_aborts_slow_future() { + let mut client = EmbeddingClient::mock_for_tests(); + client.deadline_ms = 20; + let slow = async { + tokio::time::sleep(Duration::from_secs(5)).await; + Ok(vec![0.0_f32]) + }; + let err = client.run_with_deadline(slow).await.unwrap_err(); + assert!(err.to_string().contains("deadline exceeded")); + } + + #[tokio::test] + async fn run_with_deadline_passes_through_fast_future() { + let client = EmbeddingClient::mock_for_tests(); + let ok = client + .run_with_deadline(async { Ok(vec![1.0_f32, 2.0]) }) + .await + .unwrap(); + assert_eq!(ok, vec![1.0, 2.0]); + } + + #[tokio::test] + async fn run_with_deadline_zero_is_unbounded() { + let mut client = EmbeddingClient::mock_for_tests(); + client.deadline_ms = 0; + let ok = client + .run_with_deadline(async { Ok(vec![3.0_f32]) }) + .await + .unwrap(); + assert_eq!(ok, vec![3.0]); + } + #[test] #[serial] - fn from_env_requires_gemini_api_key_when_not_mocking() { - let _guard = EnvGuard::set(&[ - ("OMNIGRAPH_EMBEDDINGS_MOCK", None), - ("GEMINI_API_KEY", None), - ]); + fn from_env_defaults_to_openai_compatible_openrouter() { + let _guard = cleared_env(&[("OPENROUTER_API_KEY", Some("sk-test"))]); + let config = EmbeddingConfig::from_env().unwrap(); + assert_eq!(config.provider, Provider::OpenAiCompatible); + assert_eq!(config.base_url, DEFAULT_OPENROUTER_BASE_URL); + assert_eq!(config.model, DEFAULT_OPENROUTER_MODEL); + assert_eq!(config.api_key, "sk-test"); + } - let err = EmbeddingClient::from_env().unwrap_err(); - assert!(err.to_string().contains("GEMINI_API_KEY")); + #[test] + #[serial] + fn from_env_openai_alias_uses_openai_host_not_openrouter() { + let _guard = cleared_env(&[ + ("OMNIGRAPH_EMBED_PROVIDER", Some("openai")), + ("OPENAI_API_KEY", Some("k")), + ]); + let config = EmbeddingConfig::from_env().unwrap(); + assert_eq!(config.provider, Provider::OpenAiCompatible); + assert_eq!(config.base_url, DEFAULT_OPENAI_BASE_URL); // api.openai.com, not OpenRouter + assert_eq!(config.model, DEFAULT_OPENAI_MODEL); // text-embedding-3-large, no openai/ prefix + assert_eq!(config.api_key, "k"); + } + + #[test] + #[serial] + fn from_env_openai_alias_prefers_openai_key_over_openrouter() { + // `openai` targets api.openai.com, so an OpenRouter key must not be sent there. + let _guard = cleared_env(&[ + ("OMNIGRAPH_EMBED_PROVIDER", Some("openai")), + ("OPENROUTER_API_KEY", Some("router")), + ("OPENAI_API_KEY", Some("openai")), + ]); + let config = EmbeddingConfig::from_env().unwrap(); + assert_eq!(config.base_url, DEFAULT_OPENAI_BASE_URL); + assert_eq!(config.api_key, "openai"); + } + + #[test] + #[serial] + fn from_env_openai_alias_errors_when_only_openrouter_key_is_set() { + let _guard = cleared_env(&[ + ("OMNIGRAPH_EMBED_PROVIDER", Some("openai")), + ("OPENROUTER_API_KEY", Some("router")), + ]); + let err = EmbeddingConfig::from_env().unwrap_err(); + assert!(err.to_string().contains("OPENAI_API_KEY"), "got: {err}"); + } + + #[test] + fn from_parts_applies_provider_defaults_and_overrides() { + let openrouter = EmbeddingConfig::from_parts(None, None, None, "k".to_string()).unwrap(); + assert_eq!(openrouter.provider, Provider::OpenAiCompatible); + assert_eq!(openrouter.base_url, DEFAULT_OPENROUTER_BASE_URL); + assert_eq!(openrouter.model, DEFAULT_OPENROUTER_MODEL); + assert_eq!(openrouter.api_key, "k"); + + let gemini = + EmbeddingConfig::from_parts(Some("gemini"), None, None, "g".to_string()).unwrap(); + assert_eq!(gemini.provider, Provider::Gemini); + assert_eq!(gemini.base_url, DEFAULT_GEMINI_BASE_URL); + + let overridden = EmbeddingConfig::from_parts( + Some("openai"), + Some("https://x/v1/".to_string()), + Some("custom".to_string()), + "k".to_string(), + ) + .unwrap(); + assert_eq!(overridden.base_url, "https://x/v1"); // trailing slash trimmed + assert_eq!(overridden.model, "custom"); + + let err = + EmbeddingConfig::from_parts(Some("cohere"), None, None, "k".to_string()).unwrap_err(); + assert!( + err.to_string().contains("unknown embedding provider"), + "got: {err}" + ); + } + + #[test] + #[serial] + fn from_parts_mock_honors_an_explicit_model() { + // A cluster `providers.embedding` profile that sets `kind: mock, model: X` + // must resolve to model X — it is what the query-time same-space check + // compares against. Env cleared so the assertion isolates the arg. + let _guard = cleared_env(&[]); + let pinned = + EmbeddingConfig::from_parts(Some("mock"), None, Some("recorded-x".to_string()), String::new()) + .unwrap(); + assert_eq!(pinned.provider, Provider::Mock); + assert_eq!(pinned.model, "recorded-x"); + // With no explicit model, mock falls back to its env-based default (here + // empty, since the env is cleared). + let bare = EmbeddingConfig::from_parts(Some("mock"), None, None, String::new()).unwrap(); + assert_eq!(bare.provider, Provider::Mock); + assert_eq!(bare.model, ""); + } + + #[test] + #[serial] + fn from_env_openai_compatible_prefers_openrouter_key() { + let _guard = cleared_env(&[ + ("OPENROUTER_API_KEY", Some("router")), + ("OPENAI_API_KEY", Some("openai")), + ]); + let config = EmbeddingConfig::from_env().unwrap(); + assert_eq!(config.api_key, "router"); + } + + #[test] + #[serial] + fn from_env_explicit_gemini_provider() { + let _guard = cleared_env(&[ + ("OMNIGRAPH_EMBED_PROVIDER", Some("gemini")), + ("GEMINI_API_KEY", Some("g-key")), + ]); + let config = EmbeddingConfig::from_env().unwrap(); + assert_eq!(config.provider, Provider::Gemini); + assert_eq!(config.base_url, DEFAULT_GEMINI_BASE_URL); + assert_eq!(config.model, DEFAULT_GEMINI_MODEL); + assert_eq!(config.api_key, "g-key"); + } + + #[test] + #[serial] + fn from_env_base_url_and_model_overrides_apply() { + let _guard = cleared_env(&[ + ("OMNIGRAPH_EMBED_PROVIDER", Some("openai-compatible")), + ("OMNIGRAPH_EMBED_BASE_URL", Some("https://example.test/v1/")), + ("OMNIGRAPH_EMBED_MODEL", Some("custom/model")), + ("OPENAI_API_KEY", Some("k")), + ]); + let config = EmbeddingConfig::from_env().unwrap(); + assert_eq!(config.base_url, "https://example.test/v1"); // trailing slash trimmed + assert_eq!(config.model, "custom/model"); + } + + #[test] + #[serial] + fn from_env_unknown_provider_errors() { + let _guard = cleared_env(&[("OMNIGRAPH_EMBED_PROVIDER", Some("cohere"))]); + let err = EmbeddingConfig::from_env().unwrap_err(); + assert!(err.to_string().contains("unknown embedding provider")); + } + + #[test] + #[serial] + fn from_env_errors_when_no_key_present() { + let _guard = cleared_env(&[]); + let err = EmbeddingConfig::from_env().unwrap_err(); + assert!(err.to_string().contains("OPENROUTER_API_KEY or OPENAI_API_KEY")); + } + + #[test] + #[serial] + fn from_env_mock_flag_wins() { + let _guard = cleared_env(&[ + ("OMNIGRAPH_EMBEDDINGS_MOCK", Some("1")), + ("OMNIGRAPH_EMBED_PROVIDER", Some("gemini")), + ]); + let config = EmbeddingConfig::from_env().unwrap(); + assert_eq!(config.provider, Provider::Mock); } } diff --git a/crates/omnigraph/src/exec/query.rs b/crates/omnigraph/src/exec/query.rs index 4c1822f..b12e26b 100644 --- a/crates/omnigraph/src/exec/query.rs +++ b/crates/omnigraph/src/exec/query.rs @@ -2,6 +2,30 @@ use super::*; use super::projection::{apply_filter, apply_ordering, project_return}; +/// Bundles the per-handle embedding client cell with the optional injected +/// config (RFC-012 Phase 5) so the lazy init uses the injected config when +/// present, else `EmbeddingClient::from_env()`. Threaded through the query path +/// in place of the bare cell, preserving laziness (a graph that never embeds +/// builds no client and needs no key). +pub(crate) struct EmbeddingResolver<'a> { + cell: &'a tokio::sync::OnceCell, + config: Option<&'a crate::embedding::EmbeddingConfig>, +} + +impl EmbeddingResolver<'_> { + async fn resolve(&self) -> Result<&EmbeddingClient> { + let config = self.config.cloned(); + self.cell + .get_or_try_init(|| async move { + match config { + Some(cfg) => EmbeddingClient::new(cfg), + None => EmbeddingClient::from_env(), + } + }) + .await + } +} + impl Omnigraph { /// Run a named query against an explicit branch or snapshot target. pub async fn query( @@ -31,7 +55,18 @@ impl Omnigraph { GraphIndexHandle::none() }; - execute_query(&ir, params, &resolved.snapshot, &graph_index, &catalog).await + execute_query( + &ir, + params, + &resolved.snapshot, + &graph_index, + &catalog, + &EmbeddingResolver { + cell: self.embedding_cell(), + config: self.embedding_config_ref(), + }, + ) + .await } /// Run a named query against the graph as it existed at a prior manifest version. @@ -72,7 +107,18 @@ impl Omnigraph { GraphIndexHandle::none() }; - execute_query(&ir, params, &snapshot, &graph_index, &catalog).await + execute_query( + &ir, + params, + &snapshot, + &graph_index, + &catalog, + &EmbeddingResolver { + cell: self.embedding_cell(), + config: self.embedding_config_ref(), + }, + ) + .await } } @@ -102,6 +148,7 @@ async fn extract_search_mode( ir: &QueryIR, params: &ParamMap, catalog: &Catalog, + embedding: &EmbeddingResolver<'_>, ) -> Result { if ir.order_by.is_empty() { return Ok(SearchMode::default()); @@ -114,7 +161,8 @@ async fn extract_search_mode( query, } => { let vec = - resolve_nearest_query_vec(ir, catalog, variable, property, query, params).await?; + resolve_nearest_query_vec(ir, catalog, variable, property, query, params, embedding) + .await?; let k = ir.limit.ok_or_else(|| { OmniError::manifest("nearest() ordering requires a limit clause".to_string()) })? as usize; @@ -157,9 +205,10 @@ async fn extract_search_mode( .unwrap_or(60) as u32; let primary_mode = - extract_sub_search_mode(ir, primary, params, catalog, ir.limit).await?; + extract_sub_search_mode(ir, primary, params, catalog, ir.limit, embedding).await?; let secondary_mode = - extract_sub_search_mode(ir, secondary, params, catalog, ir.limit).await?; + extract_sub_search_mode(ir, secondary, params, catalog, ir.limit, embedding) + .await?; Ok(SearchMode { rrf: Some(RrfMode { @@ -182,6 +231,7 @@ async fn extract_sub_search_mode( params: &ParamMap, catalog: &Catalog, limit: Option, + embedding: &EmbeddingResolver<'_>, ) -> Result { match expr { IRExpr::Nearest { @@ -190,7 +240,8 @@ async fn extract_sub_search_mode( query, } => { let vec = - resolve_nearest_query_vec(ir, catalog, variable, property, query, params).await?; + resolve_nearest_query_vec(ir, catalog, variable, property, query, params, embedding) + .await?; let k = limit.unwrap_or(100) as usize; Ok(SearchMode { nearest: Some((variable.clone(), property.clone(), vec, k)), @@ -229,15 +280,34 @@ async fn resolve_nearest_query_vec( property: &str, expr: &IRExpr, params: &ParamMap, + embedding: &EmbeddingResolver<'_>, ) -> Result> { let lit = resolve_literal_or_param(expr, params)?; match lit { Literal::List(_) => literal_to_f32_vec(&lit), Literal::String(text) => { - let expected_dim = nearest_property_dimension(ir, catalog, variable, property)?; - EmbeddingClient::from_env()? - .embed_query_text(&text, expected_dim) - .await + let (expected_dim, recorded_model) = + nearest_property_dim_and_model(ir, catalog, variable, property)?; + // Lazily resolve the per-handle client once, then reuse it across + // queries (keeps the provider connection pool warm); a graph that + // never embeds never builds a client and needs no provider key. + let client = embedding.resolve().await?; + // Same-space guarantee: if the property recorded the model that + // produced its stored vectors (`@embed("…", model="…")`), the query + // embedder must resolve to that same model — otherwise the comparison + // is across vector spaces. Reject loudly instead of ranking garbage. + if let Some(recorded) = &recorded_model { + let resolved = &client.config().model; + if resolved != recorded { + return Err(OmniError::manifest(format!( + "nearest() on '{property}': its stored vectors were embedded with model \ + '{recorded}', but the query embedder resolves to '{resolved}'. Set \ + OMNIGRAPH_EMBED_MODEL='{recorded}' (and the matching provider) or re-embed \ + the stored vectors." + ))); + } + } + client.embed_query_text(&text, expected_dim).await } _ => Err(OmniError::manifest( "nearest query must be a string or list of floats".to_string(), @@ -279,12 +349,14 @@ fn literal_to_f32_vec(lit: &Literal) -> Result> { } } -fn nearest_property_dimension( +/// Resolve the nearest() target property's vector dimension and the embedding +/// model recorded for it via `@embed("…", model="…")` (`None` if unrecorded). +fn nearest_property_dim_and_model( ir: &QueryIR, catalog: &Catalog, variable: &str, property: &str, -) -> Result { +) -> Result<(usize, Option)> { let type_name = resolve_binding_type_name(&ir.pipeline, variable).ok_or_else(|| { OmniError::manifest_internal(format!( "nearest() variable '${}' is not bound to a node type in the lowered pipeline", @@ -303,13 +375,20 @@ fn nearest_property_dimension( type_name, property )) })?; - match prop.scalar { - ScalarType::Vector(dim) if !prop.list => Ok(dim as usize), - _ => Err(OmniError::manifest_internal(format!( - "nearest() property '{}.{}' is not a scalar vector", - type_name, property - ))), - } + let dim = match prop.scalar { + ScalarType::Vector(dim) if !prop.list => dim as usize, + _ => { + return Err(OmniError::manifest_internal(format!( + "nearest() property '{}.{}' is not a scalar vector", + type_name, property + ))); + } + }; + let recorded_model = node_type + .embed_sources + .get(property) + .and_then(|embed| embed.model.clone()); + Ok((dim, recorded_model)) } fn resolve_binding_type_name<'a>(pipeline: &'a [IROp], variable: &str) -> Option<&'a str> { @@ -341,8 +420,9 @@ pub async fn execute_query( snapshot: &Snapshot, graph_index: &GraphIndexHandle<'_>, catalog: &Catalog, + embedding: &EmbeddingResolver<'_>, ) -> Result { - let search_mode = extract_search_mode(ir, params, catalog).await?; + let search_mode = extract_search_mode(ir, params, catalog, embedding).await?; // RRF requires forked execution if let Some(ref rrf) = search_mode.rrf { diff --git a/crates/omnigraph/tests/search.rs b/crates/omnigraph/tests/search.rs index 480ec3c..425c51b 100644 --- a/crates/omnigraph/tests/search.rs +++ b/crates/omnigraph/tests/search.rs @@ -60,6 +60,15 @@ query hybrid_search_string($vq: String, $tq: String) { limit 3 } "#; +// Same shape as MOCK_SEARCH_SCHEMA but the vector records the model that +// produced its stored vectors, opting into the query-time same-space check. +const MODEL_RECORDED_SCHEMA: &str = r#" +node Doc { + slug: String @key + title: String @index + embedding: Vector(4) @embed("title", model="test-model-a") @index +} +"#; const SEARCH_MUTATIONS: &str = r#" query insert_doc($slug: String, $title: String, $body: String, $embedding: Vector(4)) { insert Doc { @@ -89,6 +98,15 @@ async fn init_mock_embedding_search_db(dir: &tempfile::TempDir) -> Omnigraph { db } +async fn init_model_recorded_search_db(dir: &tempfile::TempDir) -> Omnigraph { + let uri = dir.path().to_str().unwrap(); + let mut db = Omnigraph::init(uri, MODEL_RECORDED_SCHEMA).await.unwrap(); + load_jsonl(&mut db, &mock_embedding_seed_data(), LoadMode::Overwrite) + .await + .unwrap(); + db +} + fn mock_embedding_seed_data() -> String { [ ("alpha-doc", "alpha guide", mock_embedding("alpha", 4)), @@ -510,9 +528,14 @@ async fn explicit_vector_nearest_does_not_require_gemini_credentials() { #[tokio::test] #[serial] -async fn string_nearest_requires_gemini_credentials_when_mock_is_disabled() { +async fn string_nearest_requires_provider_credentials_when_mock_is_disabled() { + // With mock off and no provider key, the default (openai-compatible) + // provider fails loudly rather than silently producing garbage vectors. let _guard = EnvGuard::set(&[ ("OMNIGRAPH_EMBEDDINGS_MOCK", None), + ("OMNIGRAPH_EMBED_PROVIDER", None), + ("OPENROUTER_API_KEY", None), + ("OPENAI_API_KEY", None), ("GEMINI_API_KEY", None), ]); @@ -528,7 +551,105 @@ async fn string_nearest_requires_gemini_credentials_when_mock_is_disabled() { .await .unwrap_err(); - assert!(err.to_string().contains("GEMINI_API_KEY")); + assert!( + err.to_string() + .contains("OPENROUTER_API_KEY or OPENAI_API_KEY"), + "unexpected error: {err}" + ); +} + +#[tokio::test] +#[serial] +async fn nearest_string_passes_when_query_model_matches_recorded_model() { + let _guard = EnvGuard::set(&[ + ("OMNIGRAPH_EMBEDDINGS_MOCK", Some("1")), + ("OMNIGRAPH_EMBED_MODEL", Some("test-model-a")), + ("OMNIGRAPH_EMBED_PROVIDER", None), + ("OPENROUTER_API_KEY", None), + ("OPENAI_API_KEY", None), + ("GEMINI_API_KEY", None), + ]); + + let dir = tempfile::tempdir().unwrap(); + let mut db = init_model_recorded_search_db(&dir).await; + + let result = query_main( + &mut db, + MOCK_SEARCH_QUERIES, + "vector_search_string", + ¶ms(&[("$q", "alpha")]), + ) + .await + .unwrap(); + + assert_eq!(result_slugs(&result)[0], "alpha-doc"); +} + +#[tokio::test] +#[serial] +async fn nearest_string_errors_when_query_model_differs_from_recorded_model() { + let _guard = EnvGuard::set(&[ + ("OMNIGRAPH_EMBEDDINGS_MOCK", Some("1")), + ("OMNIGRAPH_EMBED_MODEL", Some("test-model-b")), + ("OMNIGRAPH_EMBED_PROVIDER", None), + ("OPENROUTER_API_KEY", None), + ("OPENAI_API_KEY", None), + ("GEMINI_API_KEY", None), + ]); + + let dir = tempfile::tempdir().unwrap(); + let mut db = init_model_recorded_search_db(&dir).await; + + let err = query_main( + &mut db, + MOCK_SEARCH_QUERIES, + "vector_search_string", + ¶ms(&[("$q", "alpha")]), + ) + .await + .unwrap_err(); + + // The error must name both the recorded model and the resolved one. + let msg = err.to_string(); + assert!(msg.contains("test-model-a"), "got: {msg}"); + assert!(msg.contains("test-model-b"), "got: {msg}"); +} + +#[tokio::test] +#[serial] +async fn injected_embedding_config_is_used_instead_of_env() { + // No mock flag and no provider keys in env, so `from_env()` would error. + // Injecting a Mock config proves the resolver uses the injected config + // (RFC-012 Phase 5), and its model satisfies the recorded same-space check. + let _guard = EnvGuard::set(&[ + ("OMNIGRAPH_EMBEDDINGS_MOCK", None), + ("OMNIGRAPH_EMBED_PROVIDER", None), + ("OMNIGRAPH_EMBED_MODEL", None), + ("OPENROUTER_API_KEY", None), + ("OPENAI_API_KEY", None), + ("GEMINI_API_KEY", None), + ]); + + let dir = tempfile::tempdir().unwrap(); + let mut db = init_model_recorded_search_db(&dir) + .await + .with_embedding_config(std::sync::Arc::new(omnigraph::embedding::EmbeddingConfig { + provider: omnigraph::embedding::Provider::Mock, + model: "test-model-a".to_string(), + base_url: String::new(), + api_key: String::new(), + })); + + let result = query_main( + &mut db, + MOCK_SEARCH_QUERIES, + "vector_search_string", + ¶ms(&[("$q", "alpha")]), + ) + .await + .unwrap(); + + assert_eq!(result_slugs(&result)[0], "alpha-doc"); } // ─── BM25 search ──────────────────────────────────────────────────────────── diff --git a/docs/dev/execution.md b/docs/dev/execution.md index 237a7af..e9ac9eb 100644 --- a/docs/dev/execution.md +++ b/docs/dev/execution.md @@ -177,4 +177,4 @@ For all three modes, a mid-load failure (RI / cardinality violation, validation ## Embeddings during load -If a node type has `@embed` properties, the loader calls the engine embedding client (Gemini, RETRIEVAL_DOCUMENT) per row to populate the vector column. See [embeddings.md](../user/search/embeddings.md). +The loader does **not** embed `@embed` properties at load time. `@embed` is a catalog annotation consumed by query typecheck/lint; vectors are supplied directly in the load data, or pre-filled by the offline `omnigraph embed` pipeline. Query-time `nearest($v, "string")` auto-embeds the query string via the provider-independent embedding client. See [embeddings.md](../user/search/embeddings.md). (Ingest-time `@embed` execution is a planned RFC-012 phase.) diff --git a/docs/dev/index.md b/docs/dev/index.md index 8f4cf86..a0a6afb 100644 --- a/docs/dev/index.md +++ b/docs/dev/index.md @@ -81,6 +81,7 @@ Working documents for in-flight feature work. Removed when the work lands. | Unify CLI embedded/remote access paths — parity referee, shared wire-DTO crate, `GraphClient` trait, declared plane capabilities | [rfc-009-unify-access-paths.md](rfc-009-unify-access-paths.md) | | Restructure the CLI around explicit planes — one graph-addressing model, declared capability surface, plane-grouped help (expands RFC-009 Phase 4) | [rfc-010-cli-planes-restructure.md](rfc-010-cli-planes-restructure.md) | | CLI refactoring — one addressing & config model post-`omnigraph.yaml`: scope + `--graph` + derived access path, served-default / privileged-direct, profiles, named queries, capability classifier (completes RFC-008) | [rfc-011-cli-refactoring.md](rfc-011-cli-refactoring.md) | +| Provider-independent embedding configuration — one resolved `EmbeddingConfig` + sealed provider enum (Gemini/OpenAI/Mock), identity recorded in the schema IR, query-time same-space validation, NFR floor | [rfc-012-embedding-provider-config.md](rfc-012-embedding-provider-config.md) | ## Boundary diff --git a/docs/dev/rfc-012-embedding-provider-config.md b/docs/dev/rfc-012-embedding-provider-config.md new file mode 100644 index 0000000..45083a2 --- /dev/null +++ b/docs/dev/rfc-012-embedding-provider-config.md @@ -0,0 +1,295 @@ +# RFC: Provider-Independent Embedding Configuration + +**Status:** Accepted — Phases 1-5 implemented +**Date:** 2026-06-15 +**Builds on:** the engine embedding client (`crates/omnigraph/src/embedding.rs`), the `@embed` catalog +annotation (`omnigraph-compiler/src/catalog`), the cluster `providers.embedding` surface +([cluster-config-specs.md](cluster-config-specs.md), [rfc-007-operator-config.md](rfc-007-operator-config.md) +for the secret-resolution pattern). +**Target release:** staged — NFR floor first, then the provider-independent config core; ingest-time `@embed` +execution is a separate later phase. + +## Summary + +OmniGraph's embedding subsystem is **hardwired to a single provider (Google Gemini)** and has no recorded +link between the model that produced a stored vector and the model that embeds a query string. Today that +happens to be self-consistent (one live client embeds both sides), but it is consistent by accident, not by +construction: the provider is hardcoded, the model is a moving `-preview` target, nothing validates that a +query vector and a stored vector share a space, and the one configurable knob (key + base URL) cannot change +the provider or model. + +This RFC makes embedding **provider-independent**: one resolved `EmbeddingConfig { provider, model, base_url, +api_key, dim, normalize }` behind a sealed provider abstraction, resolved once and shared by every embedder. +The **primary variant is OpenAI-compatible** — a single request/response shape (`POST {base}/embeddings`, +`{model, input, dimensions}`) that covers **OpenRouter** (the recommended default gateway, one key for Gemini, +OpenAI, Mistral, BGE, Qwen, sentence-transformers, …), OpenAI direct, and any self-hosted OpenAI-compatible +endpoint (vLLM, Ollama, LM Studio, Together). A native **Gemini** (`generativelanguage`) variant is retained +for shops that want to hit Google directly with its `RETRIEVAL_QUERY`/`RETRIEVAL_DOCUMENT` task-type +asymmetry, plus a deterministic **Mock**. The embedding *identity* (provider + model + dim) is recorded in the +schema IR so it travels with the data, and a query whose resolved embedder cannot match the stored vectors' +recorded identity is **rejected with a typed error instead of silently ranking across vector spaces.** +Provider/endpoint wiring lands on the already-reserved cluster `providers.embedding` field; secrets follow the +existing operator-credential pattern; no secret ever enters the schema. + +This RFC supersedes the framing in `docs/user/search/embeddings.md` that described "two embedding clients +with different defaults" — one of those clients was dead code with zero callers and has been removed (see +Phase 1); the OpenAI request shape returns as a first-class *provider variant* of the one client, not as a +second parallel client. + +## Motivation + +This work originated in an external handoff that reported a live cross-provider bug: gemini-3072 stored +vectors compared against OpenAI-1536 query vectors, silently. Investigation against the current source showed +the reported mechanism is **inaccurate** — the OpenAI client it blamed (`omnigraph-compiler/src/embedding.rs`) +was `pub(crate)`, `#![allow(dead_code)]`, and had **zero callers**; the live `nearest("string")` path and the +offline `omnigraph embed` CLI both use the engine **Gemini** client; and `@embed` does no ingest-time +embedding at all. So the documented happy path is self-consistent. But the investigation surfaced four real +problems the handoff's instincts correctly smelled: + +- **P1 — Provider is hardwired.** The one live client builds Google `generativelanguage` requests; only key + + base URL are configurable, not the provider or model. A non-Gemini shop cannot use `nearest("string")` + without a Gemini key, and cannot make it produce non-Gemini vectors. If they store their own vectors and + query with `nearest("string")`, the query is embedded with Gemini → a silent cross-space ranking. This is + the handoff's failure, reached by a different cause. +- **P2 — A dead, divergent second client + stale docs** invited exactly the misdiagnosis the handoff made. +- **P3 — No same-space guarantee recorded with the data.** Nothing stamps which model/dim produced a stored + vector, so write-side and read-side embedders can drift with no validation. +- **P4 — `@embed` is declarative-in-name-only.** It records a source property for the typechecker but never + embeds at ingest; the docs claimed otherwise. + +Per the project's first principle, the lower-liability shape is **one provider-independent client with the +identity recorded next to the data**, not N independently-defaulted clients kept in lockstep by discipline. +Hardcoding one provider mortgages every future "we need OpenAI / a local model / Vertex" against a rewrite; +recording identity once closes the silent-wrong-results class by construction. + +## Current state — which API we actually use + +| | Live engine client (`crates/omnigraph/src/embedding.rs`) | Deleted dead client (was `omnigraph-compiler/src/embedding.rs`) | +|---|---|---| +| Provider | **Google Gemini Developer API** (`generativelanguage`, *not* Vertex AI) | OpenAI | +| Endpoint | `POST {base}/models/{model}:embedContent` | `POST {base}/embeddings` | +| Auth | header `x-goog-api-key`, env `GEMINI_API_KEY` | `Authorization: Bearer`, env `OPENAI_API_KEY` | +| Model | `gemini-embedding-2-preview` (hardcoded) | `text-embedding-3-small` (env `NANOGRAPH_EMBED_MODEL`) | +| Base default | `https://generativelanguage.googleapis.com/v1beta` | `https://api.openai.com/v1` | +| Request body | `{model, content:{parts:[{text}]}, taskType, outputDimensionality}` | `{model, input:[…], dimensions}` | +| Response | `{embedding:{values:[f32]}}` | `{data:[{index, embedding:[f32]}]}` | +| Task types | `RETRIEVAL_QUERY` / `RETRIEVAL_DOCUMENT` | none | +| Status | **live** — used by `nearest("string")` and `omnigraph embed` | **removed in Phase 1** (zero callers) | + +Both shapes honour a requested output dimensionality (Gemini `outputDimensionality`, OpenAI `dimensions`) +driven by the target column width, so dimension is already schema-driven. The two known shapes are exactly the +two initial provider variants this RFC defines — the OpenAI shape returns from git history as a `Provider` +variant of the single client. + +## Guide-level explanation + +### Configuring a provider (operator view) + +Pick a provider for the graph in `cluster.yaml` (the team-owned surface), referencing a secret by name. The +recommended default routes through OpenRouter (OpenAI-compatible, one key for many models): + +```yaml +providers: + embedding: + default: + kind: openai-compatible # openai-compatible | gemini | mock + base_url: https://openrouter.ai/api/v1 + model: google/gemini-embedding-2 # or openai/text-embedding-3-large, mistralai/mistral-embed, … + api_key: ${OPENROUTER_API_KEY} +graphs: + knowledge: + schema: knowledge.pg + embedding_provider: default +``` + +The same `openai-compatible` kind points at OpenAI direct (`base_url: https://api.openai.com/v1`, +`model: text-embedding-3-large`) or a self-hosted endpoint (vLLM/Ollama/LM Studio) by changing `base_url`. Use +`kind: gemini` only to reach Google's `generativelanguage` API directly (it keeps the query/document +task-type asymmetry that the OpenAI-compatible shape does not expose). Dimensions are schema-driven by the +target `Vector(N)` column, not duplicated in the provider profile. + +The zero-config tier keeps working with env only (`OMNIGRAPH_EMBED_PROVIDER`, `OMNIGRAPH_EMBED_BASE_URL`, +`OMNIGRAPH_EMBED_MODEL`, and the provider api-key env — `OPENROUTER_API_KEY` / `OPENAI_API_KEY` / +`GEMINI_API_KEY`), so no cluster file is required for a single-graph setup. + +### Recording identity in the schema + +`@embed` grows optional arguments that pin the embedding identity to the vector column: + +```pg +node Doc { + slug: String @key + text: String + v: Vector(3072) @embed("text", model="gemini-embedding-2", dim=3072) @index +} +``` + +The single-argument form `@embed("text")` keeps working unchanged. The recorded identity persists in the +schema IR (`_schema.ir.json`) and so travels with `schema apply` and `schema show`. + +### What a mismatch looks like + +If the resolved read-side embedder cannot produce the recorded identity (wrong model, wrong dim, wrong +provider), `nearest($v, "string")` fails with a typed error naming both sides, instead of returning a +plausible-but-meaningless ranking. Changing the recorded identity on an existing column is a loud schema-apply +refusal (it is a re-embed, a deliberate migration step), reusing the migration planner's existing +annotation-change rejection. + +## Reference-level design + +### One client, sealed provider abstraction + +Replace the two-variant `EmbeddingTransport` with a resolved config plus a sealed provider enum: + +```text +EmbeddingConfig { provider: Provider, model, base_url, api_key, dim, normalize } +enum Provider { + OpenAiCompatible, // POST {base}/embeddings, Bearer auth, {model, input, dimensions} → {data:[{embedding,index}]} + // covers OpenRouter (default gateway), OpenAI direct, vLLM/Ollama/LM Studio/Together + Gemini, // POST {base}/models/{model}:embedContent, x-goog-api-key, with RETRIEVAL_QUERY/DOCUMENT task types + Mock, // deterministic, offline +} +struct EmbeddingClient { config, http, retry, deadline } +``` + +`Provider` owns the per-API differences (endpoint suffix, auth header, request JSON, response JSON, task-type +support); the client owns retry/backoff, the deadline, normalization, and tracing — all provider-independent. +**OpenRouter is not a distinct variant** — it is `OpenAiCompatible` with `base_url = +https://openrouter.ai/api/v1`, which is the point: one OpenAI-compatible shape gives provider-independence +across every model OpenRouter fronts, so the gateway does the multi-provider fan-out and OmniGraph carries one +request shape. The native `Gemini` variant exists only for direct-to-Google with task-type asymmetry. An enum +(not a trait) is the earned complexity for this small, first-party set; if third-party plug-in providers are +ever needed, the enum becomes a trait behind the same `EmbeddingConfig` surface without touching callers. + +The OpenAI-compatible `input` accepts an **array**, giving batch embedding for free — which the later +ingest phase needs for throughput, and which removes the open dependency on Gemini's native +`batchEmbedContents`. + +### Config resolution (resolved once, shared) + +Precedence, highest first for served cluster graphs: applied cluster `providers.embedding.` profile → +env (`OMNIGRAPH_EMBED_*`, provider api-key env) → built-in defaults. The cluster `api_key` value is a +`${NAME}` env reference resolved at server boot; plaintext never lives in the schema, state ledger, or any +checked-in file. Resolution happens once per graph handle; the resolved client is shared by +`nearest("string")`. Direct single-graph serving, embedded callers, and the offline CLI keep the env path +unless they inject an `EmbeddingConfig` directly. + +### Identity recorded in the schema IR (not a new store) + +The `@embed` args serialize into `PropertyIR.annotations` → `_schema.ir.json`, which `schema apply` already +persists atomically and which the catalog (the one thing `nearest()` reads at query time) is built from. No +new metadata store, no manifest column, no extra read on the query path. The migration planner already rejects +non-description annotation changes as `UnsupportedChange`, so "recorded identity is immutable without a +deliberate re-embed migration" is the default behaviour, not new code. (A second, optional copy in Lance +field metadata — co-located with the vectors — is available later by activating the currently no-op +`UpdatePropertyMetadata` migration step; out of scope here.) + +### Query-time validation + +`resolve_nearest_query_vec` compares the resolved read-side identity against the column's recorded identity +before embedding; on mismatch it returns a typed `OmniError` naming recorded vs resolved (model, dim, +provider). This is the only behaviour that closes P3 by construction. + +### NFR floor (independent of the provider work) + +- **Deadline:** wrap every embed call (query or document) in a total-operation deadline + (`OMNIGRAPH_EMBED_DEADLINE_MS`) so a degraded provider cannot hang the caller for the current ~121 s worst + case (4 × 30 s timeout + backoff). +- **Observability:** `tracing` span per embed call (provider, model, dim, attempts, outcome, elapsed; `warn!` + per retry; token usage when the provider returns it). The subsystem has zero instrumentation today. +- **Single normalization:** one `normalize_vector` (the dead client carried a divergent second copy; removed + in Phase 1). +- **Stable model:** make the model configurable and default to a stable (non-`-preview`) model once the GA + name is confirmed. + +### Ingest-time `@embed` (later phase, not this RFC's core) + +Making `@embed` embed at ingest is a separate phase with a hard constraint: embedding is a slow, external, +**non-idempotent** side effect, so it must run **entirely before staging** — in the pure in-memory phase, +before any `stage_*`/Lance HEAD move, alongside the existing constraint validation — so a mid-load provider +failure aborts with zero drift. It must never sit inside or after the commit protocol, because the recovery +sweep cannot re-run or undo an external embedding. It also needs a content-hash skip (so `load --mode +overwrite` does not re-bill every row), batching, and a bounded-concurrency stage. Specified here only to fix +the design constraint; deferred to its own RFC/phase. + +### Phasing (implementation order) + +| Phase | Scope | Demo | +|---|---|---| +| **1 — NFR floor + dead-client removal** | deadline, observability, single normalize, configurable model, delete dead client + `NANOGRAPH_*` | a hung provider fails at the deadline; embed calls traced; `rg NANOGRAPH_` empty | +| **2 — Provider-independent config** | `EmbeddingConfig` + `Provider` enum (OpenAiCompatible covering OpenRouter/OpenAI/local, Gemini, Mock); env-first resolution; client reuse | point `base_url` at OpenRouter, run `nearest("string")`, get correct neighbours vs OpenRouter-stored vectors; CLI shares the config | +| **3 — Record identity in schema IR** | `@embed` args grammar + catalog + IR persistence | `schema show` reflects recorded model/dim | +| **4 — Query-time validation** | compare resolved vs recorded; typed error; planner refusal on identity change | stored model A vs read model B → loud error, never silent garbage | +| **5 — Cluster provider wiring** | `providers.embedding` resources; `graphs..embedding_provider`; `${NAME}` resolution at server boot | provider profile resolved from applied cluster state; legacy `omnigraph.yaml` untouched | +| later | ingest-time `@embed` (Shape C) | separate RFC | + +**Status:** Phases 1–5 are implemented (`@embed("…", model="…")` is recorded in the schema IR and validated at +query time with a typed same-space error; an unrecorded `@embed` keeps working with no check; cluster-served +graphs can bind an applied `providers.embedding` profile). Ingest-time `@embed` remains. + +## Invariants & deny-list check + +- **Invariant 9 (integrity failures are loud):** strengthened — query-time identity mismatch becomes a typed + error instead of silent wrong results. +- **Invariant 10 (query semantics are first-class IR concepts):** embedding identity becomes IR/catalog data, + not an out-of-band env guess. +- **Invariant 11 (transport stays at the boundary):** strengthened — Phase 1 removes the HTTP client + async + runtime (`reqwest`, `tokio`) from `omnigraph-compiler`, whose own manifest advertises "Zero Lance + dependency"; the embedding HTTP client lives only in the engine. +- **Invariant 12 / secret handling:** api-keys resolve through the existing credential chain; never in schema + or checked-in config. +- **Invariant 13 (bounded & observable):** addressed — the deadline bounds latency; tracing makes the + subsystem observable. +- **Deny-list — "silent fallback / dropped rows":** the cross-space ranking is exactly a silent-wrong-result; + this RFC closes it. +- **Deny-list — "new write paths that advance Lance HEAD before manifest publish without a recovery + sidecar":** the ingest phase (deferred) explicitly keeps embedding *before* staging, so it does not create a + new HEAD-advancing write path. No invariant is weakened. + +## Drawbacks & alternatives + +- **Do nothing.** The happy path works today, so the live risk is narrow (P1 + P3). But the provider hardwiring + and missing validation are a latent silent-wrong-results class that bites the first non-Gemini user. +- **Interim env-only provider switch (no schema record).** Cheaper, but leaves the same-space guarantee to + operator discipline (fails P3). Folded in as Phase 2's env-first resolution, with Phases 3–4 adding the + record/validate guarantee. +- **Trait-based provider plug-ins now.** Rejected as unearned complexity for two first-party providers; the + enum upgrades to a trait behind the same surface if needed. +- **Stamp identity in the manifest or Lance field metadata instead of the IR.** The manifest is the wrong + granularity; field metadata needs net-new wiring and a query-path dataset open. The IR is where `@embed` + already lives and is already read at query time (see spike). + +## Reversibility + +Mostly reversible. Phases 1–2 and 5 are code/config (env, CLI, cluster keys) and cheap to undo. Phase 3 +(recording identity in the schema IR) is **near-permanent** — it changes the on-disk `_schema.ir.json` shape +and the schema hash — so it earns the most scrutiny: the single-arg `@embed` form stays byte-compatible, and +recorded identity is additive (absent identity = today's behaviour). Provider request/response shapes are +external API contracts, not our format, so adding providers is reversible. + +## Gateway tradeoff (OpenRouter) + +Routing through OpenRouter (the default) buys provider-independence with one key and one billing relationship, +batch input, and access to the GA `google/gemini-embedding-2`. Costs to accept, all controllable: + +- **Extra network hop** → more query-path latency. The Phase-1 deadline bounds it; the cache mitigates repeats. +- **Text transits a third party.** OpenRouter's `provider: { data_collection }` routing preference controls + retention; shops with strict residency requirements use `kind: gemini`/`openai-compatible` pointed at the + provider (or a self-hosted endpoint) directly instead of the gateway. Provider-independence means this is a + config change, not a code change. +- **Loses Gemini's task-type asymmetry** when Gemini is reached via the OpenAI-compatible gateway (both sides + embed symmetrically). This is a retrieval-quality cost, **not** a same-space correctness cost — both stored + and query vectors take the identical path, so they stay in one space by construction. Shops that want the + asymmetry use `kind: gemini`. + +## Unresolved questions + +- GA Gemini model name — **resolved:** `google/gemini-embedding-2` (via OpenRouter) / `gemini-embedding-2` + (direct), 128–3072 dims (recommended 768/1536/3072). Default flips off `-preview` in Phase 2. +- Gemini `batchEmbedContents` availability — **moot** when going through the OpenAI-compatible gateway (its + `input` array batches); still relevant only for the direct `kind: gemini` path. +- Identity granularity: per-vector-property args vs one graph-level default profile referenced by name. +- Whether to backfill recorded identity for existing graphs, or treat absent-identity as "unvalidated, legacy" + permanently. +- Default model for the zero-config tier: `google/gemini-embedding-2` vs `openai/text-embedding-3-large` + (both 3072-capable) — pick the project default. diff --git a/docs/user/clusters/config.md b/docs/user/clusters/config.md index d5016b3..8f8caf4 100644 --- a/docs/user/clusters/config.md +++ b/docs/user/clusters/config.md @@ -12,9 +12,9 @@ that ledger, manually remove a held local state lock by exact lock id, and catalog writes, **graph creation** (a declared graph that does not exist yet is initialized by apply at the derived root), **schema updates** (soft drops only), and — behind an explicit, digest-bound **approval** — **graph -deletion**. It does not perform data-loss schema migrations or start servers: -a separate `omnigraph-server --cluster ` serves the applied revision on -its next (re)start. +deletion**. It does not perform data-loss schema migrations, start servers, +or run data loads. A server can boot from the applied ledger with +`omnigraph-server --cluster `. ## Commands @@ -56,7 +56,7 @@ The exact contract: ## Supported `cluster.yaml` -Stage 3A accepts only this resource subset: +The current config surface accepts this resource subset: ```yaml version: 1 @@ -67,9 +67,18 @@ state: backend: cluster lock: true +providers: + embedding: + default: + kind: openai-compatible + base_url: https://openrouter.ai/api/v1 + model: openai/text-embedding-3-large + api_key: ${OPENROUTER_API_KEY} + graphs: knowledge: schema: knowledge.pg + embedding_provider: default queries: queries/ # discover every `query ` in queries/*.gq policies: @@ -98,6 +107,17 @@ updates all of its queries together. Paths are relative to the config directory — the cluster is one explicit folder, so no `./` prefixes are needed. +`providers.embedding.` defines a query-time embedding provider profile +for cluster-served graphs. A graph opts in with `embedding_provider: `; +bare names normalize to `provider.embedding.`. Supported provider +`kind` values are `openai-compatible` (default/OpenRouter-compatible), +`openai` (OpenAI's own host), `gemini`, and `mock`. Real providers require +`api_key: ${ENV_VAR}`; inline secrets are rejected. The env var is resolved +only when a `--cluster` server boots, so `cluster validate`, `plan`, and +`apply` do not need deployment secrets. `mock` is deterministic and does not +require `api_key`. Vector dimensions stay schema-driven by the target +`Vector(N)` column, not the provider profile. + `storage:` (optional) is the **storage root URI** for everything the cluster stores — the state ledger, lock, content-addressed catalog, recovery sidecars, approval artifacts, and the derived graph roots @@ -132,10 +152,12 @@ operation is active. - stored-query parsing and query-name matching - stored-query type-checking against the desired schema - policy `applies_to` graph references +- embedding provider profiles and graph `embedding_provider` references -Fields reserved for later phases, such as `pipelines`, `embeddings`, `ui`, -`aliases`, and `bindings`, fail with a typed diagnostic instead of being -silently ignored. +Fields reserved for later phases, such as `pipelines`, top-level +`embeddings`, `ui`, `aliases`, and `bindings`, fail with a typed diagnostic +instead of being silently ignored. Under `providers`, only `embedding` is +supported today; other provider namespaces fail as unsupported config. ## Planning @@ -155,9 +177,21 @@ resource is planned as a create. If present, the file must use this shape: "applied_revision": { "config_digest": "...", "resources": { - "graph.knowledge": { "digest": "..." }, "schema.knowledge": { "digest": "..." }, "query.knowledge.find_experts": { "digest": "..." }, + "provider.embedding.default": { + "digest": "...", + "embedding_profile": { + "kind": "openai-compatible", + "base_url": "https://openrouter.ai/api/v1", + "model": "openai/text-embedding-3-large", + "api_key": "${OPENROUTER_API_KEY}" + } + }, + "graph.knowledge": { + "digest": "...", + "embedding_provider": "provider.embedding.default" + }, "policy.base": { "digest": "...", "applies_to": ["cluster", "graph.knowledge"] diff --git a/docs/user/reference/constants.md b/docs/user/reference/constants.md index 2cad0d1..ec19f4d 100644 --- a/docs/user/reference/constants.md +++ b/docs/user/reference/constants.md @@ -19,11 +19,13 @@ | Expand mode override | `OMNIGRAPH_TRAVERSAL_MODE` (`indexed`\|`csr`; unset = cost-based auto) | traversal | | Default body limit | `1 MB` | HTTP server | | Ingest body limit | `32 MB` | HTTP server | -| Engine embed model | `gemini-embedding-2-preview` | engine embedding | -| Compiler embed model | `text-embedding-3-small` | compiler embedding | -| Embed timeout | `30 000 ms` | both clients | -| Embed retries | `4` | both clients | -| Embed retry backoff | `200 ms` | both clients | +| Default embed provider/model | `openai-compatible` / `openai/text-embedding-3-large` | engine embedding | +| OpenAI-direct embed model | `text-embedding-3-large` | engine embedding | +| Gemini-direct embed model | `gemini-embedding-2` | engine embedding | +| Embed deadline | `OMNIGRAPH_EMBED_DEADLINE_MS=60000` | engine embedding | +| Embed timeout | `OMNIGRAPH_EMBED_TIMEOUT_MS=30000` | engine embedding | +| Embed retries | `OMNIGRAPH_EMBED_RETRY_ATTEMPTS=4` | engine embedding | +| Embed retry backoff | `OMNIGRAPH_EMBED_RETRY_BACKOFF_MS=200` | engine embedding | | LANCE memory pool default | `1 GB` (raised in v0.3.0) | runtime | **Expand traversal dispatch.** With `OMNIGRAPH_TRAVERSAL_MODE` unset, the engine diff --git a/docs/user/schema/index.md b/docs/user/schema/index.md index d0fcd1b..105281c 100644 --- a/docs/user/schema/index.md +++ b/docs/user/schema/index.md @@ -45,7 +45,7 @@ Edge bodies only allow `@unique` and `@index`. - `@` or `@()` on any declaration or property. - Known annotations: - - `@embed` on a Vector property — names the *source* property whose text gets embedded into this vector at ingest. + - `@embed("source_property")` on a Vector property — records which String property is the embedding source for query-time `nearest($v, "string")` auto-embedding. It is a catalog annotation; it does **not** populate the vector at ingest (supply vectors in load data, or pre-fill via the offline `omnigraph embed` pipeline). An optional `model="…"` kwarg (`@embed("source_property", model="openai/text-embedding-3-large")`) records the embedding model so a `nearest()` query whose embedder uses a different model is rejected loudly; `model` is the only supported kwarg. See [search/embeddings.md](../search/embeddings.md). - `@description("…")`, `@instruction("…")` on query declarations (carried through to clients). - Custom annotations are accepted by the parser and surfaced in catalog metadata; unrecognized annotations don't fail compilation. diff --git a/docs/user/search/embeddings.md b/docs/user/search/embeddings.md index 31455c4..e69d928 100644 --- a/docs/user/search/embeddings.md +++ b/docs/user/search/embeddings.md @@ -1,31 +1,112 @@ # Embeddings -OmniGraph has **two** embedding clients with different defaults and purposes. +OmniGraph embeds text through a **single, provider-independent client** resolved from one +`EmbeddingConfig { provider, model, base_url, api_key }`. The same resolved config is used by the query-time +auto-embed of a string in `nearest($v, "string")` and by the offline `omnigraph embed` file pipeline, so +query vectors and document vectors share one model and one vector space. -## Compiler-side client — query-time normalization +## Providers -- Default model: `text-embedding-3-small` (OpenAI-style schema) -- Env: `NANOGRAPH_EMBED_MODEL`, `OPENAI_API_KEY`, `OPENAI_BASE_URL` (default `https://api.openai.com/v1`), `NANOGRAPH_EMBEDDINGS_MOCK`, `NANOGRAPH_EMBED_TIMEOUT_MS=30000`, `NANOGRAPH_EMBED_RETRY_ATTEMPTS=4`, `NANOGRAPH_EMBED_RETRY_BACKOFF_MS=200` -- Methods: `embed_text(input, expected_dim)`, `embed_texts(inputs, expected_dim)` -- Mock mode: deterministic FNV-1a + xorshift64 → L2-normalized vectors +| `provider` | Wire shape | Use it for | +|---|---|---| +| `openai-compatible` (default) | `POST {base}/embeddings`, bearer auth, `{model, input, dimensions}` | **OpenRouter** (the default gateway — one key for many models), OpenAI direct, or a self-hosted endpoint (vLLM / Ollama / LM Studio) | +| `gemini` | `POST {base}/models/{model}:embedContent`, `x-goog-api-key`, with `RETRIEVAL_QUERY` / `RETRIEVAL_DOCUMENT` task types | Reaching Google's `generativelanguage` API directly | +| `mock` | none — deterministic offline vectors | Tests and local dev without a key | -## Engine-side client — runtime ingest +Vectors are stored L2-normalized as `FixedSizeList(Float32, dim)`; the requested output dimension is driven by +the target column width and sent as Gemini `outputDimensionality` / OpenAI `dimensions`. -- Model: `gemini-embedding-2-preview` -- Env: `GEMINI_API_KEY`, `OMNIGRAPH_GEMINI_BASE_URL` (default Google generativelanguage v1beta), `OMNIGRAPH_EMBED_TIMEOUT_MS=30000`, `OMNIGRAPH_EMBED_RETRY_ATTEMPTS=4`, `OMNIGRAPH_EMBED_RETRY_BACKOFF_MS=200`, `OMNIGRAPH_EMBEDDINGS_MOCK` -- Two task types: `embed_query_text` (RETRIEVAL_QUERY) and `embed_document_text` (RETRIEVAL_DOCUMENT) -- Exponential backoff with retryable detection (timeouts, 429, 5xx) +## Configuration (cluster) -## Schema integration +Cluster-served graphs can pin their query-time embedder in `cluster.yaml`: -Mark a Vector property with `@embed("source_text_property")`. At ingest, the engine pulls the source text and writes the embedding into the vector column. Stored as L2-normalized FixedSizeList(Float32, dim). +```yaml +providers: + embedding: + default: + kind: openai-compatible + base_url: https://openrouter.ai/api/v1 + model: openai/text-embedding-3-large + api_key: ${OPENROUTER_API_KEY} + +graphs: + knowledge: + schema: knowledge.pg + embedding_provider: default +``` + +`embedding_provider` references `providers.embedding.`; bare names are +normalized to that typed ref. The server resolves `${ENV_VAR}` only when it +boots from the applied cluster ledger, so `cluster validate`, `plan`, and +`apply` do not need provider secrets. Inline API keys are rejected. `mock` +needs no key. Vector dimensions stay schema-driven by the target `Vector(N)` +column. + +Direct single-graph serving, embedded callers, and the offline +`omnigraph embed` pipeline use environment configuration unless they inject an +`EmbeddingConfig` directly. + +## Configuration (environment) + +| Variable | Meaning | +|---|---| +| `OMNIGRAPH_EMBED_PROVIDER` | `openai-compatible` (default, → OpenRouter) \| `openai` (→ OpenAI's own host) \| `gemini` \| `mock` | +| `OMNIGRAPH_EMBED_BASE_URL` | endpoint base; defaults `https://openrouter.ai/api/v1` (`openai-compatible`/unset), `https://api.openai.com/v1` (`openai`), `https://generativelanguage.googleapis.com/v1beta` (`gemini`) | +| `OMNIGRAPH_EMBED_MODEL` | model id; defaults `openai/text-embedding-3-large` (OpenRouter), `text-embedding-3-large` (`openai`), `gemini-embedding-2` (`gemini`) | +| `OPENROUTER_API_KEY` / `OPENAI_API_KEY` | api key for `openai-compatible` (OpenRouter preferred) | +| `GEMINI_API_KEY` | api key for `gemini` | +| `OMNIGRAPH_EMBED_DEADLINE_MS` | total wall-clock budget for one embed call across all retries (default `60000`; `0` = unbounded) | +| `OMNIGRAPH_EMBED_TIMEOUT_MS` | per-request HTTP timeout (default `30000`) | +| `OMNIGRAPH_EMBED_RETRY_ATTEMPTS` / `OMNIGRAPH_EMBED_RETRY_BACKOFF_MS` | retry policy (defaults `4` / `200`) | +| `OMNIGRAPH_EMBEDDINGS_MOCK` | set truthy to force the deterministic mock provider | + +The default zero-config path is OpenRouter: set `OPENROUTER_API_KEY` and run. Reaching Gemini takes +`OMNIGRAPH_EMBED_PROVIDER=gemini` plus `GEMINI_API_KEY`. + +### Behavior notes + +- **Bounded latency.** Each embed call is wrapped in `OMNIGRAPH_EMBED_DEADLINE_MS`, so a degraded + provider cannot hang a read for the full retry envelope. +- **Reuse.** The query path builds the client once per graph handle (on the first `nearest($v, "string")` + that needs embedding) and reuses it, keeping the provider connection pool warm. A graph that never embeds + needs no provider key. +- **Observability.** Embed calls emit `tracing` events under `target = "omnigraph::embedding"` (provider, + model, dim, attempt, elapsed, outcome). + +## `@embed` schema annotation + +Mark a Vector property with `@embed("source_text_property")`. This is a **catalog annotation** consumed by the +query typechecker and linter: it records which String property is the embedding source and lets +`nearest($v, "string")` auto-embed a query string for comparison against that vector column. + +Optionally record the model that produced the stored vectors: +`@embed("source_text_property", model="openai/text-embedding-3-large")`. When a model is recorded, a +`nearest($v, "string")` query is **rejected with a typed error** unless the resolved query embedder uses the +same model — so stored and query vectors are guaranteed same-space instead of silently ranking across spaces. +To fix a mismatch, set `OMNIGRAPH_EMBED_MODEL` (and the matching provider) to the recorded model, or re-embed. +The recorded model is the literal string, so `openai/text-embedding-3-large` (via OpenRouter) and +`text-embedding-3-large` (OpenAI direct) are distinct identities; use the matching string. Changing a recorded +model is a loud `schema apply` refusal (treat it as a re-embed migration). `@embed` without a model keeps +working with no validation. `model` is the only supported `@embed` argument; any other is a parse error. + +**It does not embed at ingest.** Stored vectors are supplied directly in your load data, or pre-filled by the +offline `omnigraph embed` pipeline below. (Ingest-time execution of `@embed` is a planned enhancement.) ## CLI `omnigraph embed` (offline file pipeline) -Operates on **JSONL files** (not on a graph). Three modes (mutually exclusive): +Operates on **JSONL files** (not on a graph), using the same resolved provider config. Three modes (mutually +exclusive): - (default) `fill_missing` — only embed rows whose target field is empty - `--reembed-all` — overwrite all - `--clean` — strip embeddings Inputs are either a single seed manifest YAML or `--input/--output/--spec`. Selectors `--type T`, `--select T:field=value` filter rows. Streams JSONL → JSONL. + +## Migration + +This release has no backwards-compatibility shim (pre-release). The default provider is now OpenRouter, and +the legacy `OMNIGRAPH_GEMINI_BASE_URL` is removed. A graph whose vectors were produced with +`gemini-embedding-2-preview` should either re-embed, or pin the query-time embedder to match by setting +`OMNIGRAPH_EMBED_PROVIDER=gemini` and `OMNIGRAPH_EMBED_MODEL=gemini-embedding-2-preview` (the stored and query +vectors must come from the same model to be comparable).