Merge pull request #248 from ModernRelay/ragnorc/shaping-config-integration

RFC-012: embedding provider independence + remove dead client
2026-06-18 02:24:27 +02:00 · 2026-06-16 04:38:50 +03:00 · 2026-06-16 04:38:50 +03:00 · 51852fdbed
commit 51852fdbed
parent b5658dc696 e33041c8b7
37 changed files with 2663 additions and 655 deletions
--- a/AGENTS.md
+++ b/AGENTS.md
@ -80,7 +80,7 @@ Full diagram and concurrency model: [docs/dev/architecture.md](docs/dev/architec
 | Mutations — insert/update/delete, D2, atomicity | [docs/user/mutations/index.md](docs/user/mutations/index.md) |
 | Search funcs (`nearest`/`bm25`/`rrf`), hybrid ranking | [docs/user/search/index.md](docs/user/search/index.md) |
 | Indexes (BTREE / inverted / vector / graph topology) | [docs/user/search/indexes.md](docs/user/search/indexes.md) |
-| Embeddings (compiler + engine clients, env vars, `@embed`) | [docs/user/search/embeddings.md](docs/user/search/embeddings.md) |
+| Embeddings (engine client, env vars, `@embed`) | [docs/user/search/embeddings.md](docs/user/search/embeddings.md) |
 | Concepts — what OmniGraph is, L1/L2 framing | [docs/user/concepts/index.md](docs/user/concepts/index.md) |
 | Quickstart — init → load → query → branch | [docs/user/quickstart.md](docs/user/quickstart.md) |
 | Branches, commit graph, system branches | [docs/user/branching/index.md](docs/user/branching/index.md) |
--- a/Cargo.lock
+++ b/Cargo.lock
@ -4915,12 +4915,10 @@ dependencies = [
 "arrow-select",
 "pest",
 "pest_derive",
- "reqwest 0.12.28",
 "serde",
 "serde_json",
 "sha2 0.10.9",
 "thiserror",
- "tokio",
 ]

 [[package]]
--- a/crates/omnigraph-cli/src/embed.rs
+++ b/crates/omnigraph-cli/src/embed.rs
@ -9,8 +9,6 @@ use omnigraph::embedding::EmbeddingClient;
 use serde::{Deserialize, Serialize};
 use serde_json::{Map, Value, json};

-const DEFAULT_EMBED_MODEL: &str = "gemini-embedding-2-preview";
-
 #[derive(Debug, Args, Clone)]
 pub(crate) struct EmbedArgs {
    /// Seed manifest path
@ -85,8 +83,6 @@ impl EmbedMode {

 #[derive(Debug, Clone, Deserialize)]
 struct EmbedSpec {
-    #[serde(default = "default_embed_model")]
-    model: String,
    dimension: usize,
    types: BTreeMap<String, EmbedTypeSpec>,
 }
@ -180,13 +176,6 @@ pub(crate) fn resolve_embed_job(args: &EmbedArgs) -> Result<EmbedJob> {
        (input, output, spec)
    };

-    if spec.model != DEFAULT_EMBED_MODEL {
-        bail!(
-            "only {} is supported for explicit seed embeddings right now",
-            DEFAULT_EMBED_MODEL
-        );
-    }
-
    Ok(EmbedJob {
        input,
        output,
@ -305,7 +294,14 @@ pub(crate) async fn run_embed_job(job: &EmbedJob) -> Result<EmbedOutput> {
        cleaned_rows,
        mode: job.mode.as_str(!job.selectors.is_empty()),
        dimension: job.spec.dimension,
-        model: job.spec.model.clone(),
+        // The embedding model is resolved solely from the provider config; the
+        // spec carries no model field, so there is no second source of truth to
+        // silently disagree with the API. Report what was actually used (empty
+        // for `--clean`, which builds no client).
+        model: client
+            .as_ref()
+            .map(|c| c.config().model.clone())
+            .unwrap_or_default(),
    })
 }

@ -315,10 +311,6 @@ fn temp_output_path(output: &Path) -> PathBuf {
    PathBuf::from(temp)
 }

-fn default_embed_model() -> String {
-    DEFAULT_EMBED_MODEL.to_string()
-}
-
 fn load_embed_spec(path: &Path) -> Result<EmbedSpec> {
    Ok(serde_json::from_str(&fs::read_to_string(path)?)?)
 }
--- a/crates/omnigraph-cli/src/output.rs
+++ b/crates/omnigraph-cli/src/output.rs
@ -696,9 +696,24 @@ pub(crate) fn render_constraint(constraint: &omnigraph_compiler::schema::ast::Co
 pub(crate) fn render_annotations(annotations: &[omnigraph_compiler::schema::ast::Annotation]) -> String {
    annotations
        .iter()
-        .map(|annotation| match &annotation.value {
-            Some(value) => format!("@{}({})", annotation.name, value),
-            None => format!("@{}", annotation.name),
+        .map(|annotation| {
+            let mut args: Vec<String> = Vec::new();
+            // Values are parsed via `decode_string_literal` (quotes stripped), so
+            // re-quote them as string literals on render — otherwise a value with
+            // non-ident chars (e.g. `model=openai/text-embedding-3-large`) fails to
+            // round-trip back through the schema parser (`annotation_kwarg` wants a
+            // quoted `literal`, not a bare token).
+            if let Some(value) = &annotation.value {
+                args.push(format!("\"{}\"", value));
+            }
+            for (key, val) in &annotation.kwargs {
+                args.push(format!("{}=\"{}\"", key, val));
+            }
+            if args.is_empty() {
+                format!("@{}", annotation.name)
+            } else {
+                format!("@{}({})", annotation.name, args.join(", "))
+            }
        })
        .collect::<Vec<_>>()
        .join(", ")
@ -970,3 +985,43 @@ pub(crate) fn resolve_table_render_options() -> ReadRenderOptions {
        cell_layout: operator.defaults.table_cell_layout.unwrap_or_default(),
    }
 }
+
+#[cfg(test)]
+mod tests {
+    use omnigraph_compiler::schema::ast::Annotation;
+    use omnigraph_compiler::schema::parser::parse_schema;
+    use std::collections::BTreeMap;
+
+    use super::render_annotations;
+
+    #[test]
+    fn render_annotations_quotes_values_so_embed_round_trips() {
+        let mut kwargs = BTreeMap::new();
+        kwargs.insert(
+            "model".to_string(),
+            "openai/text-embedding-3-large".to_string(),
+        );
+        let embed = Annotation {
+            name: "embed".to_string(),
+            value: Some("title".to_string()),
+            kwargs,
+        };
+
+        let rendered = render_annotations(std::slice::from_ref(&embed));
+        assert_eq!(
+            rendered,
+            r#"@embed("title", model="openai/text-embedding-3-large")"#
+        );
+
+        // The bug: an unquoted `model=openai/text-embedding-3-large` is not a
+        // valid `annotation_kwarg` literal, so `schema show` output did not
+        // re-parse. The rendered form must round-trip through the grammar.
+        let schema = format!("node Doc {{\ntitle: String\nembedding: Vector(3) {rendered}\n}}\n");
+        let parsed = parse_schema(&schema);
+        assert!(
+            parsed.is_ok(),
+            "rendered @embed must re-parse: {:?}",
+            parsed.err()
+        );
+    }
+}
--- a/crates/omnigraph-cli/tests/system_local.rs
+++ b/crates/omnigraph-cli/tests/system_local.rs
@ -1036,6 +1036,11 @@ query vector_search($q: String) {

    let result = parse_stdout_json(&output_success(
        cli()
+            // Stored vectors above were produced with gemini-embedding-2-preview;
+            // pin the query-time embedder to the same provider/model so the
+            // auto-embedded `$q` lands in the same vector space.
+            .env("OMNIGRAPH_EMBED_PROVIDER", "gemini")
+            .env("OMNIGRAPH_EMBED_MODEL", "gemini-embedding-2-preview")
            .arg("read")
            .arg("--store")
            .arg(&graph)
--- a/crates/omnigraph-cluster/src/config.rs
+++ b/crates/omnigraph-cluster/src/config.rs
@ -42,7 +42,12 @@ pub(crate) fn resolve_query_decls(
            return (
                map.iter()
                    .map(|(name, config)| {
-                        (name.clone(), QueryConfig { file: config.file.clone() })
+                        (
+                            name.clone(),
+                            QueryConfig {
+                                file: config.file.clone(),
+                            },
+                        )
                    })
                    .collect(),
                BTreeMap::new(),
@ -66,7 +71,10 @@ pub(crate) fn resolve_query_decls(
                    diagnostics.push(Diagnostic::error(
                        "query_dir_unreadable",
                        format!("graphs.{graph_id}.queries"),
-                        format!("could not list query directory '{}': {err}", resolved.display()),
+                        format!(
+                            "could not list query directory '{}': {err}",
+                            resolved.display()
+                        ),
                    ));
                    continue;
                }
@ -76,7 +84,10 @@ pub(crate) fn resolve_query_decls(
                diagnostics.push(Diagnostic::warning(
                    "query_dir_empty",
                    format!("graphs.{graph_id}.queries"),
-                    format!("query directory '{}' contains no .gq files", resolved.display()),
+                    format!(
+                        "query directory '{}' contains no .gq files",
+                        resolved.display()
+                    ),
                ));
            }
            for path in entries {
@ -132,7 +143,12 @@ pub(crate) fn resolve_query_decls(
                continue;
            }
            origin.insert(name.clone(), declared.clone());
-            registry.insert(name, QueryConfig { file: declared.clone() });
+            registry.insert(
+                name,
+                QueryConfig {
+                    file: declared.clone(),
+                },
+            );
        }
        contents.insert(declared, source);
    }
@ -269,8 +285,6 @@ pub(crate) fn validate_cluster_header(
    }
 }

-
-
 pub(crate) fn state_resource_digests(state: &ClusterState) -> BTreeMap<String, String> {
    state
        .applied_revision
@ -295,7 +309,6 @@ pub(crate) fn initial_import_state(desired: &DesiredCluster) -> ClusterState {
    }
 }

-
 pub(crate) async fn observe_declared_graphs(
    desired: &DesiredCluster,
    backend: &ClusterStore,
@ -350,19 +363,28 @@ pub(crate) async fn observe_declared_graphs(
                    StateResource {
                        digest: observation.schema_digest.clone(),
                        applies_to: None,
+                        embedding_provider: None,
+                        embedding_profile: None,
                    },
                );
                let query_digests = state_query_digests_for_graph(state, &graph.id);
+                let embedding_provider = state_graph_embedding_provider(state, &graph.id);
+                let embedding_provider_digest =
+                    state_embedding_provider_digest(state, embedding_provider.as_deref());
                let graph_digest_value = graph_digest(
                    &graph.id,
                    Some(&observation.schema_digest),
                    Some(&query_digests),
+                    embedding_provider.as_deref(),
+                    embedding_provider_digest.as_ref(),
                );
                state.applied_revision.resources.insert(
                    graph_address.clone(),
                    StateResource {
                        digest: graph_digest_value,
                        applies_to: None,
+                        embedding_provider,
+                        embedding_profile: None,
                    },
                );
                state.observations.insert(
@ -499,7 +521,6 @@ pub(crate) fn graph_observation_json(observation: GraphObservationJson<'_>) -> s
    })
 }

-
 pub(crate) fn load_desired(config_dir: &Path) -> LoadOutcome {
    let parsed = parse_cluster_config(config_dir);
    let config_dir = parsed.config_dir;
@ -519,6 +540,35 @@ pub(crate) fn load_desired(config_dir: &Path) -> LoadOutcome {
    let mut dependencies = BTreeSet::new();
    let mut graph_query_digests: BTreeMap<String, BTreeMap<String, String>> = BTreeMap::new();
    let mut graph_schema_digests: BTreeMap<String, String> = BTreeMap::new();
+    let mut graph_embedding_providers: BTreeMap<String, String> = BTreeMap::new();
+    let mut embedding_provider_digests: BTreeMap<String, String> = BTreeMap::new();
+    let mut embedding_providers: BTreeMap<String, EmbeddingProviderConfig> = BTreeMap::new();
+
+    for (provider_name, profile) in &raw.providers.embedding {
+        validate_id(
+            "embedding provider name",
+            &format!("providers.embedding.{provider_name}"),
+            provider_name,
+            &mut diagnostics,
+        );
+        let address = embedding_provider_address(provider_name);
+        profile.validate(
+            format!("providers.embedding.{provider_name}"),
+            &mut diagnostics,
+        );
+        let digest = embedding_provider_digest(profile);
+        embedding_provider_digests.insert(address.clone(), digest.clone());
+        embedding_providers.insert(address.clone(), profile.clone());
+        resources.insert(
+            address.clone(),
+            ResourceSummary {
+                address,
+                kind: "embedding_provider".to_string(),
+                digest,
+                path: None,
+            },
+        );
+    }

    for (graph_id, graph) in &raw.graphs {
        validate_id(
@ -533,6 +583,35 @@ pub(crate) fn load_desired(config_dir: &Path) -> LoadOutcome {
            from: schema_address.clone(),
            to: graph_address.clone(),
        });
+        if let Some(provider_ref) = graph.embedding_provider.as_deref() {
+            match normalize_embedding_provider_target(provider_ref) {
+                EmbeddingProviderTarget::Provider(provider_name) => {
+                    let provider_address = embedding_provider_address(&provider_name);
+                    if raw.providers.embedding.contains_key(&provider_name) {
+                        dependencies.insert(Dependency {
+                            from: graph_address.clone(),
+                            to: provider_address.clone(),
+                        });
+                        graph_embedding_providers.insert(graph_id.clone(), provider_address);
+                    } else {
+                        diagnostics.push(Diagnostic::error(
+                            "dangling_embedding_provider_reference",
+                            format!("graphs.{graph_id}.embedding_provider"),
+                            format!(
+                                "graph references embedding provider `{provider_name}`, but no providers.embedding.{provider_name} profile is declared"
+                            ),
+                        ));
+                    }
+                }
+                EmbeddingProviderTarget::WrongKind(kind) => diagnostics.push(Diagnostic::error(
+                    "wrong_kind_reference",
+                    format!("graphs.{graph_id}.embedding_provider"),
+                    format!(
+                        "embedding_provider expects a providers.embedding ref or bare provider name, got `{kind}`"
+                    ),
+                )),
+            }
+        }

        let schema_path = resolve_config_path(&config_dir, &graph.schema);
        let schema_source = match fs::read_to_string(&schema_path) {
@ -646,10 +725,15 @@ pub(crate) fn load_desired(config_dir: &Path) -> LoadOutcome {
    }

    for graph_id in raw.graphs.keys() {
+        let embedding_provider = graph_embedding_providers.get(graph_id);
+        let embedding_provider_digest =
+            embedding_provider.and_then(|address| embedding_provider_digests.get(address));
        let digest = graph_digest(
            graph_id,
            graph_schema_digests.get(graph_id),
            graph_query_digests.get(graph_id),
+            embedding_provider.map(String::as_str),
+            embedding_provider_digest,
        );
        resources.insert(
            graph_address(graph_id),
@ -754,6 +838,7 @@ pub(crate) fn load_desired(config_dir: &Path) -> LoadOutcome {
                .get(graph_id)
                .cloned()
                .unwrap_or_default(),
+            embedding_provider: graph_embedding_providers.get(graph_id).cloned(),
        })
        .collect();
    let config_digest = desired_config_digest(&raw, &resource_digests);
@ -769,6 +854,7 @@ pub(crate) fn load_desired(config_dir: &Path) -> LoadOutcome {
            resources: resource_list,
            dependencies,
            policy_bindings,
+            embedding_providers,
        }),
        diagnostics,
        config_dir,
@ -828,7 +914,6 @@ pub(crate) fn future_field_diagnostics(text: &str) -> Vec<Diagnostic> {
    let future_fields = [
        "apply",
        "env_file",
-        "providers",
        "pipelines",
        "embeddings",
        "ui",
@ -882,6 +967,21 @@ pub(crate) fn normalize_policy_target(value: &str) -> PolicyTarget {
    }
 }

+enum EmbeddingProviderTarget {
+    Provider(String),
+    WrongKind(String),
+}
+
+fn normalize_embedding_provider_target(value: &str) -> EmbeddingProviderTarget {
+    if let Some(name) = value.strip_prefix("provider.embedding.") {
+        EmbeddingProviderTarget::Provider(name.to_string())
+    } else if value.contains('.') {
+        EmbeddingProviderTarget::WrongKind(value.to_string())
+    } else {
+        EmbeddingProviderTarget::Provider(value.to_string())
+    }
+}
+
 pub(crate) fn graph_address(graph_id: &str) -> String {
    format!("graph.{graph_id}")
 }
@ -898,6 +998,10 @@ pub(crate) fn policy_address(policy_name: &str) -> String {
    format!("policy.{policy_name}")
 }

+pub(crate) fn embedding_provider_address(provider_name: &str) -> String {
+    format!("provider.embedding.{provider_name}")
+}
+
 pub(crate) fn resolve_config_path(config_dir: &Path, path: &Path) -> PathBuf {
    if path.is_absolute() {
        path.to_path_buf()
--- a/crates/omnigraph-cluster/src/diff.rs
+++ b/crates/omnigraph-cluster/src/diff.rs
@ -152,7 +152,9 @@ pub(crate) fn approved_resources(
        let candidates: Vec<&ApprovalArtifact> = artifacts
            .iter()
            .map(|(_, artifact)| artifact)
-            .filter(|artifact| artifact.consumed_at.is_none() && artifact.resource == change.resource)
+            .filter(|artifact| {
+                artifact.consumed_at.is_none() && artifact.resource == change.resource
+            })
            .collect();
        if candidates.is_empty() {
            continue;
@ -181,6 +183,7 @@ pub(crate) enum ResourceKind {
    Schema(String),
    Query { graph: String, name: String },
    Policy(String),
+    EmbeddingProvider(String),
    Unknown,
 }

@ -199,6 +202,8 @@ pub(crate) fn resource_kind(address: &str) -> ResourceKind {
        }
    } else if let Some(name) = address.strip_prefix("policy.") {
        ResourceKind::Policy(name.to_string())
+    } else if let Some(name) = address.strip_prefix("provider.embedding.") {
+        ResourceKind::EmbeddingProvider(name.to_string())
    } else {
        ResourceKind::Unknown
    }
@ -261,8 +266,7 @@ pub(crate) fn classify_changes(
        let (disposition, reason) = match resource_kind(&change.resource) {
            ResourceKind::Schema(graph) => match change.operation {
                PlanOperation::Create
-                    if graph_creates.contains(&graph)
-                        && !pending_recovery.contains(&graph) =>
+                    if graph_creates.contains(&graph) && !pending_recovery.contains(&graph) =>
                {
                    // Applied with the graph create — the init carries it.
                    (ApplyDisposition::Applied, None)
@ -325,10 +329,7 @@ pub(crate) fn classify_changes(
                    if pending_recovery.contains(&graph) {
                        (ApplyDisposition::Blocked, Some("cluster_recovery_pending"))
                    } else if schema_pending.contains(&graph) {
-                        (
-                            ApplyDisposition::Blocked,
-                            Some("dependency_not_applied"),
-                        )
+                        (ApplyDisposition::Blocked, Some("dependency_not_applied"))
                    } else {
                        // A graph create in the same plan no longer blocks:
                        // creates execute first in the same apply run.
@ -353,9 +354,8 @@ pub(crate) fn classify_changes(
                    }
                }
            },
-            ResourceKind::Unknown => {
-                (ApplyDisposition::Deferred, Some("apply_unsupported_kind"))
-            }
+            ResourceKind::EmbeddingProvider(_) => (ApplyDisposition::Applied, None),
+            ResourceKind::Unknown => (ApplyDisposition::Deferred, Some("apply_unsupported_kind")),
        };
        change.disposition = Some(disposition);
        change.reason = reason.map(str::to_string);
--- a/crates/omnigraph-cluster/src/lib.rs
+++ b/crates/omnigraph-cluster/src/lib.rs
@ -20,18 +20,35 @@ use ulid::Ulid;
 pub mod failpoints;

 mod config;
-mod types;
 mod diff;
 mod serve;
-mod sweep;
 mod store;
+mod sweep;
+mod types;
+use config::{
+    QueriesDecl, future_field_diagnostics, graph_address, initial_import_state, load_desired,
+    normalize_policy_target, observe_declared_graphs, observe_live_graph, parse_cluster_config,
+    policy_address, preview_schema_migration, query_address, resolve_config_path,
+    resolve_query_decls, schema_address, state_resource_digests, validate_cluster_header,
+    validate_id, validate_query_source,
+};
+use diff::{
+    FailedGraphOrigin, ResourceKind, append_policy_binding_changes, approved_resources,
+    classify_changes, compute_approvals, compute_blast_radius, demote_dependents_of_failed_graphs,
+    diff_resources, resource_kind,
+};
+pub use serve::{
+    ServingGraph, ServingPolicy, ServingQuery, ServingSnapshot, cluster_graph_ids,
+    cluster_root_for_graph_uri, read_serving_snapshot, read_serving_snapshot_from_storage,
+    resolve_graph_storage_uri,
+};
 use store::{ClusterStore, StateLockGuard, StateSnapshot};
+use sweep::{
+    mark_approvals_consumed, record_approval_consumed, sweep_recovery_sidecars,
+    tombstone_graph_subtree, warn_pending_recovery_sidecars,
+};
 pub use types::*;
 use types::*;
-pub use serve::{ServingGraph, ServingPolicy, ServingQuery, ServingSnapshot, cluster_graph_ids, cluster_root_for_graph_uri, read_serving_snapshot, read_serving_snapshot_from_storage, resolve_graph_storage_uri};
-use config::{QueriesDecl, observe_declared_graphs, validate_cluster_header, future_field_diagnostics, initial_import_state, observe_live_graph, preview_schema_migration, state_resource_digests, graph_address, policy_address, query_address, schema_address, load_desired, normalize_policy_target, parse_cluster_config, resolve_config_path, resolve_query_decls, validate_id, validate_query_source};
-use diff::{FailedGraphOrigin, ResourceKind, append_policy_binding_changes, approved_resources, classify_changes, compute_approvals, compute_blast_radius, demote_dependents_of_failed_graphs, diff_resources, resource_kind};
-use sweep::{mark_approvals_consumed, record_approval_consumed, sweep_recovery_sidecars, tombstone_graph_subtree, warn_pending_recovery_sidecars};

 pub const CLUSTER_CONFIG_FILE: &str = "cluster.yaml";
 pub const CLUSTER_GRAPHS_DIR: &str = "graphs";
@ -44,10 +61,7 @@ pub const CLUSTER_APPROVALS_DIR: &str = "__cluster/approvals";

 /// The store for a load outcome: the declared `storage:` root when present,
 /// the config directory itself otherwise. A bad root is a loud error.
-fn store_for(
-    config_dir: &Path,
-    storage_root: Option<&str>,
-) -> Result<ClusterStore, Diagnostic> {
+fn store_for(config_dir: &Path, storage_root: Option<&str>) -> Result<ClusterStore, Diagnostic> {
    match storage_root {
        Some(root) => ClusterStore::for_storage_root(root),
        None => Ok(ClusterStore::for_config_dir(config_dir)),
@ -179,7 +193,12 @@ pub async fn plan_config_dir(config_dir: impl AsRef<Path>) -> PlanOutput {
        &desired.config_digest,
        &mut diagnostics,
    );
-    classify_changes(&mut changes, &desired.dependencies, &BTreeSet::new(), &approved);
+    classify_changes(
+        &mut changes,
+        &desired.dependencies,
+        &BTreeSet::new(),
+        &approved,
+    );

    // Embed real migration steps for schema updates so plan is a data-aware
    // preview; failures degrade to the digest diff with a warning.
@ -282,9 +301,7 @@ pub async fn apply_config_dir_with_options(
            ok: !has_errors(&diagnostics),
            config_dir,
            actor: actor_for_output.clone(),
-            desired_revision: DesiredRevision {
-                config_digest,
-            },
+            desired_revision: DesiredRevision { config_digest },
            state_observations: observations,
            changes,
            applied_count: 0,
@ -464,8 +481,7 @@ pub async fn apply_config_dir_with_options(
            failed_graphs.insert(graph_id.clone(), FailedGraphOrigin::GraphCreate);
            continue;
        }
-        let Some(desired_graph) = desired.graphs.iter().find(|graph| &graph.id == graph_id)
-        else {
+        let Some(desired_graph) = desired.graphs.iter().find(|graph| &graph.id == graph_id) else {
            continue;
        };
        let graph_uri = backend.graph_root(graph_id);
@ -604,8 +620,7 @@ pub async fn apply_config_dir_with_options(
            failed_graphs.insert(graph_id.clone(), FailedGraphOrigin::SchemaApply);
            continue;
        }
-        let Some(desired_graph) = desired.graphs.iter().find(|graph| &graph.id == graph_id)
-        else {
+        let Some(desired_graph) = desired.graphs.iter().find(|graph| &graph.id == graph_id) else {
            continue;
        };
        let graph_uri = backend.graph_root(graph_id);
@ -955,8 +970,10 @@ pub async fn apply_config_dir_with_options(
                                .expect("create/update always carries an after digest"),
                            // Policies record their applied bindings so the
                            // ledger is serving-sufficient (RFC-005 §D3).
-                            applies_to: desired
-                                .policy_bindings
+                            applies_to: desired.policy_bindings.get(&change.resource).cloned(),
+                            embedding_provider: None,
+                            embedding_profile: desired
+                                .embedding_providers
                                .get(&change.resource)
                                .cloned(),
                        },
@ -964,7 +981,10 @@ pub async fn apply_config_dir_with_options(
                    set_resource_status_applied(&mut new_state, &change.resource);
                }
                PlanOperation::Delete => {
-                    new_state.applied_revision.resources.remove(&change.resource);
+                    new_state
+                        .applied_revision
+                        .resources
+                        .remove(&change.resource);
                    new_state.resource_statuses.remove(&change.resource);
                }
            },
@ -1219,7 +1239,6 @@ pub async fn approve_config_dir(
    }
 }

-
 pub async fn status_config_dir(config_dir: impl AsRef<Path>) -> StatusOutput {
    let parsed = parse_cluster_config(config_dir.as_ref());
    let mut diagnostics = parsed.diagnostics;
@ -1238,7 +1257,9 @@ pub async fn status_config_dir(config_dir: impl AsRef<Path>) -> StatusOutput {
        }
    };
    let mut observations = backend.observations();
-    backend.observe_lock(&mut observations, &mut diagnostics).await;
+    backend
+        .observe_lock(&mut observations, &mut diagnostics)
+        .await;
    warn_pending_recovery_sidecars(&parsed.config_dir, &mut diagnostics);

    let mut resource_digests = BTreeMap::new();
@ -1254,9 +1275,7 @@ pub async fn status_config_dir(config_dir: impl AsRef<Path>) -> StatusOutput {
                        // Read-only point-in-time catalog check: report the
                        // findings as diagnostics; persisting Drifted statuses
                        // is refresh's job. Status never writes state.
-                        for (address, finding) in
-                            verify_catalog_payloads(&backend, &state).await
-                        {
+                        for (address, finding) in verify_catalog_payloads(&backend, &state).await {
                            diagnostics.push(payload_finding_diagnostic(&address, &finding));
                        }
                        resource_digests = state_resource_digests(&state);
@ -1312,7 +1331,10 @@ pub async fn force_unlock_config_dir(
    if let Some(raw) = parsed.raw.as_ref() {
        let _settings = validate_cluster_header(raw, &mut diagnostics);
        if !has_errors(&diagnostics) {
-            match backend.force_unlock(lock_id.as_ref(), &mut observations).await {
+            match backend
+                .force_unlock(lock_id.as_ref(), &mut observations)
+                .await
+            {
                Ok(()) => lock_removed = true,
                Err(diagnostic) => diagnostics.push(diagnostic),
            }
@ -1380,7 +1402,10 @@ async fn sync_config_dir(config_dir: &Path, operation: StateSyncOperation) -> St

    let operation_label = state_sync_operation_label(operation);
    let _lock_guard = if desired.state_lock {
-        match backend.acquire_lock(operation_label, &mut observations).await {
+        match backend
+            .acquire_lock(operation_label, &mut observations)
+            .await
+        {
            Ok(guard) => Some(guard),
            Err(diagnostic) => {
                diagnostics.push(diagnostic);
@ -1542,7 +1567,10 @@ async fn sync_config_dir(config_dir: &Path, operation: StateSyncOperation) -> St
        state.state_revision = state.state_revision.saturating_add(1);
    }

-    match backend.write_state(&state, expected_cas.as_deref(), &mut observations).await {
+    match backend
+        .write_state(&state, expected_cas.as_deref(), &mut observations)
+        .await
+    {
        Ok(()) => {
            // Completed sweep sidecars are deleted only after their outcome
            // is durably recorded; on failure they stay and re-sweep.
@ -1569,9 +1597,6 @@ async fn sync_config_dir(config_dir: &Path, operation: StateSyncOperation) -> St
    }
 }

-
-
-
 #[derive(Debug, PartialEq, Eq)]
 enum PayloadFinding {
    Missing,
@ -1650,7 +1675,10 @@ async fn write_resource_payload(
        Diagnostic::error(
            "resource_payload_write_error",
            resource,
-            format!("could not read resource source '{}': {err}", source.display()),
+            format!(
+                "could not read resource source '{}': {err}",
+                source.display()
+            ),
        )
    })?;
    if sha256_hex(&bytes) != expected_digest {
@ -1692,7 +1720,11 @@ async fn write_resource_payload(
 fn recompute_state_graph_digests(state: &mut ClusterState, desired: &DesiredCluster) {
    for graph in &desired.graphs {
        let graph_address = graph_address(&graph.id);
-        if !state.applied_revision.resources.contains_key(&graph_address) {
+        if !state
+            .applied_revision
+            .resources
+            .contains_key(&graph_address)
+        {
            continue;
        }
        let schema_digest = state
@ -1701,11 +1733,26 @@ fn recompute_state_graph_digests(state: &mut ClusterState, desired: &DesiredClus
            .get(&schema_address(&graph.id))
            .map(|resource| resource.digest.clone());
        let query_digests = state_query_digests_for_graph(state, &graph.id);
-        let digest = graph_digest(&graph.id, schema_digest.as_ref(), Some(&query_digests));
-        state
-            .applied_revision
-            .resources
-            .insert(graph_address, StateResource { digest, applies_to: None });
+        let embedding_provider = graph.embedding_provider.as_deref();
+        let embedding_provider_digest = embedding_provider
+            .and_then(|address| state.applied_revision.resources.get(address))
+            .map(|resource| resource.digest.clone());
+        let digest = graph_digest(
+            &graph.id,
+            schema_digest.as_ref(),
+            Some(&query_digests),
+            embedding_provider,
+            embedding_provider_digest.as_ref(),
+        );
+        state.applied_revision.resources.insert(
+            graph_address,
+            StateResource {
+                digest,
+                applies_to: None,
+                embedding_provider: graph.embedding_provider.clone(),
+                embedding_profile: None,
+            },
+        );
    }
 }

@ -1773,7 +1820,6 @@ fn duplicate_key_diagnostics(text: &str) -> Vec<Diagnostic> {
    diagnostics
 }

-
 fn strip_comment(line: &str) -> String {
    let mut in_single_quote = false;
    let mut in_double_quote = false;
@ -1796,7 +1842,6 @@ fn strip_comment(line: &str) -> String {
    line.to_string()
 }

-
 fn state_query_digests_for_graph(state: &ClusterState, graph_id: &str) -> BTreeMap<String, String> {
    let prefix = format!("query.{graph_id}.");
    state
@ -1811,6 +1856,23 @@ fn state_query_digests_for_graph(state: &ClusterState, graph_id: &str) -> BTreeM
        .collect()
 }

+fn state_graph_embedding_provider(state: &ClusterState, graph_id: &str) -> Option<String> {
+    state
+        .applied_revision
+        .resources
+        .get(&graph_address(graph_id))
+        .and_then(|resource| resource.embedding_provider.clone())
+}
+
+fn state_embedding_provider_digest(
+    state: &ClusterState,
+    embedding_provider: Option<&str>,
+) -> Option<String> {
+    embedding_provider
+        .and_then(|address| state.applied_revision.resources.get(address))
+        .map(|resource| resource.digest.clone())
+}
+
 fn set_resource_status_applied(state: &mut ClusterState, address: &str) {
    state.resource_statuses.insert(
        address.to_string(),
@ -1843,6 +1905,8 @@ fn graph_digest(
    graph_id: &str,
    schema_digest: Option<&String>,
    query_digests: Option<&BTreeMap<String, String>>,
+    embedding_provider: Option<&str>,
+    embedding_provider_digest: Option<&String>,
 ) -> String {
    let mut input = format!(
        "graph\0{graph_id}\0schema\0{}\0",
@ -1857,6 +1921,21 @@ fn graph_digest(
            input.push('\0');
        }
    }
+    if let Some(provider) = embedding_provider {
+        input.push_str("embedding_provider\0");
+        input.push_str(provider);
+        input.push('\0');
+        input.push_str(embedding_provider_digest.map_or("", String::as_str));
+        input.push('\0');
+    }
+    sha256_hex(input.as_bytes())
+}
+
+fn embedding_provider_digest(profile: &EmbeddingProviderConfig) -> String {
+    let mut input = String::from("embedding-provider\0");
+    let config_semantics =
+        serde_json::to_string(profile).expect("embedding provider config must serialize");
+    input.push_str(&config_semantics);
    sha256_hex(input.as_bytes())
 }

@ -1930,7 +2009,6 @@ fn display_path(path: &Path) -> String {
    path.display().to_string()
 }

-
 #[cfg(test)]
 #[path = "tests.rs"]
 mod tests;
--- a/crates/omnigraph-cluster/src/serve.rs
+++ b/crates/omnigraph-cluster/src/serve.rs
@ -8,6 +8,7 @@ use super::*;
 pub struct ServingGraph {
    pub graph_id: String,
    pub root: PathBuf,
+    pub embedding: Option<EmbeddingProviderConfig>,
 }

 /// One stored query: its graph binding, registry name, and verified source.
@ -225,15 +226,73 @@ async fn read_snapshot_with_store(
        return Err(diagnostics);
    };

+    let mut embedding_profiles: BTreeMap<String, EmbeddingProviderConfig> = BTreeMap::new();
+    for (address, entry) in &state.applied_revision.resources {
+        if !matches!(resource_kind(address), ResourceKind::EmbeddingProvider(_)) {
+            continue;
+        }
+        let Some(profile) = entry.embedding_profile.clone() else {
+            diagnostics.push(Diagnostic::error(
+                "embedding_provider_profile_missing",
+                address.clone(),
+                "no applied embedding provider profile recorded; re-run `cluster apply` to backfill",
+            ));
+            continue;
+        };
+        let actual_digest = embedding_provider_digest(&profile);
+        if actual_digest != entry.digest {
+            diagnostics.push(Diagnostic::error(
+                "embedding_provider_digest_mismatch",
+                address.clone(),
+                format!(
+                    "applied embedding provider profile does not match its recorded digest (actual sha256:{actual_digest}); run `cluster refresh` then `cluster apply`, and restart"
+                ),
+            ));
+            continue;
+        }
+        embedding_profiles.insert(address.clone(), profile);
+    }
+
    let mut graphs = Vec::new();
    let mut queries = Vec::new();
    let mut policies = Vec::new();
    for (address, entry) in &state.applied_revision.resources {
        match resource_kind(address) {
            ResourceKind::Graph(graph_id) => {
+                let embedding = match entry.embedding_provider.as_deref() {
+                    Some(provider_address) => match resource_kind(provider_address) {
+                        ResourceKind::EmbeddingProvider(_) => {
+                            match embedding_profiles.get(provider_address) {
+                                Some(profile) => Some(profile.clone()),
+                                None => {
+                                    diagnostics.push(Diagnostic::error(
+                                        "embedding_provider_missing",
+                                        address.clone(),
+                                        format!(
+                                            "graph references `{provider_address}`, but no applied embedding provider profile is available; re-run `cluster apply`"
+                                        ),
+                                    ));
+                                    None
+                                }
+                            }
+                        }
+                        _ => {
+                            diagnostics.push(Diagnostic::error(
+                                "wrong_kind_reference",
+                                address.clone(),
+                                format!(
+                                    "graph embedding_provider expects `provider.embedding.<name>`, got `{provider_address}`"
+                                ),
+                            ));
+                            None
+                        }
+                    },
+                    None => None,
+                };
                graphs.push(ServingGraph {
                    root: PathBuf::from(backend.graph_root(&graph_id)),
                    graph_id,
+                    embedding,
                });
            }
            ResourceKind::Schema(_) => {}
@ -241,7 +300,10 @@ async fn read_snapshot_with_store(
                let ResourceKind::Query { graph, name } = &kind else {
                    unreachable!()
                };
-                match backend.read_verified_payload(&kind, &entry.digest, address).await {
+                match backend
+                    .read_verified_payload(&kind, &entry.digest, address)
+                    .await
+                {
                    Ok(source) => queries.push(ServingQuery {
                        graph_id: graph.clone(),
                        name: name.clone(),
@ -262,7 +324,10 @@ async fn read_snapshot_with_store(
                    ));
                    continue;
                };
-                match backend.read_verified_payload(&kind, &entry.digest, address).await {
+                match backend
+                    .read_verified_payload(&kind, &entry.digest, address)
+                    .await
+                {
                    Ok(source) => policies.push(ServingPolicy {
                        name: name.clone(),
                        source,
@ -271,6 +336,7 @@ async fn read_snapshot_with_store(
                    Err(diagnostic) => diagnostics.push(diagnostic),
                }
            }
+            ResourceKind::EmbeddingProvider(_) => {}
            ResourceKind::Unknown => {}
        }
    }
@ -338,4 +404,3 @@ mod tests {
        );
    }
 }
-
--- a/crates/omnigraph-cluster/src/sweep.rs
+++ b/crates/omnigraph-cluster/src/sweep.rs
@ -19,13 +19,29 @@ pub(crate) async fn sweep_recovery_sidecars(
    for (path, sidecar) in backend.list_recovery_sidecars(diagnostics).await {
        match sidecar.kind {
            RecoverySidecarKind::GraphCreate => {
-                sweep_graph_create_sidecar(backend, path, sidecar, state, diagnostics, &mut outcome).await;
+                sweep_graph_create_sidecar(
+                    backend,
+                    path,
+                    sidecar,
+                    state,
+                    diagnostics,
+                    &mut outcome,
+                )
+                .await;
            }
            RecoverySidecarKind::SchemaApply => {
                sweep_schema_apply_sidecar(path, sidecar, state, diagnostics, &mut outcome).await;
            }
            RecoverySidecarKind::GraphDelete => {
-                sweep_graph_delete_sidecar(backend, path, sidecar, state, diagnostics, &mut outcome).await;
+                sweep_graph_delete_sidecar(
+                    backend,
+                    path,
+                    sidecar,
+                    state,
+                    diagnostics,
+                    &mut outcome,
+                )
+                .await;
            }
        }
    }
@ -71,15 +87,30 @@ pub(crate) async fn sweep_graph_create_sidecar(
                    StateResource {
                        digest: live_digest.clone(),
                        applies_to: None,
+                        embedding_provider: None,
+                        embedding_profile: None,
                    },
                );
                let query_digests = state_query_digests_for_graph(state, &sidecar.graph_id);
-                let composite =
-                    graph_digest(&sidecar.graph_id, Some(&live_digest), Some(&query_digests));
-                state
-                    .applied_revision
-                    .resources
-                    .insert(graph_address.clone(), StateResource { digest: composite, applies_to: None });
+                let embedding_provider = state_graph_embedding_provider(state, &sidecar.graph_id);
+                let embedding_provider_digest =
+                    state_embedding_provider_digest(state, embedding_provider.as_deref());
+                let composite = graph_digest(
+                    &sidecar.graph_id,
+                    Some(&live_digest),
+                    Some(&query_digests),
+                    embedding_provider.as_deref(),
+                    embedding_provider_digest.as_ref(),
+                );
+                state.applied_revision.resources.insert(
+                    graph_address.clone(),
+                    StateResource {
+                        digest: composite,
+                        applies_to: None,
+                        embedding_provider,
+                        embedding_profile: None,
+                    },
+                );
                set_resource_status_applied(state, &graph_address);
                set_resource_status_applied(state, &schema_addr);
                state.recovery_records.insert(
@ -200,14 +231,30 @@ pub(crate) async fn sweep_schema_apply_sidecar(
            StateResource {
                digest: live_digest.clone(),
                applies_to: None,
+                embedding_provider: None,
+                embedding_profile: None,
            },
        );
        let query_digests = state_query_digests_for_graph(state, &sidecar.graph_id);
-        let composite = graph_digest(&sidecar.graph_id, Some(&live_digest), Some(&query_digests));
-        state
-            .applied_revision
-            .resources
-            .insert(graph_address.clone(), StateResource { digest: composite, applies_to: None });
+        let embedding_provider = state_graph_embedding_provider(state, &sidecar.graph_id);
+        let embedding_provider_digest =
+            state_embedding_provider_digest(state, embedding_provider.as_deref());
+        let composite = graph_digest(
+            &sidecar.graph_id,
+            Some(&live_digest),
+            Some(&query_digests),
+            embedding_provider.as_deref(),
+            embedding_provider_digest.as_ref(),
+        );
+        state.applied_revision.resources.insert(
+            graph_address.clone(),
+            StateResource {
+                digest: composite,
+                applies_to: None,
+                embedding_provider,
+                embedding_profile: None,
+            },
+        );
        set_resource_status_applied(state, &graph_address);
        set_resource_status_applied(state, &schema_addr);
        state.recovery_records.insert(
@ -274,7 +321,11 @@ pub(crate) async fn sweep_graph_delete_sidecar(
        return;
    }

-    if !state.applied_revision.resources.contains_key(&graph_address) {
+    if !state
+        .applied_revision
+        .resources
+        .contains_key(&graph_address)
+    {
        // Row 7: already tombstoned (or never recorded); crash fell between
        // the state CAS and sidecar delete.
        outcome.completed_sidecars.push(path);
@ -283,7 +334,12 @@ pub(crate) async fn sweep_graph_delete_sidecar(

    // Row 7b: the root is gone, the ledger is stale — roll forward the
    // tombstone, consume the approval the sidecar carries, audit.
-    tombstone_graph_subtree(state, &sidecar.graph_id, sidecar.approval_id.as_deref(), sidecar.actor.as_deref());
+    tombstone_graph_subtree(
+        state,
+        &sidecar.graph_id,
+        sidecar.approval_id.as_deref(),
+        sidecar.actor.as_deref(),
+    );
    state.recovery_records.insert(
        sidecar.operation_id.clone(),
        json!({
@ -342,7 +398,11 @@ pub(crate) fn tombstone_graph_subtree(
 /// Record approval consumption in the state ledger. The artifact FILE is
 /// rewritten with consumed_at only after the state write lands, so a failed
 /// CAS leaves the approval valid for the retry.
-pub(crate) fn record_approval_consumed(state: &mut ClusterState, approval_id: &str, operation_id: &str) {
+pub(crate) fn record_approval_consumed(
+    state: &mut ClusterState,
+    approval_id: &str,
+    operation_id: &str,
+) {
    state.approval_records.insert(
        approval_id.to_string(),
        json!({
--- a/crates/omnigraph-cluster/src/tests.rs
+++ b/crates/omnigraph-cluster/src/tests.rs
@ -56,6 +56,39 @@ policies:
        dir
    }

+    fn write_mock_embedding_cluster(config_dir: &Path, model: &str) {
+        fs::write(
+            config_dir.join(CLUSTER_CONFIG_FILE),
+            format!(
+                r#"
+version: 1
+metadata:
+  name: test
+state:
+  backend: cluster
+  lock: true
+providers:
+  embedding:
+    default:
+      kind: mock
+      model: {model}
+graphs:
+  knowledge:
+    schema: ./people.pg
+    embedding_provider: default
+    queries:
+      find_person:
+        file: ./people.gq
+policies:
+  base:
+    file: ./base.policy.yaml
+    applies_to: [knowledge]
+"#
+            ),
+        )
+        .unwrap();
+    }
+
    async fn init_derived_graph(root: &Path) {
        let graph_dir = root.join(CLUSTER_GRAPHS_DIR);
        fs::create_dir_all(&graph_dir).unwrap();
@ -194,6 +227,95 @@ policies:
        assert!(codes.contains("dangling_graph_reference"));
    }

+    #[test]
+    fn embedding_provider_config_accepts_provider_resources_and_graph_refs() {
+        let dir = fixture();
+        write_mock_embedding_cluster(dir.path(), "recorded-x");
+
+        let out = validate_config_dir(dir.path());
+        assert!(out.ok, "{:?}", out.diagnostics);
+        let provider_digest = out
+            .resource_digests
+            .get("provider.embedding.default")
+            .expect("provider resource digest");
+        assert!(
+            out.resources
+                .iter()
+                .any(|resource| resource.address == "provider.embedding.default"
+                    && resource.kind == "embedding_provider"
+                    && resource.path.is_none())
+        );
+        assert!(
+            out.dependencies
+                .iter()
+                .any(|dep| dep.from == "graph.knowledge" && dep.to == "provider.embedding.default"),
+            "{:?}",
+            out.dependencies
+        );
+        let schema_digest = out.resource_digests.get("schema.knowledge").unwrap();
+        let query_digest = out
+            .resource_digests
+            .get("query.knowledge.find_person")
+            .unwrap();
+        let expected_graph_digest = graph_digest(
+            "knowledge",
+            Some(schema_digest),
+            Some(
+                &[("find_person".to_string(), query_digest.clone())]
+                    .into_iter()
+                    .collect(),
+            ),
+            Some("provider.embedding.default"),
+            Some(provider_digest),
+        );
+        assert_eq!(
+            out.resource_digests["graph.knowledge"],
+            expected_graph_digest
+        );
+    }
+
+    #[test]
+    fn embedding_provider_config_rejects_bad_refs_and_inline_secrets() {
+        let dir = fixture();
+        fs::write(
+            dir.path().join(CLUSTER_CONFIG_FILE),
+            r#"
+version: 1
+providers:
+  embedding:
+    default:
+      kind: openai-compatible
+      api_key: sk-inline
+graphs:
+  knowledge:
+    schema: ./people.pg
+    embedding_provider: provider.policy.default
+  missing_provider:
+    schema: ./people.pg
+    embedding_provider: absent
+"#,
+        )
+        .unwrap();
+        let out = validate_config_dir(dir.path());
+        assert!(!out.ok);
+        let codes: BTreeSet<_> = out.diagnostics.iter().map(|d| d.code.as_str()).collect();
+        assert!(
+            codes.contains("embedding_api_key_inline"),
+            "{:?}",
+            out.diagnostics
+        );
+        assert!(
+            codes.contains("wrong_kind_reference"),
+            "{:?}",
+            out.diagnostics
+        );
+        assert!(
+            codes.contains("dangling_embedding_provider_reference"),
+            "{:?}",
+            out.diagnostics
+        );
+    }
+
    #[test]
    fn query_key_mismatch_fails() {
        let dir = fixture();
@ -1012,8 +1134,13 @@ graphs:
        let out = validate_config_dir(config_dir);
        assert!(out.ok, "{:?}", out.diagnostics);
        let schema_digest = out.resource_digests.get("schema.knowledge").unwrap().clone();
-        let graph_composite =
-            graph_digest("knowledge", Some(&schema_digest), Some(&BTreeMap::new()));
+        let graph_composite = graph_digest(
+            "knowledge",
+            Some(&schema_digest),
+            Some(&BTreeMap::new()),
+            None,
+            None,
+        );
        write_state_resources(
            config_dir,
            &[
@ -1122,6 +1249,8 @@ graphs:
                    .into_iter()
                    .collect(),
            ),
+            None,
+            None,
        );
        assert_eq!(resources["graph.knowledge"]["digest"], expected_composite);
        assert_eq!(
@ -1136,6 +1265,117 @@ graphs:
        assert!(!dir.path().join(CLUSTER_LOCK_FILE).exists());
    }

+    #[tokio::test]
+    async fn apply_records_embedding_provider_profile_and_graph_binding() {
+        let dir = fixture();
+        write_mock_embedding_cluster(dir.path(), "recorded-x");
+        write_applyable_state(dir.path());
+        let desired = validate_config_dir(dir.path());
+        let query_digest = desired
+            .resource_digests
+            .get("query.knowledge.find_person")
+            .unwrap()
+            .clone();
+        let schema_digest = desired
+            .resource_digests
+            .get("schema.knowledge")
+            .unwrap()
+            .clone();
+        let provider_digest = desired
+            .resource_digests
+            .get("provider.embedding.default")
+            .unwrap()
+            .clone();
+
+        let out = apply_config_dir(dir.path()).await;
+        assert!(out.ok, "{:?}", out.diagnostics);
+        assert!(out.converged, "{out:?}");
+
+        let state = read_state_json(dir.path());
+        let resources = &state["applied_revision"]["resources"];
+        let provider = resources["provider.embedding.default"]
+            .as_object()
+            .expect("provider resource");
+        assert_eq!(provider["digest"], provider_digest);
+        assert_eq!(provider["embedding_profile"]["kind"], "mock");
+        assert_eq!(provider["embedding_profile"]["model"], "recorded-x");
+        assert!(provider["embedding_profile"].get("api_key").is_none());
+        assert_eq!(
+            resources["graph.knowledge"]["embedding_provider"],
+            "provider.embedding.default"
+        );
+        let expected_graph_digest = graph_digest(
+            "knowledge",
+            Some(&schema_digest),
+            Some(
+                &[("find_person".to_string(), query_digest)]
+                    .into_iter()
+                    .collect(),
+            ),
+            Some("provider.embedding.default"),
+            Some(&provider_digest),
+        );
+        assert_eq!(resources["graph.knowledge"]["digest"], expected_graph_digest);
+    }
+
+    #[tokio::test]
+    async fn embedding_provider_changes_update_provider_and_graph_plan() {
+        let dir = fixture();
+        write_mock_embedding_cluster(dir.path(), "recorded-x");
+        write_applyable_state(dir.path());
+        let first = apply_config_dir(dir.path()).await;
+        assert!(first.ok && first.converged, "{first:?}");
+
+        write_mock_embedding_cluster(dir.path(), "recorded-y");
+        let plan = plan_config_dir(dir.path()).await;
+        assert!(plan.ok, "{:?}", plan.diagnostics);
+        let by_resource: BTreeMap<&str, &PlanChange> = plan
+            .changes
+            .iter()
+            .map(|change| (change.resource.as_str(), change))
+            .collect();
+        assert_eq!(
+            by_resource["provider.embedding.default"].operation,
+            PlanOperation::Update
+        );
+        assert_eq!(
+            by_resource["provider.embedding.default"].disposition,
+            Some(ApplyDisposition::Applied)
+        );
+        assert_eq!(
+            by_resource["graph.knowledge"].operation,
+            PlanOperation::Update
+        );
+        assert_eq!(
+            by_resource["graph.knowledge"].disposition,
+            Some(ApplyDisposition::Derived)
+        );
+    }
+
+    #[tokio::test]
+    async fn embedding_binding_survives_refresh() {
+        let dir = fixture();
+        init_derived_graph(dir.path()).await;
+        write_mock_embedding_cluster(dir.path(), "recorded-x");
+        write_applyable_state(dir.path());
+        let apply = apply_config_dir(dir.path()).await;
+        assert!(apply.ok && apply.converged, "{apply:?}");
+
+        let refresh = refresh_config_dir(dir.path()).await;
+        assert!(refresh.ok, "{:?}", refresh.diagnostics);
+
+        let state = read_state_json(dir.path());
+        let resources = &state["applied_revision"]["resources"];
+        assert_eq!(
+            resources["graph.knowledge"]["embedding_provider"],
+            "provider.embedding.default"
+        );
+        assert_eq!(
+            resources["provider.embedding.default"]["embedding_profile"]["model"],
+            "recorded-x"
+        );
+    }
+
    fn desired_revision_digest(out: &ApplyOutput) -> String {
        out.desired_revision.config_digest.clone().unwrap()
    }
@ -1150,8 +1390,13 @@ graphs:
            .unwrap()
            .clone();
        let old_digest = "0".repeat(64);
-        let graph_composite =
-            graph_digest("knowledge", Some(&schema_digest), Some(&BTreeMap::new()));
+        let graph_composite = graph_digest(
+            "knowledge",
+            Some(&schema_digest),
+            Some(&BTreeMap::new()),
+            None,
+            None,
+        );
        write_state_resources(
            dir.path(),
            &[
@ -1190,8 +1435,13 @@ graphs:
            .clone();
        let stale_query_digest = "1".repeat(64);
        let stale_policy_digest = "2".repeat(64);
-        let graph_composite =
-            graph_digest("knowledge", Some(&schema_digest), Some(&BTreeMap::new()));
+        let graph_composite = graph_digest(
+            "knowledge",
+            Some(&schema_digest),
+            Some(&BTreeMap::new()),
+            None,
+            None,
+        );
        write_state_resources(
            dir.path(),
            &[
@ -1234,6 +1484,8 @@ graphs:
            "knowledge",
            Some(&schema_digest),
            Some(&[("find_person".to_string(), query_digest)].into_iter().collect()),
+            None,
+            None,
        );
        assert_eq!(resources["graph.knowledge"]["digest"], expected_composite);
    }
@ -1494,8 +1746,13 @@ graphs:
            .get("schema.knowledge")
            .unwrap()
            .clone();
-        let graph_composite =
-            graph_digest("knowledge", Some(&schema_digest), Some(&BTreeMap::new()));
+        let graph_composite = graph_digest(
+            "knowledge",
+            Some(&schema_digest),
+            Some(&BTreeMap::new()),
+            None,
+            None,
+        );
        write_state_resources(
            dir.path(),
            &[
@ -2864,6 +3121,54 @@ policies:
        assert!(snapshot.policies[0].source.contains("rules:"));
    }

+    #[tokio::test]
+    async fn serving_snapshot_uses_applied_embedding_provider_profile() {
+        let dir = fixture();
+        init_derived_graph(dir.path()).await;
+        write_mock_embedding_cluster(dir.path(), "recorded-x");
+        write_applyable_state(dir.path());
+        let converge = apply_config_dir(dir.path()).await;
+        assert!(converge.converged, "{converge:?}");
+
+        let snapshot = read_serving_snapshot(dir.path()).await.unwrap();
+        let profile = snapshot.graphs[0].embedding.as_ref().unwrap();
+        assert_eq!(profile.kind.as_deref(), Some("mock"));
+        assert_eq!(profile.model.as_deref(), Some("recorded-x"));
+    }
+
+    #[tokio::test]
+    async fn serving_snapshot_refuses_missing_embedding_provider_metadata() {
+        let dir = fixture();
+        init_derived_graph(dir.path()).await;
+        write_mock_embedding_cluster(dir.path(), "recorded-x");
+        write_applyable_state(dir.path());
+        let converge = apply_config_dir(dir.path()).await;
+        assert!(converge.converged, "{converge:?}");
+
+        let mut state = read_state_json(dir.path());
+        state["applied_revision"]["resources"]["provider.embedding.default"]
+            .as_object_mut()
+            .unwrap()
+            .remove("embedding_profile");
+        fs::write(
+            dir.path().join(CLUSTER_STATE_FILE),
+            serde_json::to_string_pretty(&state).unwrap(),
+        )
+        .unwrap();
+
+        let err = read_serving_snapshot(dir.path()).await.unwrap_err();
+        assert!(
+            err.iter()
+                .any(|diagnostic| diagnostic.code == "embedding_provider_profile_missing"),
+            "{err:?}"
+        );
+        assert!(
+            err.iter()
+                .any(|diagnostic| diagnostic.code == "embedding_provider_missing"),
+            "{err:?}"
+        );
+    }
+
    #[tokio::test]
    async fn serving_snapshot_refuses_missing_state() {
        let dir = fixture();
--- a/crates/omnigraph-cluster/src/types.rs
+++ b/crates/omnigraph-cluster/src/types.rs
@ -325,6 +325,7 @@ pub(crate) struct DesiredCluster {
    /// The declared `storage:` root, if any (None ⇒ the config dir itself).
    pub(crate) storage_root: Option<String>,
    pub(crate) state_lock: bool,
+    pub(crate) embedding_providers: BTreeMap<String, EmbeddingProviderConfig>,
    pub(crate) graphs: Vec<DesiredGraph>,
    pub(crate) resource_digests: BTreeMap<String, String>,
    pub(crate) resources: Vec<ResourceSummary>,
@ -337,6 +338,7 @@ pub(crate) struct DesiredCluster {
 pub(crate) struct DesiredGraph {
    pub(crate) id: String,
    pub(crate) schema_digest: String,
+    pub(crate) embedding_provider: Option<String>,
 }

 #[derive(Debug)]
@ -376,6 +378,8 @@ pub(crate) struct RawClusterConfig {
    #[serde(default)]
    pub(crate) state: StateConfig,
    #[serde(default)]
+    pub(crate) providers: ProvidersConfig,
+    #[serde(default)]
    pub(crate) graphs: BTreeMap<String, GraphConfig>,
    #[serde(default)]
    pub(crate) policies: BTreeMap<String, PolicyConfig>,
@ -394,12 +398,123 @@ pub(crate) struct StateConfig {
    pub(crate) lock: Option<bool>,
 }

+#[derive(Debug, Default, Serialize, Deserialize)]
+#[serde(deny_unknown_fields)]
+pub(crate) struct ProvidersConfig {
+    #[serde(default)]
+    pub(crate) embedding: BTreeMap<String, EmbeddingProviderConfig>,
+}
+
 #[derive(Debug, Serialize, Deserialize)]
 #[serde(deny_unknown_fields)]
 pub(crate) struct GraphConfig {
    pub(crate) schema: PathBuf,
    #[serde(default)]
    pub(crate) queries: QueriesDecl,
+    /// Optional reference to a top-level `providers.embedding.<name>` profile.
+    #[serde(default)]
+    pub(crate) embedding_provider: Option<String>,
+}
+
+/// A named cluster embedding provider profile (RFC-012 Phase 5). `kind`/`base_url`/
+/// `model` default exactly as the engine's `EmbeddingConfig::from_env` does.
+/// `api_key`, when required, must be a `${NAME}` env reference resolved at
+/// serving boot, never an inline secret.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
+#[serde(deny_unknown_fields)]
+pub struct EmbeddingProviderConfig {
+    #[serde(default, alias = "provider", skip_serializing_if = "Option::is_none")]
+    pub kind: Option<String>,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub base_url: Option<String>,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub model: Option<String>,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub api_key: Option<String>,
+}
+
+impl EmbeddingProviderConfig {
+    pub(crate) fn validate(&self, path: String, diagnostics: &mut Vec<Diagnostic>) {
+        if let Err(error) = omnigraph::embedding::EmbeddingConfig::from_parts(
+            self.kind.as_deref(),
+            self.base_url.clone(),
+            self.model.clone(),
+            "validation-placeholder".to_string(),
+        ) {
+            diagnostics.push(Diagnostic::error(
+                "invalid_embedding_provider",
+                path.clone(),
+                error.to_string(),
+            ));
+        }
+
+        if self.kind.as_deref() == Some("mock") {
+            if let Some(api_key) = self.api_key.as_deref() {
+                if secret_ref_name(api_key).is_err() {
+                    diagnostics.push(Diagnostic::error(
+                        "embedding_api_key_inline",
+                        format!("{path}.api_key"),
+                        "embedding api_key must be a ${NAME} env reference, not an inline secret",
+                    ));
+                }
+            }
+            return;
+        }
+
+        match self.api_key.as_deref() {
+            Some(api_key) if secret_ref_name(api_key).is_err() => diagnostics.push(
+                Diagnostic::error(
+                    "embedding_api_key_inline",
+                    format!("{path}.api_key"),
+                    "embedding api_key must be a ${NAME} env reference, not an inline secret",
+                ),
+            ),
+            Some(_) => {}
+            None => diagnostics.push(Diagnostic::error(
+                "embedding_api_key_required",
+                format!("{path}.api_key"),
+                "non-mock embedding providers must set api_key to a ${NAME} env reference",
+            )),
+        }
+    }
+
+    /// Resolve into an engine `EmbeddingConfig`, reading the `${NAME}` api-key
+    /// reference from process env. Mock profiles do not read env and may omit
+    /// `api_key`; real providers error if the reference is missing or unset.
+    pub fn resolve(&self) -> Result<omnigraph::embedding::EmbeddingConfig, String> {
+        let api_key = if self.kind.as_deref() == Some("mock") {
+            String::new()
+        } else {
+            resolve_secret_ref(self.api_key.as_deref().ok_or_else(|| {
+                "embedding api_key is required for non-mock providers".to_string()
+            })?)?
+        };
+        omnigraph::embedding::EmbeddingConfig::from_parts(
+            self.kind.as_deref(),
+            self.base_url.clone(),
+            self.model.clone(),
+            api_key,
+        )
+        .map_err(|e| e.to_string())
+    }
+}
+
+fn secret_ref_name(value: &str) -> Result<&str, String> {
+    value
+        .trim()
+        .strip_prefix("${")
+        .and_then(|s| s.strip_suffix('}'))
+        .filter(|name| !name.trim().is_empty())
+        .ok_or_else(|| {
+            format!("embedding api_key must be a ${{NAME}} env reference, got '{}'", value.trim())
+        })
+}
+
+/// Resolve a `${NAME}` secret reference from process env. Rejects an inline value
+/// (anything not wrapped in `${…}`) so secrets never sit in the cluster config.
+fn resolve_secret_ref(value: &str) -> Result<String, String> {
+    let name = secret_ref_name(value)?;
+    std::env::var(name).map_err(|_| format!("embedding api_key env var '{name}' is not set"))
 }

 /// How a graph declares its stored queries. Terraform-style: the `.gq`
@ -457,6 +572,16 @@ pub(crate) struct StateResource {
    /// non-policy resources.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub(crate) applies_to: Option<Vec<String>>,
+    /// Graph resources only: the applied `provider.embedding.<name>` binding.
+    /// The provider profile itself is stored on the provider resource so
+    /// serving can boot without re-reading mutable desired config.
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub(crate) embedding_provider: Option<String>,
+    /// Embedding provider resources only: the applied profile with unresolved
+    /// `${ENV}` references. The server resolves the referenced env var exactly
+    /// once at boot and injects the resulting engine config into the graph.
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub(crate) embedding_profile: Option<EmbeddingProviderConfig>,
 }

 #[derive(Debug, Serialize, Deserialize)]
@ -518,3 +643,74 @@ pub(crate) struct SweepOutcome {
    /// files are rewritten with consumed_at only after the state write lands.
    pub(crate) consumed_approvals: Vec<String>,
 }
+
+#[cfg(test)]
+mod embedding_provider_config_tests {
+    use super::EmbeddingProviderConfig;
+
+    #[test]
+    fn resolves_secret_from_env_and_applies_defaults() {
+        // SAFETY: a unique var name, no concurrent reader.
+        unsafe { std::env::set_var("OG_TEST_EMBED_KEY_A", "secret-x") };
+        let profile = EmbeddingProviderConfig {
+            kind: Some("openai-compatible".to_string()),
+            base_url: None,
+            model: Some("m".to_string()),
+            api_key: Some("${OG_TEST_EMBED_KEY_A}".to_string()),
+        };
+        let config = profile.resolve().unwrap();
+        assert_eq!(config.api_key, "secret-x");
+        assert_eq!(config.model, "m");
+        unsafe { std::env::remove_var("OG_TEST_EMBED_KEY_A") };
+    }
+
+    #[test]
+    fn rejects_inline_api_key() {
+        let profile = EmbeddingProviderConfig {
+            kind: None,
+            base_url: None,
+            model: None,
+            api_key: Some("sk-inline".to_string()),
+        };
+        let err = profile.resolve().unwrap_err();
+        assert!(err.contains("${NAME}"), "got: {err}");
+    }
+
+    #[test]
+    fn errors_on_unset_secret() {
+        let profile = EmbeddingProviderConfig {
+            kind: None,
+            base_url: None,
+            model: None,
+            api_key: Some("${OG_TEST_DEFINITELY_UNSET_VAR}".to_string()),
+        };
+        let err = profile.resolve().unwrap_err();
+        assert!(err.contains("not set"), "got: {err}");
+    }
+
+    #[test]
+    fn rejects_unknown_provider() {
+        unsafe { std::env::set_var("OG_TEST_EMBED_KEY_B", "x") };
+        let profile = EmbeddingProviderConfig {
+            kind: Some("cohere".to_string()),
+            base_url: None,
+            model: None,
+            api_key: Some("${OG_TEST_EMBED_KEY_B}".to_string()),
+        };
+        let err = profile.resolve().unwrap_err();
+        assert!(err.contains("unknown embedding provider"), "got: {err}");
+        unsafe { std::env::remove_var("OG_TEST_EMBED_KEY_B") };
+    }
+
+    #[test]
+    fn mock_does_not_require_secret_env() {
+        let profile = EmbeddingProviderConfig {
+            kind: Some("mock".to_string()),
+            base_url: None,
+            model: Some("cluster-mock".to_string()),
+            api_key: None,
+        };
+        let config = profile.resolve().unwrap();
+        assert_eq!(config.model, "cluster-mock");
+    }
+}
--- a/crates/omnigraph-compiler/Cargo.toml
+++ b/crates/omnigraph-compiler/Cargo.toml
@ -20,10 +20,5 @@ pest_derive = { workspace = true }
 thiserror = { workspace = true }
 serde = { workspace = true }
 serde_json = { workspace = true }
-reqwest = { workspace = true }
 ahash = { workspace = true }
-tokio = { workspace = true }
 sha2 = { workspace = true }
-
-[dev-dependencies]
-tokio = { workspace = true }
--- a/crates/omnigraph-compiler/src/catalog/mod.rs
+++ b/crates/omnigraph-compiler/src/catalog/mod.rs
@ -26,6 +26,15 @@ pub struct InterfaceType {
    pub properties: HashMap<String, PropType>,
 }

+/// The `@embed` binding for a vector property: its source text property and,
+/// optionally, the embedding model recorded by `@embed("source", model="…")`.
+/// The model is what the query-time same-space check validates against.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct EmbedSource {
+    pub source: String,
+    pub model: Option<String>,
+}
+
 #[derive(Debug, Clone)]
 pub struct NodeType {
    pub name: String,
@ -42,8 +51,8 @@ pub struct NodeType {
    pub range_constraints: Vec<RangeConstraint>,
    /// Regex check constraints
    pub check_constraints: Vec<CheckConstraint>,
-    /// Maps @embed target property -> source text property
-    pub embed_sources: HashMap<String, String>,
+    /// Maps @embed target property -> its source text property + recorded model.
+    pub embed_sources: HashMap<String, EmbedSource>,
    pub blob_properties: HashSet<String>,
    pub arrow_schema: SchemaRef,
 }
@ -156,14 +165,18 @@ pub fn build_catalog(schema: &SchemaFile) -> Result<Catalog> {
                if matches!(prop.prop_type.scalar, ScalarType::Blob) {
                    blob_properties.insert(prop.name.clone());
                }
-                // Extract @embed from property annotations (stays as annotation)
-                if let Some(source_prop) = prop
-                    .annotations
-                    .iter()
-                    .find(|ann| ann.name == "embed")
-                    .and_then(|ann| ann.value.clone())
-                {
-                    embed_sources.insert(prop.name.clone(), source_prop);
+                // Extract @embed: the source text property (positional) and the
+                // optional recorded model (the `model` kwarg).
+                if let Some(ann) = prop.annotations.iter().find(|ann| ann.name == "embed") {
+                    if let Some(source) = ann.value.clone() {
+                        embed_sources.insert(
+                            prop.name.clone(),
+                            EmbedSource {
+                                source,
+                                model: ann.kwargs.get("model").cloned(),
+                            },
+                        );
+                    }
                }
            }

--- a/crates/omnigraph-compiler/src/catalog/schema_plan.rs
+++ b/crates/omnigraph-compiler/src/catalog/schema_plan.rs
@ -1137,6 +1137,7 @@ node Person @description("new") {
            annotations: vec![Annotation {
                name: "description".to_string(),
                value: Some("new".to_string()),
+                kwargs: Default::default(),
            }],
        }));
    }
--- a/crates/omnigraph-compiler/src/catalog/tests.rs
+++ b/crates/omnigraph-compiler/src/catalog/tests.rs
@ -31,6 +31,33 @@ fn test_build_catalog() {
    assert!(catalog.node_types.contains_key("Company"));
 }

+#[test]
+fn test_embed_source_records_model_kwarg() {
+    let schema = parse_schema(
+        r#"
+node Doc {
+title: String
+embedding: Vector(3) @embed("title", model="openai/text-embedding-3-large")
+plain: Vector(3) @embed("title")
+}
+"#,
+    )
+    .unwrap();
+    let catalog = build_catalog(&schema).unwrap();
+    let doc = catalog.node_types.get("Doc").unwrap();
+
+    let embedding = doc.embed_sources.get("embedding").unwrap();
+    assert_eq!(embedding.source, "title");
+    assert_eq!(
+        embedding.model.as_deref(),
+        Some("openai/text-embedding-3-large")
+    );
+
+    let plain = doc.embed_sources.get("plain").unwrap();
+    assert_eq!(plain.source, "title");
+    assert_eq!(plain.model, None);
+}
+
 #[test]
 fn test_edge_lookup() {
    let schema = parse_schema(test_schema()).unwrap();
--- a/crates/omnigraph-compiler/src/embedding.rs
+++ b/crates/omnigraph-compiler/src/embedding.rs
@ -1,379 +0,0 @@
-#![allow(dead_code)]
-
-use std::time::Duration;
-
-use reqwest::Client;
-use serde::Deserialize;
-use tokio::time::sleep;
-
-use crate::error::{NanoError, Result};
-
-const DEFAULT_EMBED_MODEL: &str = "text-embedding-3-small";
-const DEFAULT_OPENAI_BASE_URL: &str = "https://api.openai.com/v1";
-const DEFAULT_TIMEOUT_MS: u64 = 30_000;
-const DEFAULT_RETRY_ATTEMPTS: usize = 4;
-const DEFAULT_RETRY_BACKOFF_MS: u64 = 200;
-
-#[derive(Clone)]
-enum EmbeddingTransport {
-    Mock,
-    OpenAi {
-        api_key: String,
-        base_url: String,
-        http: Client,
-    },
-}
-
-#[derive(Clone)]
-pub(crate) struct EmbeddingClient {
-    model: String,
-    retry_attempts: usize,
-    retry_backoff_ms: u64,
-    transport: EmbeddingTransport,
-}
-
-struct EmbedCallError {
-    message: String,
-    retryable: bool,
-}
-
-#[derive(Debug, Deserialize)]
-struct OpenAiEmbeddingResponse {
-    data: Vec<OpenAiEmbeddingDatum>,
-}
-
-#[derive(Debug, Deserialize)]
-struct OpenAiEmbeddingDatum {
-    index: usize,
-    embedding: Vec<f32>,
-}
-
-#[derive(Debug, Deserialize)]
-struct OpenAiErrorEnvelope {
-    error: OpenAiErrorBody,
-}
-
-#[derive(Debug, Deserialize)]
-struct OpenAiErrorBody {
-    message: String,
-}
-
-impl EmbeddingClient {
-    pub(crate) fn from_env() -> Result<Self> {
-        let model = std::env::var("NANOGRAPH_EMBED_MODEL")
-            .ok()
-            .map(|v| v.trim().to_string())
-            .filter(|v| !v.is_empty())
-            .unwrap_or_else(|| DEFAULT_EMBED_MODEL.to_string());
-        let retry_attempts =
-            parse_env_usize("NANOGRAPH_EMBED_RETRY_ATTEMPTS", DEFAULT_RETRY_ATTEMPTS);
-        let retry_backoff_ms =
-            parse_env_u64("NANOGRAPH_EMBED_RETRY_BACKOFF_MS", DEFAULT_RETRY_BACKOFF_MS);
-
-        if env_flag("NANOGRAPH_EMBEDDINGS_MOCK") {
-            return Ok(Self {
-                model,
-                retry_attempts,
-                retry_backoff_ms,
-                transport: EmbeddingTransport::Mock,
-            });
-        }
-
-        let api_key = std::env::var("OPENAI_API_KEY")
-            .ok()
-            .map(|v| v.trim().to_string())
-            .filter(|v| !v.is_empty())
-            .ok_or_else(|| {
-                NanoError::Execution(
-                    "OPENAI_API_KEY is required when an embedding call is needed".to_string(),
-                )
-            })?;
-        let base_url = std::env::var("OPENAI_BASE_URL")
-            .ok()
-            .map(|v| v.trim_end_matches('/').to_string())
-            .filter(|v| !v.is_empty())
-            .unwrap_or_else(|| DEFAULT_OPENAI_BASE_URL.to_string());
-        let timeout_ms = parse_env_u64("NANOGRAPH_EMBED_TIMEOUT_MS", DEFAULT_TIMEOUT_MS);
-        let http = Client::builder()
-            .timeout(Duration::from_millis(timeout_ms))
-            .build()
-            .map_err(|e| {
-                NanoError::Execution(format!("failed to initialize HTTP client: {}", e))
-            })?;
-
-        Ok(Self {
-            model,
-            retry_attempts,
-            retry_backoff_ms,
-            transport: EmbeddingTransport::OpenAi {
-                api_key,
-                base_url,
-                http,
-            },
-        })
-    }
-
-    #[cfg(test)]
-    pub(crate) fn mock_for_tests() -> Self {
-        Self {
-            model: DEFAULT_EMBED_MODEL.to_string(),
-            retry_attempts: DEFAULT_RETRY_ATTEMPTS,
-            retry_backoff_ms: DEFAULT_RETRY_BACKOFF_MS,
-            transport: EmbeddingTransport::Mock,
-        }
-    }
-
-    pub(crate) fn model(&self) -> &str {
-        &self.model
-    }
-
-    pub(crate) async fn embed_text(&self, input: &str, expected_dim: usize) -> Result<Vec<f32>> {
-        let mut vectors = self.embed_texts(&[input.to_string()], expected_dim).await?;
-        vectors.pop().ok_or_else(|| {
-            NanoError::Execution("embedding provider returned no vector".to_string())
-        })
-    }
-
-    pub(crate) async fn embed_texts(
-        &self,
-        inputs: &[String],
-        expected_dim: usize,
-    ) -> Result<Vec<Vec<f32>>> {
-        if expected_dim == 0 {
-            return Err(NanoError::Execution(
-                "embedding dimension must be greater than zero".to_string(),
-            ));
-        }
-        if inputs.is_empty() {
-            return Ok(Vec::new());
-        }
-
-        match &self.transport {
-            EmbeddingTransport::Mock => Ok(inputs
-                .iter()
-                .map(|input| mock_embedding(input, expected_dim))
-                .collect()),
-            EmbeddingTransport::OpenAi { .. } => {
-                self.embed_texts_openai_with_retry(inputs, expected_dim)
-                    .await
-            }
-        }
-    }
-
-    async fn embed_texts_openai_with_retry(
-        &self,
-        inputs: &[String],
-        expected_dim: usize,
-    ) -> Result<Vec<Vec<f32>>> {
-        let max_attempt = self.retry_attempts.max(1);
-        let mut attempt = 0usize;
-        loop {
-            attempt += 1;
-            match self.embed_texts_openai_once(inputs, expected_dim).await {
-                Ok(vectors) => return Ok(vectors),
-                Err(err) => {
-                    if !err.retryable || attempt >= max_attempt {
-                        return Err(NanoError::Execution(err.message));
-                    }
-                    let shift = (attempt - 1).min(10) as u32;
-                    let delay = self.retry_backoff_ms.saturating_mul(1u64 << shift);
-                    sleep(Duration::from_millis(delay)).await;
-                }
-            }
-        }
-    }
-
-    async fn embed_texts_openai_once(
-        &self,
-        inputs: &[String],
-        expected_dim: usize,
-    ) -> std::result::Result<Vec<Vec<f32>>, EmbedCallError> {
-        let (api_key, base_url, http) = match &self.transport {
-            EmbeddingTransport::OpenAi {
-                api_key,
-                base_url,
-                http,
-            } => (api_key, base_url, http),
-            EmbeddingTransport::Mock => unreachable!("mock transport should not call OpenAI"),
-        };
-
-        let request = serde_json::json!({
-            "model": self.model,
-            "input": inputs,
-            "dimensions": expected_dim,
-        });
-        let url = format!("{}/embeddings", base_url);
-        let response = http
-            .post(&url)
-            .bearer_auth(api_key)
-            .json(&request)
-            .send()
-            .await;
-
-        let response = match response {
-            Ok(resp) => resp,
-            Err(err) => {
-                let retryable = err.is_timeout() || err.is_connect() || err.is_request();
-                return Err(EmbedCallError {
-                    message: format!("embedding request failed: {}", err),
-                    retryable,
-                });
-            }
-        };
-
-        let status = response.status();
-        let body = match response.text().await {
-            Ok(body) => body,
-            Err(err) => {
-                return Err(EmbedCallError {
-                    message: format!(
-                        "embedding response read failed (status {}): {}",
-                        status, err
-                    ),
-                    retryable: status.is_server_error() || status.as_u16() == 429,
-                });
-            }
-        };
-
-        if !status.is_success() {
-            let message = parse_openai_error_message(&body).unwrap_or_else(|| body.clone());
-            return Err(EmbedCallError {
-                message: format!(
-                    "embedding request failed with status {}: {}",
-                    status, message
-                ),
-                retryable: status.is_server_error() || status.as_u16() == 429,
-            });
-        }
-
-        let mut parsed: OpenAiEmbeddingResponse =
-            serde_json::from_str(&body).map_err(|err| EmbedCallError {
-                message: format!("embedding response decode failed: {}", err),
-                retryable: false,
-            })?;
-
-        if parsed.data.len() != inputs.len() {
-            return Err(EmbedCallError {
-                message: format!(
-                    "embedding response size mismatch: expected {}, got {}",
-                    inputs.len(),
-                    parsed.data.len()
-                ),
-                retryable: false,
-            });
-        }
-
-        parsed.data.sort_by_key(|item| item.index);
-        let mut vectors = Vec::with_capacity(parsed.data.len());
-        for (idx, item) in parsed.data.into_iter().enumerate() {
-            if item.index != idx {
-                return Err(EmbedCallError {
-                    message: format!(
-                        "embedding response index mismatch at position {}: got {}",
-                        idx, item.index
-                    ),
-                    retryable: false,
-                });
-            }
-            if item.embedding.len() != expected_dim {
-                return Err(EmbedCallError {
-                    message: format!(
-                        "embedding dimension mismatch: expected {}, got {}",
-                        expected_dim,
-                        item.embedding.len()
-                    ),
-                    retryable: false,
-                });
-            }
-            vectors.push(item.embedding);
-        }
-        Ok(vectors)
-    }
-}
-
-fn parse_openai_error_message(body: &str) -> Option<String> {
-    serde_json::from_str::<OpenAiErrorEnvelope>(body)
-        .ok()
-        .map(|e| e.error.message)
-        .filter(|msg| !msg.trim().is_empty())
-}
-
-fn parse_env_usize(name: &str, default: usize) -> usize {
-    std::env::var(name)
-        .ok()
-        .and_then(|v| v.parse::<usize>().ok())
-        .filter(|v| *v > 0)
-        .unwrap_or(default)
-}
-
-fn parse_env_u64(name: &str, default: u64) -> u64 {
-    std::env::var(name)
-        .ok()
-        .and_then(|v| v.parse::<u64>().ok())
-        .filter(|v| *v > 0)
-        .unwrap_or(default)
-}
-
-fn env_flag(name: &str) -> bool {
-    std::env::var(name)
-        .ok()
-        .map(|v| {
-            let s = v.trim().to_ascii_lowercase();
-            s == "1" || s == "true" || s == "yes" || s == "on"
-        })
-        .unwrap_or(false)
-}
-
-fn mock_embedding(input: &str, dim: usize) -> Vec<f32> {
-    let mut seed = fnv1a64(input.as_bytes());
-    let mut out = Vec::with_capacity(dim);
-    for _ in 0..dim {
-        seed = xorshift64(seed);
-        let ratio = (seed as f64 / u64::MAX as f64) as f32;
-        out.push((ratio * 2.0) - 1.0);
-    }
-
-    let norm = out
-        .iter()
-        .map(|v| (*v as f64) * (*v as f64))
-        .sum::<f64>()
-        .sqrt() as f32;
-    if norm > f32::EPSILON {
-        for value in &mut out {
-            *value /= norm;
-        }
-    }
-    out
-}
-
-fn fnv1a64(bytes: &[u8]) -> u64 {
-    let mut hash = 14695981039346656037u64;
-    for byte in bytes {
-        hash ^= *byte as u64;
-        hash = hash.wrapping_mul(1099511628211u64);
-    }
-    hash
-}
-
-fn xorshift64(mut x: u64) -> u64 {
-    x ^= x << 13;
-    x ^= x >> 7;
-    x ^= x << 17;
-    x
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[tokio::test]
-    async fn mock_embeddings_are_deterministic() {
-        let client = EmbeddingClient::mock_for_tests();
-        let a = client.embed_text("alpha", 8).await.unwrap();
-        let b = client.embed_text("alpha", 8).await.unwrap();
-        let c = client.embed_text("beta", 8).await.unwrap();
-        assert_eq!(a, b);
-        assert_ne!(a, c);
-        assert_eq!(a.len(), 8);
-    }
-}
--- a/crates/omnigraph-compiler/src/lib.rs
+++ b/crates/omnigraph-compiler/src/lib.rs
@ -1,5 +1,4 @@
 pub mod catalog;
-pub mod embedding;
 pub mod error;
 pub mod ir;
 pub mod json_output;
--- a/crates/omnigraph-compiler/src/query/typecheck.rs
+++ b/crates/omnigraph-compiler/src/query/typecheck.rs
@ -261,13 +261,13 @@ fn typecheck_mutation(catalog: &Catalog, mutation: &Mutation, params: &[Param])
                        continue;
                    }

-                    if let Some(source_prop) = node_type.embed_sources.get(prop_name) {
-                        if assigned_props.contains(source_prop.as_str()) {
+                    if let Some(embed) = node_type.embed_sources.get(prop_name) {
+                        if assigned_props.contains(embed.source.as_str()) {
                            continue;
                        }
                        return Err(NanoError::Type(format!(
                            "T12: insert for `{}` must provide non-nullable property `{}` or @embed source `{}`",
-                            insert.type_name, prop_name, source_prop
+                            insert.type_name, prop_name, embed.source
                        )));
                    }

--- a/crates/omnigraph-compiler/src/schema/ast.rs
+++ b/crates/omnigraph-compiler/src/schema/ast.rs
@ -1,3 +1,5 @@
+use std::collections::BTreeMap;
+
 use crate::types::PropType;
 use serde::{Deserialize, Serialize};

@ -50,6 +52,11 @@ pub struct PropDecl {
 pub struct Annotation {
    pub name: String,
    pub value: Option<String>,
+    /// Keyword arguments, e.g. `model="…"` on `@embed("source", model="…")`.
+    /// Empty is skipped in serialization so existing schemas' IR JSON (and
+    /// hash) stay byte-identical; `BTreeMap` keeps the order deterministic.
+    #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
+    pub kwargs: BTreeMap<String, String>,
 }

 /// A typed constraint declared in a node or edge body.
--- a/crates/omnigraph-compiler/src/schema/parser.rs
+++ b/crates/omnigraph-compiler/src/schema/parser.rs
@ -556,12 +556,32 @@ fn parse_type_ref(pair: pest::iterators::Pair<Rule>) -> Result<PropType> {
 fn parse_annotation(pair: pest::iterators::Pair<Rule>) -> Result<Annotation> {
    let mut inner = pair.into_inner();
    let name = inner.next().unwrap().as_str().to_string();
-    let value = inner
-        .next()
-        .map(|p| decode_string_literal(p.as_str()))
-        .transpose()?;
+    let mut value = None;
+    let mut kwargs = std::collections::BTreeMap::new();
+    if let Some(args) = inner.next() {
+        // `annotation_args`: one positional arg followed by zero or more
+        // `key = literal` kwargs (e.g. `@embed("source", model="…")`).
+        for arg in args.into_inner() {
+            match arg.as_rule() {
+                Rule::annotation_arg => {
+                    value = Some(decode_string_literal(arg.as_str())?);
+                }
+                Rule::annotation_kwarg => {
+                    let mut kw = arg.into_inner();
+                    let key = kw.next().unwrap().as_str().to_string();
+                    let raw = kw.next().unwrap().as_str();
+                    kwargs.insert(key, decode_string_literal(raw)?);
+                }
+                _ => {}
+            }
+        }
+    }

-    Ok(Annotation { name, value })
+    Ok(Annotation {
+        name,
+        value,
+        kwargs,
+    })
 }

 fn validate_string_annotation(
@ -823,6 +843,17 @@ fn validate_property_annotations(
                        type_name, source_prop
                    )));
                }
+
+                // `model` is the only supported kwarg; reject the rest loudly so
+                // a typo can't be silently ignored (it would never validate).
+                for key in ann.kwargs.keys() {
+                    if key != "model" {
+                        return Err(NanoError::Parse(format!(
+                            "@embed on {}.{} has unknown argument '{}=' (only 'model' is supported)",
+                            type_name, prop.name, key
+                        )));
+                    }
+                }
            }
            _ => {}
        }
--- a/crates/omnigraph-compiler/src/schema/parser_tests.rs
+++ b/crates/omnigraph-compiler/src/schema/parser_tests.rs
@ -508,6 +508,66 @@ embedding: Vector(3) @embed(title)
    }
 }

+#[test]
+fn test_parse_embed_annotation_with_model_kwarg() {
+    let input = r#"
+node Doc {
+title: String
+embedding: Vector(3) @embed("title", model="openai/text-embedding-3-large")
+}
+"#;
+    let schema = parse_schema(input).unwrap();
+    match &schema.declarations[0] {
+        SchemaDecl::Node(n) => {
+            let ann = &n.properties[1].annotations[0];
+            assert_eq!(ann.name, "embed");
+            assert_eq!(ann.value.as_deref(), Some("title"));
+            assert_eq!(
+                ann.kwargs.get("model").map(String::as_str),
+                Some("openai/text-embedding-3-large")
+            );
+        }
+        _ => panic!("expected Node"),
+    }
+}
+
+#[test]
+fn test_parse_embed_annotation_without_model_has_empty_kwargs() {
+    let input = r#"
+node Doc {
+title: String
+embedding: Vector(3) @embed("title")
+}
+"#;
+    let schema = parse_schema(input).unwrap();
+    match &schema.declarations[0] {
+        SchemaDecl::Node(n) => {
+            let ann = &n.properties[1].annotations[0];
+            assert!(ann.kwargs.is_empty());
+            // Empty kwargs must NOT serialize, so existing schemas' IR JSON (and
+            // thus the schema hash) stay byte-identical after this field is added.
+            let json = serde_json::to_string(ann).unwrap();
+            assert!(!json.contains("kwargs"), "unexpected kwargs in {json}");
+        }
+        _ => panic!("expected Node"),
+    }
+}
+
+#[test]
+fn test_parse_embed_annotation_rejects_unknown_kwarg() {
+    let input = r#"
+node Doc {
+title: String
+embedding: Vector(3) @embed("title", provider="openai")
+}
+"#;
+    let err = parse_schema(input).unwrap_err();
+    assert!(
+        err.to_string().contains("only 'model' is supported"),
+        "got: {err}"
+    );
+}
+
 #[test]
 fn test_parse_edge_no_body() {
    let input = "edge WorksAt: Person -> Company\n";
--- a/crates/omnigraph-compiler/src/schema/schema.pest
+++ b/crates/omnigraph-compiler/src/schema/schema.pest
@ -42,8 +42,10 @@ enum_value = @{ (ASCII_ALPHANUMERIC | "_" | "-")+ }
 base_type = { "String" | "Blob" | "Bool" | "I32" | "I64" | "U32" | "U64" | "F32" | "F64" | "DateTime" | "Date" }

 // Annotation rule excludes constraint keywords followed by "(" — those are body_constraints
-annotation = { "@" ~ !(constraint_name ~ "(") ~ ident ~ ("(" ~ annotation_arg ~ ")")? }
+annotation = { "@" ~ !(constraint_name ~ "(") ~ ident ~ ("(" ~ annotation_args ~ ")")? }
+annotation_args = { annotation_arg ~ ("," ~ annotation_kwarg)* }
 annotation_arg = { literal | ident }
+annotation_kwarg = { ident ~ "=" ~ literal }

 literal = { string_lit | float_lit | integer | bool_lit }

--- a/crates/omnigraph-server/src/lib.rs
+++ b/crates/omnigraph-server/src/lib.rs
@ -209,6 +209,9 @@ pub struct GraphStartupConfig {
    pub graph_id: String,
    pub uri: String,
    pub policy: Option<PolicySource>,
+    /// Pre-resolved embedding config from an applied cluster provider profile.
+    /// Legacy config paths leave this unset and continue to use env resolution.
+    pub embedding: Option<omnigraph::embedding::EmbeddingConfig>,
    /// Per-graph stored-query registry, loaded and identity-checked at
    /// settings-build time; type-checked against the schema when this
    /// graph's engine opens.
@ -1088,6 +1091,11 @@ async fn open_single_graph(cfg: GraphStartupConfig) -> Result<Arc<GraphHandle>>
    let db = Omnigraph::open(&uri)
        .await
        .map_err(|err| color_eyre::eyre::eyre!("open graph '{}' at {}: {err}", graph_id, uri))?;
+    let db = if let Some(embedding) = cfg.embedding {
+        db.with_embedding_config(Arc::new(embedding))
+    } else {
+        db
+    };

    // Validate this graph's stored queries against the live schema and
    // resolve them to an attachable handle (refuse boot on breakage).
--- a/crates/omnigraph-server/src/settings.rs
+++ b/crates/omnigraph-server/src/settings.rs
@ -98,6 +98,15 @@ pub(crate) async fn load_cluster_settings(
            graph_id: graph.graph_id.clone(),
            uri: graph.root.to_string_lossy().to_string(),
            policy: graph_policies.get(&graph.graph_id).cloned(),
+            embedding: graph
+                .embedding
+                .as_ref()
+                .map(|profile| {
+                    profile.resolve().map_err(|err| {
+                        eyre!("embedding provider for graph '{}': {err}", graph.graph_id)
+                    })
+                })
+                .transpose()?,
            queries: registry,
        });
    }
@ -517,6 +526,7 @@ mod tests {
                        .to_string_lossy()
                        .into_owned(),
                    policy: None,
+                    embedding: None,
                    queries: crate::queries::QueryRegistry::default(),
                }],
                config_path: temp.path().join("omnigraph.yaml"),
@ -568,6 +578,7 @@ mod tests {
                        .to_string_lossy()
                        .into_owned(),
                    policy: None,
+                    embedding: None,
                    queries: crate::queries::QueryRegistry::default(),
                }],
                config_path: temp.path().join("cluster"),
--- a/crates/omnigraph-server/tests/multi_graph.rs
+++ b/crates/omnigraph-server/tests/multi_graph.rs
@ -5,9 +5,12 @@ use std::fs;

 use axum::body::{Body, to_bytes};
 use axum::http::{Method, Request, StatusCode};
-use omnigraph_server::api::ErrorOutput;
+use omnigraph::db::Omnigraph;
+use omnigraph::loader::{LoadMode, load_jsonl};
+use omnigraph_server::api::{ErrorOutput, ReadRequest};
 use omnigraph_server::{AppState, build_app};
 use serde_json::Value;
+use serial_test::serial;
 use tower::ServiceExt;


@ -457,6 +460,180 @@ async fn cluster_boot_serves_applied_state() {
    assert_eq!(status, StatusCode::OK, "{body}");
 }

+#[tokio::test(flavor = "multi_thread")]
+#[serial]
+async fn cluster_boot_injects_embedding_provider_config() {
+    const EMBED_SCHEMA: &str = r#"
+node Doc {
+    slug: String @key
+    title: String @index
+    embedding: Vector(4) @embed("title", model="cluster-mock") @index
+}
+"#;
+    const EMBED_QUERY: &str = r#"
+query vector_search_string($q: String) {
+    match { $d: Doc }
+    return { $d.slug, $d.title }
+    order { nearest($d.embedding, $q) }
+    limit 3
+}
+"#;
+
+    let alpha = mock_embedding("alpha", 4);
+    let beta = mock_embedding("beta", 4);
+    let gamma = mock_embedding("gamma", 4);
+    let data = format!(
+        concat!(
+            r#"{{"type":"Doc","data":{{"slug":"alpha-doc","title":"alpha guide","embedding":[{}]}}}}"#,
+            "\n",
+            r#"{{"type":"Doc","data":{{"slug":"beta-doc","title":"beta guide","embedding":[{}]}}}}"#,
+            "\n",
+            r#"{{"type":"Doc","data":{{"slug":"gamma-doc","title":"gamma handbook","embedding":[{}]}}}}"#
+        ),
+        format_vector(&alpha),
+        format_vector(&beta),
+        format_vector(&gamma),
+    );
+
+    let temp = tempfile::tempdir().unwrap();
+    fs::write(temp.path().join("docs.pg"), EMBED_SCHEMA).unwrap();
+    fs::write(temp.path().join("search.gq"), EMBED_QUERY).unwrap();
+    fs::write(
+        temp.path().join("cluster.yaml"),
+        r#"
+version: 1
+providers:
+  embedding:
+    default:
+      kind: mock
+      model: cluster-mock
+graphs:
+  knowledge:
+    schema: ./docs.pg
+    embedding_provider: default
+    queries:
+      vector_search_string:
+        file: ./search.gq
+"#,
+    )
+    .unwrap();
+    let import = omnigraph_cluster::import_config_dir(temp.path()).await;
+    assert!(import.ok, "{:?}", import.diagnostics);
+    let apply = omnigraph_cluster::apply_config_dir(temp.path()).await;
+    assert!(apply.ok && apply.converged, "{:?}", apply.diagnostics);
+
+    let graph_uri = temp
+        .path()
+        .join("graphs/knowledge.omni")
+        .to_string_lossy()
+        .to_string();
+    let mut db = Omnigraph::open(&graph_uri).await.unwrap();
+    load_jsonl(&mut db, &data, LoadMode::Overwrite)
+        .await
+        .unwrap();
+
+    let _guard = EnvGuard::set(&[
+        ("OMNIGRAPH_EMBEDDINGS_MOCK", None),
+        ("OMNIGRAPH_EMBED_PROVIDER", None),
+        ("OMNIGRAPH_EMBED_BASE_URL", None),
+        ("OMNIGRAPH_EMBED_MODEL", None),
+        ("OPENROUTER_API_KEY", None),
+        ("OPENAI_API_KEY", None),
+        ("GEMINI_API_KEY", None),
+    ]);
+    let settings = cluster_settings(temp.path()).await.unwrap();
+    let omnigraph_server::ServerConfigMode::Multi {
+        graphs,
+        config_path,
+        server_policy,
+    } = settings.mode
+    else {
+        panic!("cluster boot must select multi-graph routing");
+    };
+    let state = omnigraph_server::open_multi_graph_state(
+        graphs,
+        Vec::new(),
+        server_policy.as_ref(),
+        config_path,
+    )
+    .await
+    .unwrap();
+    let app = build_app(state);
+
+    let read = ReadRequest {
+        query_source: EMBED_QUERY.to_string(),
+        query_name: Some("vector_search_string".to_string()),
+        params: Some(serde_json::json!({ "q": "alpha" })),
+        branch: Some("main".to_string()),
+        snapshot: None,
+    };
+    let (status, body) = json_response(
+        &app,
+        Request::builder()
+            .uri("/graphs/knowledge/read")
+            .method(Method::POST)
+            .header("content-type", "application/json")
+            .body(Body::from(serde_json::to_vec(&read).unwrap()))
+            .unwrap(),
+    )
+    .await;
+
+    assert_eq!(status, StatusCode::OK, "{body}");
+    assert_eq!(body["row_count"], 3);
+    assert_eq!(body["rows"][0]["d.slug"], "alpha-doc");
+}
+
+#[tokio::test(flavor = "multi_thread")]
+#[serial]
+async fn cluster_boot_refuses_missing_embedding_secret_env() {
+    let temp = tempfile::tempdir().unwrap();
+    fs::write(
+        temp.path().join("people.pg"),
+        "\nnode Person {\n  name: String @key\n}\n",
+    )
+    .unwrap();
+    fs::write(
+        temp.path().join("people.gq"),
+        "\nquery find_person($name: String) {\n  match { $p: Person { name: $name } }\n  return { $p.name }\n}\n",
+    )
+    .unwrap();
+    fs::write(
+        temp.path().join("cluster.yaml"),
+        r#"
+version: 1
+providers:
+  embedding:
+    default:
+      kind: openai-compatible
+      api_key: ${OG_TEST_MISSING_EMBED_KEY}
+graphs:
+  knowledge:
+    schema: ./people.pg
+    embedding_provider: default
+    queries:
+      find_person:
+        file: ./people.gq
+"#,
+    )
+    .unwrap();
+    let import = omnigraph_cluster::import_config_dir(temp.path()).await;
+    assert!(import.ok, "{:?}", import.diagnostics);
+    let apply = omnigraph_cluster::apply_config_dir(temp.path()).await;
+    assert!(apply.ok && apply.converged, "{:?}", apply.diagnostics);
+
+    let _guard = EnvGuard::set(&[
+        ("OG_TEST_MISSING_EMBED_KEY", None),
+        ("OMNIGRAPH_EMBEDDINGS_MOCK", None),
+    ]);
+    let err = cluster_settings(temp.path()).await.unwrap_err();
+    let message = err.to_string();
+    assert!(
+        message.contains("embedding provider for graph 'knowledge'"),
+        "{message}"
+    );
+    assert!(message.contains("OG_TEST_MISSING_EMBED_KEY"), "{message}");
+}
+
 #[tokio::test]
 async fn cluster_boot_wires_policy_bindings_into_cedar_slots() {
    let temp = tempfile::tempdir().unwrap();
--- a/crates/omnigraph/src/db/omnigraph.rs
+++ b/crates/omnigraph/src/db/omnigraph.rs
@ -158,6 +158,17 @@ pub struct Omnigraph {
    /// `apply_schema_as` consults this field (PR #2 proof-of-concept);
    /// PR #3 fans the `enforce()` call out to the remaining writers.
    policy: Option<Arc<dyn omnigraph_policy::PolicyChecker>>,
+    /// Lazily-built, reused-across-queries embedding client. Built on the first
+    /// `nearest($v, "string")` that needs server-side embedding (so a graph that
+    /// never embeds needs no provider key), then shared by every later query —
+    /// avoids the per-query `from_env()` rebuild and keeps the provider HTTP
+    /// connection pool warm. `OnceCell` guarantees a single initialization.
+    embedding: Arc<tokio::sync::OnceCell<crate::embedding::EmbeddingClient>>,
+    /// Optional pre-resolved embedding config (RFC-012 Phase 5), injected from an
+    /// applied cluster `providers.embedding` profile via [`Omnigraph::with_embedding_config`].
+    /// When set, the embedding cell builds its client from this instead of
+    /// `EmbeddingClient::from_env()`; `None` keeps the env fallback.
+    embedding_config: Option<Arc<crate::embedding::EmbeddingConfig>>,
 }

 /// Whether [`Omnigraph::open`] runs the open-time recovery sweep.
@ -321,6 +332,8 @@ impl Omnigraph {
            write_queue: Arc::new(crate::db::write_queue::WriteQueueManager::new()),
            merge_exclusive: Arc::new(tokio::sync::Mutex::new(())),
            policy: None,
+            embedding: Arc::new(tokio::sync::OnceCell::new()),
+            embedding_config: None,
        })
    }

@ -420,6 +433,8 @@ impl Omnigraph {
            write_queue: Arc::new(crate::db::write_queue::WriteQueueManager::new()),
            merge_exclusive: Arc::new(tokio::sync::Mutex::new(())),
            policy: None,
+            embedding: Arc::new(tokio::sync::OnceCell::new()),
+            embedding_config: None,
        })
    }

@ -467,6 +482,29 @@ impl Omnigraph {
        self
    }

+    /// The lazily-initialized, reused-across-queries embedding client cell
+    /// (see the `embedding` field doc). The query executor resolves the client
+    /// through this on the first `nearest($v, "string")` that needs embedding.
+    pub(crate) fn embedding_cell(
+        &self,
+    ) -> &tokio::sync::OnceCell<crate::embedding::EmbeddingClient> {
+        &self.embedding
+    }
+
+    /// Install a pre-resolved embedding config (RFC-012 Phase 5). Builder-style,
+    /// mirroring [`Omnigraph::with_policy`]: a graph served from a cluster
+    /// embedding provider profile injects it here; an embedded/CLI caller that doesn't
+    /// call this keeps the `EmbeddingClient::from_env()` fallback.
+    pub fn with_embedding_config(mut self, config: Arc<crate::embedding::EmbeddingConfig>) -> Self {
+        self.embedding_config = Some(config);
+        self
+    }
+
+    /// The injected embedding config, if any (see the `embedding_config` field).
+    pub(crate) fn embedding_config_ref(&self) -> Option<&crate::embedding::EmbeddingConfig> {
+        self.embedding_config.as_deref()
+    }
+
    /// Engine-layer policy enforcement gate (MR-722 chassis core).
    ///
    /// * If no policy is installed → no-op (returns `Ok(())`).
--- a/crates/omnigraph/src/embedding.rs
+++ b/crates/omnigraph/src/embedding.rs
@ -8,29 +8,157 @@ use tokio::time::sleep;

 use crate::error::{OmniError, Result};

-const GEMINI_EMBED_MODEL: &str = "gemini-embedding-2-preview";
+const DEFAULT_OPENROUTER_BASE_URL: &str = "https://openrouter.ai/api/v1";
+const DEFAULT_OPENROUTER_MODEL: &str = "openai/text-embedding-3-large";
+const DEFAULT_OPENAI_BASE_URL: &str = "https://api.openai.com/v1";
+const DEFAULT_OPENAI_MODEL: &str = "text-embedding-3-large";
 const DEFAULT_GEMINI_BASE_URL: &str = "https://generativelanguage.googleapis.com/v1beta";
+const DEFAULT_GEMINI_MODEL: &str = "gemini-embedding-2";
 const DEFAULT_TIMEOUT_MS: u64 = 30_000;
 const DEFAULT_RETRY_ATTEMPTS: usize = 4;
 const DEFAULT_RETRY_BACKOFF_MS: u64 = 200;
-const QUERY_TASK_TYPE: &str = "RETRIEVAL_QUERY";
-const DOCUMENT_TASK_TYPE: &str = "RETRIEVAL_DOCUMENT";
+const DEFAULT_DEADLINE_MS: u64 = 60_000;
+const GEMINI_QUERY_TASK_TYPE: &str = "RETRIEVAL_QUERY";
+const GEMINI_DOCUMENT_TASK_TYPE: &str = "RETRIEVAL_DOCUMENT";

-#[derive(Clone, Debug)]
-enum EmbeddingTransport {
+/// Which embedding API a client speaks. Each variant owns its request shape,
+/// auth, and response parsing; everything else (retry, deadline, normalization,
+/// tracing) is provider-independent.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum Provider {
+    /// OpenAI-compatible (`POST {base}/embeddings`, bearer auth,
+    /// `{model, input, dimensions}`). Covers OpenRouter (the default gateway),
+    /// OpenAI direct, and self-hosted endpoints (vLLM/Ollama/LM Studio).
+    OpenAiCompatible,
+    /// Google Gemini `generativelanguage` (`POST {base}/models/{model}:embedContent`,
+    /// `x-goog-api-key`), with `RETRIEVAL_QUERY` / `RETRIEVAL_DOCUMENT` task types.
+    Gemini,
+    /// Deterministic, offline. No network, no key.
    Mock,
-    Gemini {
+}
+
+/// Whether the text being embedded is a search query or a stored document.
+/// Only Gemini distinguishes these (`RETRIEVAL_QUERY` vs `RETRIEVAL_DOCUMENT`);
+/// OpenAI-compatible providers and Mock produce the identical request for both,
+/// which is also the same-space property a query relies on.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+enum EmbedRole {
+    Query,
+    Document,
+}
+
+/// The single source of truth for how embedding text becomes a vector:
+/// provider + model + endpoint + key. Resolved once (from env for direct
+/// engine/CLI callers, or from an applied cluster `providers.embedding` profile
+/// at server boot) and shared by the query path and the offline CLI so stored
+/// and query vectors stay same-space by construction.
+#[derive(Clone, Debug)]
+pub struct EmbeddingConfig {
+    pub provider: Provider,
+    pub model: String,
+    pub base_url: String,
+    pub api_key: String,
+}
+
+impl EmbeddingConfig {
+    /// Resolve from the environment. Precedence:
+    /// 1. `OMNIGRAPH_EMBEDDINGS_MOCK` → Mock.
+    /// 2. `OMNIGRAPH_EMBED_PROVIDER` (`openai-compatible`|`openai`|`gemini`|`mock`);
+    ///    unset defaults to `openai-compatible` (OpenRouter).
+    /// 3. `OMNIGRAPH_EMBED_BASE_URL` else the provider default.
+    /// 4. `OMNIGRAPH_EMBED_MODEL` else the provider default.
+    /// 5. provider api-key env (`OPENROUTER_API_KEY`/`OPENAI_API_KEY`, or `GEMINI_API_KEY`).
+    pub fn from_env() -> Result<Self> {
+        if env_flag("OMNIGRAPH_EMBEDDINGS_MOCK") {
+            return Ok(Self::mock());
+        }
+
+        let alias = env_string("OMNIGRAPH_EMBED_PROVIDER");
+        if alias.as_deref() == Some("mock") {
+            return Ok(Self::mock());
+        }
+
+        let (provider, default_base, default_model, key_envs) = provider_profile(alias.as_deref())?;
+        let base_url = env_string("OMNIGRAPH_EMBED_BASE_URL")
+            .unwrap_or_else(|| default_base.to_string())
+            .trim_end_matches('/')
+            .to_string();
+        let model =
+            env_string("OMNIGRAPH_EMBED_MODEL").unwrap_or_else(|| default_model.to_string());
+
+        let api_key = key_envs.iter().copied().find_map(env_string).ok_or_else(|| {
+            OmniError::manifest_internal(format!(
+                "{} is required for the {} embedding provider",
+                key_envs.join(" or "),
+                alias.as_deref().unwrap_or("openai-compatible")
+            ))
+        })?;
+
+        Ok(Self {
+            provider,
+            model,
+            base_url,
+            api_key,
+        })
+    }
+
+    /// Build a config from explicit parts — the cluster `providers.embedding` profile path
+    /// (RFC-012 Phase 5). `provider`/`base_url`/`model` default exactly as
+    /// `from_env` does (shared `provider_profile`); `api_key` is already resolved
+    /// (the cluster path resolves a `${NAME}` ref before calling this).
+    pub fn from_parts(
+        provider: Option<&str>,
+        base_url: Option<String>,
+        model: Option<String>,
        api_key: String,
-        base_url: String,
-        http: Client,
-    },
+    ) -> Result<Self> {
+        if provider == Some("mock") {
+            // An explicit `model` (e.g. a cluster `providers.embedding` profile) is
+            // authoritative — it is what the same-space check compares against —
+            // so honor it; fall back to `mock()`'s env-based model only when the
+            // caller supplied none. Without this, a profile's `model` is silently
+            // dropped and the same-space check resolves to OMNIGRAPH_EMBED_MODEL.
+            let mut config = Self::mock();
+            if let Some(model) = model {
+                config.model = model;
+            }
+            return Ok(config);
+        }
+        let (provider, default_base, default_model, _key_envs) = provider_profile(provider)?;
+        let base_url = base_url
+            .unwrap_or_else(|| default_base.to_string())
+            .trim_end_matches('/')
+            .to_string();
+        let model = model.unwrap_or_else(|| default_model.to_string());
+        Ok(Self {
+            provider,
+            model,
+            base_url,
+            api_key,
+        })
+    }
+
+    fn mock() -> Self {
+        Self {
+            provider: Provider::Mock,
+            // Honor OMNIGRAPH_EMBED_MODEL so the same-space check is exercisable
+            // under mock; the mock vectors themselves don't depend on the model.
+            model: env_string("OMNIGRAPH_EMBED_MODEL").unwrap_or_default(),
+            base_url: String::new(),
+            api_key: String::new(),
+        }
+    }
 }

 #[derive(Clone, Debug)]
 pub struct EmbeddingClient {
+    config: EmbeddingConfig,
+    http: Client,
    retry_attempts: usize,
    retry_backoff_ms: u64,
-    transport: EmbeddingTransport,
+    /// Total wall-clock budget for one embed call, across all retries
+    /// (`OMNIGRAPH_EMBED_DEADLINE_MS`). `0` = unbounded.
+    deadline_ms: u64,
 }

 struct EmbedCallError {
@ -58,35 +186,39 @@ struct GoogleErrorBody {
    message: String,
 }

+#[derive(Debug, Deserialize)]
+struct OpenAiEmbeddingResponse {
+    data: Vec<OpenAiEmbeddingDatum>,
+}
+
+#[derive(Debug, Deserialize)]
+struct OpenAiEmbeddingDatum {
+    index: usize,
+    embedding: Vec<f32>,
+}
+
+#[derive(Debug, Deserialize)]
+struct OpenAiErrorEnvelope {
+    error: OpenAiErrorBody,
+}
+
+#[derive(Debug, Deserialize)]
+struct OpenAiErrorBody {
+    message: String,
+}
+
 impl EmbeddingClient {
    pub fn from_env() -> Result<Self> {
+        Self::new(EmbeddingConfig::from_env()?)
+    }
+
+    pub fn new(config: EmbeddingConfig) -> Result<Self> {
        let retry_attempts =
            parse_env_usize("OMNIGRAPH_EMBED_RETRY_ATTEMPTS", DEFAULT_RETRY_ATTEMPTS);
        let retry_backoff_ms =
            parse_env_u64("OMNIGRAPH_EMBED_RETRY_BACKOFF_MS", DEFAULT_RETRY_BACKOFF_MS);
-
-        if env_flag("OMNIGRAPH_EMBEDDINGS_MOCK") {
-            return Ok(Self {
-                retry_attempts,
-                retry_backoff_ms,
-                transport: EmbeddingTransport::Mock,
-            });
-        }
-
-        let api_key = std::env::var("GEMINI_API_KEY")
-            .ok()
-            .map(|v| v.trim().to_string())
-            .filter(|v| !v.is_empty())
-            .ok_or_else(|| {
-                OmniError::manifest_internal(
-                    "GEMINI_API_KEY is required when nearest() needs a string embedding",
-                )
-            })?;
-        let base_url = std::env::var("OMNIGRAPH_GEMINI_BASE_URL")
-            .ok()
-            .map(|v| v.trim_end_matches('/').to_string())
-            .filter(|v| !v.is_empty())
-            .unwrap_or_else(|| DEFAULT_GEMINI_BASE_URL.to_string());
+        let deadline_ms =
+            parse_env_u64_allow_zero("OMNIGRAPH_EMBED_DEADLINE_MS", DEFAULT_DEADLINE_MS);
        let timeout_ms = parse_env_u64("OMNIGRAPH_EMBED_TIMEOUT_MS", DEFAULT_TIMEOUT_MS);
        let http = Client::builder()
            .timeout(Duration::from_millis(timeout_ms))
@ -96,39 +228,36 @@ impl EmbeddingClient {
            })?;

        Ok(Self {
+            config,
+            http,
            retry_attempts,
            retry_backoff_ms,
-            transport: EmbeddingTransport::Gemini {
-                api_key,
-                base_url,
-                http,
-            },
+            deadline_ms,
        })
    }

+    pub fn config(&self) -> &EmbeddingConfig {
+        &self.config
+    }
+
    #[cfg(test)]
    fn mock_for_tests() -> Self {
-        Self {
-            retry_attempts: DEFAULT_RETRY_ATTEMPTS,
-            retry_backoff_ms: DEFAULT_RETRY_BACKOFF_MS,
-            transport: EmbeddingTransport::Mock,
-        }
+        Self::new(EmbeddingConfig::mock()).expect("mock client builds")
    }

    pub async fn embed_query_text(&self, input: &str, expected_dim: usize) -> Result<Vec<f32>> {
-        self.embed_text(input, expected_dim, QUERY_TASK_TYPE).await
+        self.embed_text(input, expected_dim, EmbedRole::Query).await
    }

    pub async fn embed_document_text(&self, input: &str, expected_dim: usize) -> Result<Vec<f32>> {
-        self.embed_text(input, expected_dim, DOCUMENT_TASK_TYPE)
-            .await
+        self.embed_text(input, expected_dim, EmbedRole::Document).await
    }

    async fn embed_text(
        &self,
        input: &str,
        expected_dim: usize,
-        task_type: &'static str,
+        role: EmbedRole,
    ) -> Result<Vec<f32>> {
        if expected_dim == 0 {
            return Err(OmniError::manifest_internal(
@ -136,10 +265,71 @@ impl EmbeddingClient {
            ));
        }

-        match &self.transport {
-            EmbeddingTransport::Mock => Ok(mock_embedding(input, expected_dim)),
-            EmbeddingTransport::Gemini { .. } => {
-                self.with_retry(|| self.embed_text_gemini_once(input, expected_dim, task_type))
+        let started = std::time::Instant::now();
+        let result = self
+            .run_with_deadline(self.embed_text_inner(input, expected_dim, role))
+            .await;
+        let elapsed_ms = started.elapsed().as_millis() as u64;
+
+        match &result {
+            Ok(_) => tracing::info!(
+                target: "omnigraph::embedding",
+                provider = ?self.config.provider,
+                model = %self.config.model,
+                dim = expected_dim,
+                elapsed_ms,
+                outcome = "ok",
+                "embedding succeeded"
+            ),
+            Err(err) => tracing::warn!(
+                target: "omnigraph::embedding",
+                provider = ?self.config.provider,
+                model = %self.config.model,
+                dim = expected_dim,
+                elapsed_ms,
+                outcome = "error",
+                error = %err,
+                "embedding failed"
+            ),
+        }
+        result
+    }
+
+    /// Bound the whole embed operation (all retries + backoff) by `deadline_ms`,
+    /// so a degraded provider can never hang the caller for the full retry
+    /// envelope. Applies to every embed call (query and document). `0` =
+    /// unbounded. Embedding has no Lance/manifest side effects, so cancelling the
+    /// in-flight request future on elapse is safe.
+    async fn run_with_deadline<F>(&self, fut: F) -> Result<Vec<f32>>
+    where
+        F: Future<Output = Result<Vec<f32>>>,
+    {
+        if self.deadline_ms == 0 {
+            return fut.await;
+        }
+        match tokio::time::timeout(Duration::from_millis(self.deadline_ms), fut).await {
+            Ok(res) => res,
+            Err(_elapsed) => Err(OmniError::manifest_internal(format!(
+                "embedding deadline exceeded after {} ms (provider={:?}, model={})",
+                self.deadline_ms, self.config.provider, self.config.model
+            ))),
+        }
+    }
+
+    async fn embed_text_inner(
+        &self,
+        input: &str,
+        expected_dim: usize,
+        role: EmbedRole,
+    ) -> Result<Vec<f32>> {
+        match self.config.provider {
+            Provider::Mock => Ok(mock_embedding(input, expected_dim)),
+            Provider::Gemini => {
+                self.with_retry(|| self.embed_gemini_once(input, expected_dim, role))
+                    .await
+            }
+            Provider::OpenAiCompatible => {
+                self.with_retry(|| self.embed_openai_once(input, expected_dim))
                    .await
            }
        }
@ -160,6 +350,14 @@ impl EmbeddingClient {
                    if !err.retryable || attempt >= max_attempt {
                        return Err(OmniError::manifest_internal(err.message));
                    }
+                    tracing::warn!(
+                        target: "omnigraph::embedding",
+                        provider = ?self.config.provider,
+                        model = %self.config.model,
+                        attempt,
+                        error = %err.message,
+                        "embedding attempt failed, retrying"
+                    );
                    let shift = (attempt - 1).min(10) as u32;
                    let delay = self.retry_backoff_ms.saturating_mul(1u64 << shift);
                    sleep(Duration::from_millis(delay)).await;
@ -168,25 +366,27 @@ impl EmbeddingClient {
        }
    }

-    async fn embed_text_gemini_once(
+    async fn embed_gemini_once(
        &self,
        input: &str,
        expected_dim: usize,
-        task_type: &'static str,
+        role: EmbedRole,
    ) -> std::result::Result<Vec<f32>, EmbedCallError> {
-        let (api_key, base_url, http) = match &self.transport {
-            EmbeddingTransport::Gemini {
-                api_key,
-                base_url,
-                http,
-            } => (api_key, base_url, http),
-            EmbeddingTransport::Mock => unreachable!("mock transport should not call Gemini"),
+        let task_type = match role {
+            EmbedRole::Query => GEMINI_QUERY_TASK_TYPE,
+            EmbedRole::Document => GEMINI_DOCUMENT_TASK_TYPE,
        };

-        let response = http
-            .post(gemini_endpoint(base_url))
-            .header("x-goog-api-key", api_key)
-            .json(&build_gemini_request(input, expected_dim, task_type))
+        let response = self
+            .http
+            .post(gemini_endpoint(&self.config.base_url, &self.config.model))
+            .header("x-goog-api-key", &self.config.api_key)
+            .json(&build_gemini_request(
+                &self.config.model,
+                input,
+                expected_dim,
+                task_type,
+            ))
            .send()
            .await;
        let response = match response {
@ -205,10 +405,7 @@ impl EmbeddingClient {
            Ok(body) => body,
            Err(err) => {
                return Err(EmbedCallError {
-                    message: format!(
-                        "embedding response read failed (status {}): {}",
-                        status, err
-                    ),
+                    message: format!("embedding response read failed (status {}): {}", status, err),
                    retryable: status.is_server_error() || status.as_u16() == 429,
                });
            }
@ -217,10 +414,7 @@ impl EmbeddingClient {
        if !status.is_success() {
            let message = parse_google_error_message(&body).unwrap_or(body);
            return Err(EmbedCallError {
-                message: format!(
-                    "embedding request failed with status {}: {}",
-                    status, message
-                ),
+                message: format!("embedding request failed with status {}: {}", status, message),
                retryable: status.is_server_error() || status.as_u16() == 429,
            });
        }
@ -238,19 +432,85 @@ impl EmbeddingClient {
            }
        })
    }
+
+    async fn embed_openai_once(
+        &self,
+        input: &str,
+        expected_dim: usize,
+    ) -> std::result::Result<Vec<f32>, EmbedCallError> {
+        let response = self
+            .http
+            .post(format!("{}/embeddings", self.config.base_url))
+            .bearer_auth(&self.config.api_key)
+            .json(&build_openai_request(&self.config.model, input, expected_dim))
+            .send()
+            .await;
+        let response = match response {
+            Ok(response) => response,
+            Err(err) => {
+                let retryable = err.is_timeout() || err.is_connect() || err.is_request();
+                return Err(EmbedCallError {
+                    message: format!("embedding request failed: {}", err),
+                    retryable,
+                });
+            }
+        };
+
+        let status = response.status();
+        let body = match response.text().await {
+            Ok(body) => body,
+            Err(err) => {
+                return Err(EmbedCallError {
+                    message: format!("embedding response read failed (status {}): {}", status, err),
+                    retryable: status.is_server_error() || status.as_u16() == 429,
+                });
+            }
+        };
+
+        if !status.is_success() {
+            let message = parse_openai_error_message(&body).unwrap_or(body);
+            return Err(EmbedCallError {
+                message: format!("embedding request failed with status {}: {}", status, message),
+                retryable: status.is_server_error() || status.as_u16() == 429,
+            });
+        }
+
+        let parsed: OpenAiEmbeddingResponse =
+            serde_json::from_str(&body).map_err(|err| EmbedCallError {
+                message: format!("embedding response decode failed: {}", err),
+                retryable: false,
+            })?;
+
+        // The query path embeds exactly one string, so expect one datum at index 0.
+        let datum = parsed
+            .data
+            .into_iter()
+            .find(|d| d.index == 0)
+            .ok_or_else(|| EmbedCallError {
+                message: "embedding response missing data[0]".to_string(),
+                retryable: false,
+            })?;
+
+        validate_and_normalize_embedding(datum.embedding, expected_dim).map_err(|message| {
+            EmbedCallError {
+                message,
+                retryable: false,
+            }
+        })
+    }
 }

-fn gemini_endpoint(base_url: &str) -> String {
+fn gemini_endpoint(base_url: &str, model: &str) -> String {
    format!(
        "{}/models/{}:embedContent",
        base_url.trim_end_matches('/'),
-        GEMINI_EMBED_MODEL
+        model
    )
 }

-fn build_gemini_request(input: &str, expected_dim: usize, task_type: &'static str) -> Value {
+fn build_gemini_request(model: &str, input: &str, expected_dim: usize, task_type: &str) -> Value {
    json!({
-        "model": format!("models/{}", GEMINI_EMBED_MODEL),
+        "model": format!("models/{}", model),
        "content": {
            "parts": [
                {
@ -263,6 +523,14 @@ fn build_gemini_request(input: &str, expected_dim: usize, task_type: &'static st
    })
 }

+fn build_openai_request(model: &str, input: &str, expected_dim: usize) -> Value {
+    json!({
+        "model": model,
+        "input": [input],
+        "dimensions": expected_dim,
+    })
+}
+
 fn validate_and_normalize_embedding(
    values: Vec<f32>,
    expected_dim: usize,
@ -298,6 +566,57 @@ fn parse_google_error_message(body: &str) -> Option<String> {
        .filter(|msg| !msg.trim().is_empty())
 }

+fn parse_openai_error_message(body: &str) -> Option<String> {
+    serde_json::from_str::<OpenAiErrorEnvelope>(body)
+        .ok()
+        .map(|e| e.error.message)
+        .filter(|msg| !msg.trim().is_empty())
+}
+
+/// Map a provider alias to `(provider, default base URL, default model, ordered
+/// api-key envs)`. Shared by `from_env` and `from_parts` so both apply identical
+/// defaults: `openai-compatible`/unset → the OpenRouter gateway, `openai` →
+/// OpenAI's own host. `mock` is handled by callers before this is reached. The
+/// `Provider` enum alone would collapse the two openai aliases, so the alias
+/// (not the enum) determines the key-env order here.
+fn provider_profile(
+    alias: Option<&str>,
+) -> Result<(Provider, &'static str, &'static str, &'static [&'static str])> {
+    Ok(match alias {
+        None | Some("openai-compatible") => (
+            Provider::OpenAiCompatible,
+            DEFAULT_OPENROUTER_BASE_URL,
+            DEFAULT_OPENROUTER_MODEL,
+            &["OPENROUTER_API_KEY", "OPENAI_API_KEY"],
+        ),
+        Some("openai") => (
+            Provider::OpenAiCompatible,
+            DEFAULT_OPENAI_BASE_URL,
+            DEFAULT_OPENAI_MODEL,
+            &["OPENAI_API_KEY"],
+        ),
+        Some("gemini") => (
+            Provider::Gemini,
+            DEFAULT_GEMINI_BASE_URL,
+            DEFAULT_GEMINI_MODEL,
+            &["GEMINI_API_KEY"],
+        ),
+        Some(other) => {
+            return Err(OmniError::manifest_internal(format!(
+                "unknown embedding provider '{}' (expected openai-compatible|openai|gemini|mock)",
+                other
+            )));
+        }
+    })
+}
+
+fn env_string(name: &str) -> Option<String> {
+    std::env::var(name)
+        .ok()
+        .map(|v| v.trim().to_string())
+        .filter(|v| !v.is_empty())
+}
+
 fn parse_env_usize(name: &str, default: usize) -> usize {
    std::env::var(name)
        .ok()
@ -314,6 +633,15 @@ fn parse_env_u64(name: &str, default: u64) -> u64 {
        .unwrap_or(default)
 }

+/// Like [`parse_env_u64`] but accepts `0` as a meaningful value (the deadline
+/// uses `0` for "unbounded").
+fn parse_env_u64_allow_zero(name: &str, default: u64) -> u64 {
+    std::env::var(name)
+        .ok()
+        .and_then(|v| v.trim().parse::<u64>().ok())
+        .unwrap_or(default)
+}
+
 fn env_flag(name: &str) -> bool {
    std::env::var(name)
        .ok()
@ -395,6 +723,25 @@ mod tests {
        }
    }

+    // Every test that calls `EmbeddingConfig::from_env` clears the full set of
+    // embedding env vars first so the host environment can't leak in.
+    const EMBED_ENV: &[&str] = &[
+        "OMNIGRAPH_EMBEDDINGS_MOCK",
+        "OMNIGRAPH_EMBED_PROVIDER",
+        "OMNIGRAPH_EMBED_BASE_URL",
+        "OMNIGRAPH_EMBED_MODEL",
+        "OPENROUTER_API_KEY",
+        "OPENAI_API_KEY",
+        "GEMINI_API_KEY",
+    ];
+
+    fn cleared_env(extra: &[(&'static str, Option<&str>)]) -> EnvGuard {
+        let mut vars: Vec<(&'static str, Option<&str>)> =
+            EMBED_ENV.iter().map(|n| (*n, None)).collect();
+        vars.extend_from_slice(extra);
+        EnvGuard::set(&vars)
+    }
+
    #[tokio::test]
    async fn mock_embeddings_are_deterministic() {
        let client = EmbeddingClient::mock_for_tests();
@ -407,18 +754,30 @@ mod tests {
    }

    #[test]
-    fn gemini_request_uses_preview_model_retrieval_query_and_dimension() {
-        let request = build_gemini_request("alpha", 4, QUERY_TASK_TYPE);
-        assert_eq!(request["model"], "models/gemini-embedding-2-preview");
-        assert_eq!(request["taskType"], QUERY_TASK_TYPE);
+    fn gemini_request_uses_model_retrieval_query_and_dimension() {
+        let request =
+            build_gemini_request("gemini-embedding-2", "alpha", 4, GEMINI_QUERY_TASK_TYPE);
+        assert_eq!(request["model"], "models/gemini-embedding-2");
+        assert_eq!(request["taskType"], GEMINI_QUERY_TASK_TYPE);
        assert_eq!(request["outputDimensionality"], 4);
        assert_eq!(request["content"]["parts"][0]["text"], "alpha");
    }

    #[test]
    fn gemini_document_request_uses_retrieval_document_task_type() {
-        let request = build_gemini_request("alpha", 4, DOCUMENT_TASK_TYPE);
-        assert_eq!(request["taskType"], DOCUMENT_TASK_TYPE);
+        let request =
+            build_gemini_request("gemini-embedding-2", "alpha", 4, GEMINI_DOCUMENT_TASK_TYPE);
+        assert_eq!(request["taskType"], GEMINI_DOCUMENT_TASK_TYPE);
+    }
+
+    #[test]
+    fn openai_request_uses_model_input_array_and_dimensions() {
+        let request = build_openai_request("openai/text-embedding-3-large", "alpha", 4);
+        assert_eq!(request["model"], "openai/text-embedding-3-large");
+        assert_eq!(request["input"][0], "alpha");
+        assert!(request["input"].is_array());
+        assert_eq!(request["dimensions"], 4);
+        assert!(request.get("taskType").is_none());
    }

    #[test]
@ -475,15 +834,202 @@ mod tests {
        assert!(err.to_string().contains("do not retry"));
    }

+    #[tokio::test]
+    async fn run_with_deadline_aborts_slow_future() {
+        let mut client = EmbeddingClient::mock_for_tests();
+        client.deadline_ms = 20;
+        let slow = async {
+            tokio::time::sleep(Duration::from_secs(5)).await;
+            Ok(vec![0.0_f32])
+        };
+        let err = client.run_with_deadline(slow).await.unwrap_err();
+        assert!(err.to_string().contains("deadline exceeded"));
+    }
+
+    #[tokio::test]
+    async fn run_with_deadline_passes_through_fast_future() {
+        let client = EmbeddingClient::mock_for_tests();
+        let ok = client
+            .run_with_deadline(async { Ok(vec![1.0_f32, 2.0]) })
+            .await
+            .unwrap();
+        assert_eq!(ok, vec![1.0, 2.0]);
+    }
+
+    #[tokio::test]
+    async fn run_with_deadline_zero_is_unbounded() {
+        let mut client = EmbeddingClient::mock_for_tests();
+        client.deadline_ms = 0;
+        let ok = client
+            .run_with_deadline(async { Ok(vec![3.0_f32]) })
+            .await
+            .unwrap();
+        assert_eq!(ok, vec![3.0]);
+    }
+
    #[test]
    #[serial]
-    fn from_env_requires_gemini_api_key_when_not_mocking() {
-        let _guard = EnvGuard::set(&[
-            ("OMNIGRAPH_EMBEDDINGS_MOCK", None),
-            ("GEMINI_API_KEY", None),
-        ]);
+    fn from_env_defaults_to_openai_compatible_openrouter() {
+        let _guard = cleared_env(&[("OPENROUTER_API_KEY", Some("sk-test"))]);
+        let config = EmbeddingConfig::from_env().unwrap();
+        assert_eq!(config.provider, Provider::OpenAiCompatible);
+        assert_eq!(config.base_url, DEFAULT_OPENROUTER_BASE_URL);
+        assert_eq!(config.model, DEFAULT_OPENROUTER_MODEL);
+        assert_eq!(config.api_key, "sk-test");
+    }

-        let err = EmbeddingClient::from_env().unwrap_err();
-        assert!(err.to_string().contains("GEMINI_API_KEY"));
+    #[test]
+    #[serial]
+    fn from_env_openai_alias_uses_openai_host_not_openrouter() {
+        let _guard = cleared_env(&[
+            ("OMNIGRAPH_EMBED_PROVIDER", Some("openai")),
+            ("OPENAI_API_KEY", Some("k")),
+        ]);
+        let config = EmbeddingConfig::from_env().unwrap();
+        assert_eq!(config.provider, Provider::OpenAiCompatible);
+        assert_eq!(config.base_url, DEFAULT_OPENAI_BASE_URL); // api.openai.com, not OpenRouter
+        assert_eq!(config.model, DEFAULT_OPENAI_MODEL); // text-embedding-3-large, no openai/ prefix
+        assert_eq!(config.api_key, "k");
+    }
+
+    #[test]
+    #[serial]
+    fn from_env_openai_alias_prefers_openai_key_over_openrouter() {
+        // `openai` targets api.openai.com, so an OpenRouter key must not be sent there.
+        let _guard = cleared_env(&[
+            ("OMNIGRAPH_EMBED_PROVIDER", Some("openai")),
+            ("OPENROUTER_API_KEY", Some("router")),
+            ("OPENAI_API_KEY", Some("openai")),
+        ]);
+        let config = EmbeddingConfig::from_env().unwrap();
+        assert_eq!(config.base_url, DEFAULT_OPENAI_BASE_URL);
+        assert_eq!(config.api_key, "openai");
+    }
+
+    #[test]
+    #[serial]
+    fn from_env_openai_alias_errors_when_only_openrouter_key_is_set() {
+        let _guard = cleared_env(&[
+            ("OMNIGRAPH_EMBED_PROVIDER", Some("openai")),
+            ("OPENROUTER_API_KEY", Some("router")),
+        ]);
+        let err = EmbeddingConfig::from_env().unwrap_err();
+        assert!(err.to_string().contains("OPENAI_API_KEY"), "got: {err}");
+    }
+
+    #[test]
+    fn from_parts_applies_provider_defaults_and_overrides() {
+        let openrouter = EmbeddingConfig::from_parts(None, None, None, "k".to_string()).unwrap();
+        assert_eq!(openrouter.provider, Provider::OpenAiCompatible);
+        assert_eq!(openrouter.base_url, DEFAULT_OPENROUTER_BASE_URL);
+        assert_eq!(openrouter.model, DEFAULT_OPENROUTER_MODEL);
+        assert_eq!(openrouter.api_key, "k");
+
+        let gemini =
+            EmbeddingConfig::from_parts(Some("gemini"), None, None, "g".to_string()).unwrap();
+        assert_eq!(gemini.provider, Provider::Gemini);
+        assert_eq!(gemini.base_url, DEFAULT_GEMINI_BASE_URL);
+
+        let overridden = EmbeddingConfig::from_parts(
+            Some("openai"),
+            Some("https://x/v1/".to_string()),
+            Some("custom".to_string()),
+            "k".to_string(),
+        )
+        .unwrap();
+        assert_eq!(overridden.base_url, "https://x/v1"); // trailing slash trimmed
+        assert_eq!(overridden.model, "custom");
+
+        let err =
+            EmbeddingConfig::from_parts(Some("cohere"), None, None, "k".to_string()).unwrap_err();
+        assert!(
+            err.to_string().contains("unknown embedding provider"),
+            "got: {err}"
+        );
+    }
+
+    #[test]
+    #[serial]
+    fn from_parts_mock_honors_an_explicit_model() {
+        // A cluster `providers.embedding` profile that sets `kind: mock, model: X`
+        // must resolve to model X — it is what the query-time same-space check
+        // compares against. Env cleared so the assertion isolates the arg.
+        let _guard = cleared_env(&[]);
+        let pinned =
+            EmbeddingConfig::from_parts(Some("mock"), None, Some("recorded-x".to_string()), String::new())
+                .unwrap();
+        assert_eq!(pinned.provider, Provider::Mock);
+        assert_eq!(pinned.model, "recorded-x");
+        // With no explicit model, mock falls back to its env-based default (here
+        // empty, since the env is cleared).
+        let bare = EmbeddingConfig::from_parts(Some("mock"), None, None, String::new()).unwrap();
+        assert_eq!(bare.provider, Provider::Mock);
+        assert_eq!(bare.model, "");
+    }
+
+    #[test]
+    #[serial]
+    fn from_env_openai_compatible_prefers_openrouter_key() {
+        let _guard = cleared_env(&[
+            ("OPENROUTER_API_KEY", Some("router")),
+            ("OPENAI_API_KEY", Some("openai")),
+        ]);
+        let config = EmbeddingConfig::from_env().unwrap();
+        assert_eq!(config.api_key, "router");
+    }
+
+    #[test]
+    #[serial]
+    fn from_env_explicit_gemini_provider() {
+        let _guard = cleared_env(&[
+            ("OMNIGRAPH_EMBED_PROVIDER", Some("gemini")),
+            ("GEMINI_API_KEY", Some("g-key")),
+        ]);
+        let config = EmbeddingConfig::from_env().unwrap();
+        assert_eq!(config.provider, Provider::Gemini);
+        assert_eq!(config.base_url, DEFAULT_GEMINI_BASE_URL);
+        assert_eq!(config.model, DEFAULT_GEMINI_MODEL);
+        assert_eq!(config.api_key, "g-key");
+    }
+
+    #[test]
+    #[serial]
+    fn from_env_base_url_and_model_overrides_apply() {
+        let _guard = cleared_env(&[
+            ("OMNIGRAPH_EMBED_PROVIDER", Some("openai-compatible")),
+            ("OMNIGRAPH_EMBED_BASE_URL", Some("https://example.test/v1/")),
+            ("OMNIGRAPH_EMBED_MODEL", Some("custom/model")),
+            ("OPENAI_API_KEY", Some("k")),
+        ]);
+        let config = EmbeddingConfig::from_env().unwrap();
+        assert_eq!(config.base_url, "https://example.test/v1"); // trailing slash trimmed
+        assert_eq!(config.model, "custom/model");
+    }
+
+    #[test]
+    #[serial]
+    fn from_env_unknown_provider_errors() {
+        let _guard = cleared_env(&[("OMNIGRAPH_EMBED_PROVIDER", Some("cohere"))]);
+        let err = EmbeddingConfig::from_env().unwrap_err();
+        assert!(err.to_string().contains("unknown embedding provider"));
+    }
+
+    #[test]
+    #[serial]
+    fn from_env_errors_when_no_key_present() {
+        let _guard = cleared_env(&[]);
+        let err = EmbeddingConfig::from_env().unwrap_err();
+        assert!(err.to_string().contains("OPENROUTER_API_KEY or OPENAI_API_KEY"));
+    }
+
+    #[test]
+    #[serial]
+    fn from_env_mock_flag_wins() {
+        let _guard = cleared_env(&[
+            ("OMNIGRAPH_EMBEDDINGS_MOCK", Some("1")),
+            ("OMNIGRAPH_EMBED_PROVIDER", Some("gemini")),
+        ]);
+        let config = EmbeddingConfig::from_env().unwrap();
+        assert_eq!(config.provider, Provider::Mock);
    }
 }
--- a/crates/omnigraph/src/exec/query.rs
+++ b/crates/omnigraph/src/exec/query.rs
@ -2,6 +2,30 @@ use super::*;

 use super::projection::{apply_filter, apply_ordering, project_return};

+/// Bundles the per-handle embedding client cell with the optional injected
+/// config (RFC-012 Phase 5) so the lazy init uses the injected config when
+/// present, else `EmbeddingClient::from_env()`. Threaded through the query path
+/// in place of the bare cell, preserving laziness (a graph that never embeds
+/// builds no client and needs no key).
+pub(crate) struct EmbeddingResolver<'a> {
+    cell: &'a tokio::sync::OnceCell<EmbeddingClient>,
+    config: Option<&'a crate::embedding::EmbeddingConfig>,
+}
+
+impl EmbeddingResolver<'_> {
+    async fn resolve(&self) -> Result<&EmbeddingClient> {
+        let config = self.config.cloned();
+        self.cell
+            .get_or_try_init(|| async move {
+                match config {
+                    Some(cfg) => EmbeddingClient::new(cfg),
+                    None => EmbeddingClient::from_env(),
+                }
+            })
+            .await
+    }
+}
+
 impl Omnigraph {
    /// Run a named query against an explicit branch or snapshot target.
    pub async fn query(
@ -31,7 +55,18 @@ impl Omnigraph {
            GraphIndexHandle::none()
        };

-        execute_query(&ir, params, &resolved.snapshot, &graph_index, &catalog).await
+        execute_query(
+            &ir,
+            params,
+            &resolved.snapshot,
+            &graph_index,
+            &catalog,
+            &EmbeddingResolver {
+                cell: self.embedding_cell(),
+                config: self.embedding_config_ref(),
+            },
+        )
+        .await
    }

    /// Run a named query against the graph as it existed at a prior manifest version.
@ -72,7 +107,18 @@ impl Omnigraph {
            GraphIndexHandle::none()
        };

-        execute_query(&ir, params, &snapshot, &graph_index, &catalog).await
+        execute_query(
+            &ir,
+            params,
+            &snapshot,
+            &graph_index,
+            &catalog,
+            &EmbeddingResolver {
+                cell: self.embedding_cell(),
+                config: self.embedding_config_ref(),
+            },
+        )
+        .await
    }
 }

@ -102,6 +148,7 @@ async fn extract_search_mode(
    ir: &QueryIR,
    params: &ParamMap,
    catalog: &Catalog,
+    embedding: &EmbeddingResolver<'_>,
 ) -> Result<SearchMode> {
    if ir.order_by.is_empty() {
        return Ok(SearchMode::default());
@ -114,7 +161,8 @@ async fn extract_search_mode(
            query,
        } => {
            let vec =
-                resolve_nearest_query_vec(ir, catalog, variable, property, query, params).await?;
+                resolve_nearest_query_vec(ir, catalog, variable, property, query, params, embedding)
+                    .await?;
            let k = ir.limit.ok_or_else(|| {
                OmniError::manifest("nearest() ordering requires a limit clause".to_string())
            })? as usize;
@ -157,9 +205,10 @@ async fn extract_search_mode(
                .unwrap_or(60) as u32;

            let primary_mode =
-                extract_sub_search_mode(ir, primary, params, catalog, ir.limit).await?;
+                extract_sub_search_mode(ir, primary, params, catalog, ir.limit, embedding).await?;
            let secondary_mode =
-                extract_sub_search_mode(ir, secondary, params, catalog, ir.limit).await?;
+                extract_sub_search_mode(ir, secondary, params, catalog, ir.limit, embedding)
+                    .await?;

            Ok(SearchMode {
                rrf: Some(RrfMode {
@ -182,6 +231,7 @@ async fn extract_sub_search_mode(
    params: &ParamMap,
    catalog: &Catalog,
    limit: Option<u64>,
+    embedding: &EmbeddingResolver<'_>,
 ) -> Result<SearchMode> {
    match expr {
        IRExpr::Nearest {
@ -190,7 +240,8 @@ async fn extract_sub_search_mode(
            query,
        } => {
            let vec =
-                resolve_nearest_query_vec(ir, catalog, variable, property, query, params).await?;
+                resolve_nearest_query_vec(ir, catalog, variable, property, query, params, embedding)
+                    .await?;
            let k = limit.unwrap_or(100) as usize;
            Ok(SearchMode {
                nearest: Some((variable.clone(), property.clone(), vec, k)),
@ -229,15 +280,34 @@ async fn resolve_nearest_query_vec(
    property: &str,
    expr: &IRExpr,
    params: &ParamMap,
+    embedding: &EmbeddingResolver<'_>,
 ) -> Result<Vec<f32>> {
    let lit = resolve_literal_or_param(expr, params)?;
    match lit {
        Literal::List(_) => literal_to_f32_vec(&lit),
        Literal::String(text) => {
-            let expected_dim = nearest_property_dimension(ir, catalog, variable, property)?;
-            EmbeddingClient::from_env()?
-                .embed_query_text(&text, expected_dim)
-                .await
+            let (expected_dim, recorded_model) =
+                nearest_property_dim_and_model(ir, catalog, variable, property)?;
+            // Lazily resolve the per-handle client once, then reuse it across
+            // queries (keeps the provider connection pool warm); a graph that
+            // never embeds never builds a client and needs no provider key.
+            let client = embedding.resolve().await?;
+            // Same-space guarantee: if the property recorded the model that
+            // produced its stored vectors (`@embed("…", model="…")`), the query
+            // embedder must resolve to that same model — otherwise the comparison
+            // is across vector spaces. Reject loudly instead of ranking garbage.
+            if let Some(recorded) = &recorded_model {
+                let resolved = &client.config().model;
+                if resolved != recorded {
+                    return Err(OmniError::manifest(format!(
+                        "nearest() on '{property}': its stored vectors were embedded with model \
+                         '{recorded}', but the query embedder resolves to '{resolved}'. Set \
+                         OMNIGRAPH_EMBED_MODEL='{recorded}' (and the matching provider) or re-embed \
+                         the stored vectors."
+                    )));
+                }
+            }
+            client.embed_query_text(&text, expected_dim).await
        }
        _ => Err(OmniError::manifest(
            "nearest query must be a string or list of floats".to_string(),
@ -279,12 +349,14 @@ fn literal_to_f32_vec(lit: &Literal) -> Result<Vec<f32>> {
    }
 }

-fn nearest_property_dimension(
+/// Resolve the nearest() target property's vector dimension and the embedding
+/// model recorded for it via `@embed("…", model="…")` (`None` if unrecorded).
+fn nearest_property_dim_and_model(
    ir: &QueryIR,
    catalog: &Catalog,
    variable: &str,
    property: &str,
-) -> Result<usize> {
+) -> Result<(usize, Option<String>)> {
    let type_name = resolve_binding_type_name(&ir.pipeline, variable).ok_or_else(|| {
        OmniError::manifest_internal(format!(
            "nearest() variable '${}' is not bound to a node type in the lowered pipeline",
@ -303,13 +375,20 @@ fn nearest_property_dimension(
            type_name, property
        ))
    })?;
-    match prop.scalar {
-        ScalarType::Vector(dim) if !prop.list => Ok(dim as usize),
-        _ => Err(OmniError::manifest_internal(format!(
-            "nearest() property '{}.{}' is not a scalar vector",
-            type_name, property
-        ))),
-    }
+    let dim = match prop.scalar {
+        ScalarType::Vector(dim) if !prop.list => dim as usize,
+        _ => {
+            return Err(OmniError::manifest_internal(format!(
+                "nearest() property '{}.{}' is not a scalar vector",
+                type_name, property
+            )));
+        }
+    };
+    let recorded_model = node_type
+        .embed_sources
+        .get(property)
+        .and_then(|embed| embed.model.clone());
+    Ok((dim, recorded_model))
 }

 fn resolve_binding_type_name<'a>(pipeline: &'a [IROp], variable: &str) -> Option<&'a str> {
@ -341,8 +420,9 @@ pub async fn execute_query(
    snapshot: &Snapshot,
    graph_index: &GraphIndexHandle<'_>,
    catalog: &Catalog,
+    embedding: &EmbeddingResolver<'_>,
 ) -> Result<QueryResult> {
-    let search_mode = extract_search_mode(ir, params, catalog).await?;
+    let search_mode = extract_search_mode(ir, params, catalog, embedding).await?;

    // RRF requires forked execution
    if let Some(ref rrf) = search_mode.rrf {
--- a/crates/omnigraph/tests/search.rs
+++ b/crates/omnigraph/tests/search.rs
@ -60,6 +60,15 @@ query hybrid_search_string($vq: String, $tq: String) {
    limit 3
 }
 "#;
+// Same shape as MOCK_SEARCH_SCHEMA but the vector records the model that
+// produced its stored vectors, opting into the query-time same-space check.
+const MODEL_RECORDED_SCHEMA: &str = r#"
+node Doc {
+    slug: String @key
+    title: String @index
+    embedding: Vector(4) @embed("title", model="test-model-a") @index
+}
+"#;
 const SEARCH_MUTATIONS: &str = r#"
 query insert_doc($slug: String, $title: String, $body: String, $embedding: Vector(4)) {
    insert Doc {
@ -89,6 +98,15 @@ async fn init_mock_embedding_search_db(dir: &tempfile::TempDir) -> Omnigraph {
    db
 }

+async fn init_model_recorded_search_db(dir: &tempfile::TempDir) -> Omnigraph {
+    let uri = dir.path().to_str().unwrap();
+    let mut db = Omnigraph::init(uri, MODEL_RECORDED_SCHEMA).await.unwrap();
+    load_jsonl(&mut db, &mock_embedding_seed_data(), LoadMode::Overwrite)
+        .await
+        .unwrap();
+    db
+}
+
 fn mock_embedding_seed_data() -> String {
    [
        ("alpha-doc", "alpha guide", mock_embedding("alpha", 4)),
@ -510,9 +528,14 @@ async fn explicit_vector_nearest_does_not_require_gemini_credentials() {

 #[tokio::test]
 #[serial]
-async fn string_nearest_requires_gemini_credentials_when_mock_is_disabled() {
+async fn string_nearest_requires_provider_credentials_when_mock_is_disabled() {
+    // With mock off and no provider key, the default (openai-compatible)
+    // provider fails loudly rather than silently producing garbage vectors.
    let _guard = EnvGuard::set(&[
        ("OMNIGRAPH_EMBEDDINGS_MOCK", None),
+        ("OMNIGRAPH_EMBED_PROVIDER", None),
+        ("OPENROUTER_API_KEY", None),
+        ("OPENAI_API_KEY", None),
        ("GEMINI_API_KEY", None),
    ]);

@ -528,7 +551,105 @@ async fn string_nearest_requires_gemini_credentials_when_mock_is_disabled() {
    .await
    .unwrap_err();

-    assert!(err.to_string().contains("GEMINI_API_KEY"));
+    assert!(
+        err.to_string()
+            .contains("OPENROUTER_API_KEY or OPENAI_API_KEY"),
+        "unexpected error: {err}"
+    );
+}
+
+#[tokio::test]
+#[serial]
+async fn nearest_string_passes_when_query_model_matches_recorded_model() {
+    let _guard = EnvGuard::set(&[
+        ("OMNIGRAPH_EMBEDDINGS_MOCK", Some("1")),
+        ("OMNIGRAPH_EMBED_MODEL", Some("test-model-a")),
+        ("OMNIGRAPH_EMBED_PROVIDER", None),
+        ("OPENROUTER_API_KEY", None),
+        ("OPENAI_API_KEY", None),
+        ("GEMINI_API_KEY", None),
+    ]);
+
+    let dir = tempfile::tempdir().unwrap();
+    let mut db = init_model_recorded_search_db(&dir).await;
+
+    let result = query_main(
+        &mut db,
+        MOCK_SEARCH_QUERIES,
+        "vector_search_string",
+        &params(&[("$q", "alpha")]),
+    )
+    .await
+    .unwrap();
+
+    assert_eq!(result_slugs(&result)[0], "alpha-doc");
+}
+
+#[tokio::test]
+#[serial]
+async fn nearest_string_errors_when_query_model_differs_from_recorded_model() {
+    let _guard = EnvGuard::set(&[
+        ("OMNIGRAPH_EMBEDDINGS_MOCK", Some("1")),
+        ("OMNIGRAPH_EMBED_MODEL", Some("test-model-b")),
+        ("OMNIGRAPH_EMBED_PROVIDER", None),
+        ("OPENROUTER_API_KEY", None),
+        ("OPENAI_API_KEY", None),
+        ("GEMINI_API_KEY", None),
+    ]);
+
+    let dir = tempfile::tempdir().unwrap();
+    let mut db = init_model_recorded_search_db(&dir).await;
+
+    let err = query_main(
+        &mut db,
+        MOCK_SEARCH_QUERIES,
+        "vector_search_string",
+        &params(&[("$q", "alpha")]),
+    )
+    .await
+    .unwrap_err();
+
+    // The error must name both the recorded model and the resolved one.
+    let msg = err.to_string();
+    assert!(msg.contains("test-model-a"), "got: {msg}");
+    assert!(msg.contains("test-model-b"), "got: {msg}");
+}
+
+#[tokio::test]
+#[serial]
+async fn injected_embedding_config_is_used_instead_of_env() {
+    // No mock flag and no provider keys in env, so `from_env()` would error.
+    // Injecting a Mock config proves the resolver uses the injected config
+    // (RFC-012 Phase 5), and its model satisfies the recorded same-space check.
+    let _guard = EnvGuard::set(&[
+        ("OMNIGRAPH_EMBEDDINGS_MOCK", None),
+        ("OMNIGRAPH_EMBED_PROVIDER", None),
+        ("OMNIGRAPH_EMBED_MODEL", None),
+        ("OPENROUTER_API_KEY", None),
+        ("OPENAI_API_KEY", None),
+        ("GEMINI_API_KEY", None),
+    ]);
+
+    let dir = tempfile::tempdir().unwrap();
+    let mut db = init_model_recorded_search_db(&dir)
+        .await
+        .with_embedding_config(std::sync::Arc::new(omnigraph::embedding::EmbeddingConfig {
+            provider: omnigraph::embedding::Provider::Mock,
+            model: "test-model-a".to_string(),
+            base_url: String::new(),
+            api_key: String::new(),
+        }));
+
+    let result = query_main(
+        &mut db,
+        MOCK_SEARCH_QUERIES,
+        "vector_search_string",
+        &params(&[("$q", "alpha")]),
+    )
+    .await
+    .unwrap();
+
+    assert_eq!(result_slugs(&result)[0], "alpha-doc");
 }

 // ─── BM25 search ────────────────────────────────────────────────────────────
--- a/docs/dev/execution.md
+++ b/docs/dev/execution.md
@ -177,4 +177,4 @@ For all three modes, a mid-load failure (RI / cardinality violation, validation

 ## Embeddings during load

-If a node type has `@embed` properties, the loader calls the engine embedding client (Gemini, RETRIEVAL_DOCUMENT) per row to populate the vector column. See [embeddings.md](../user/search/embeddings.md).
+The loader does **not** embed `@embed` properties at load time. `@embed` is a catalog annotation consumed by query typecheck/lint; vectors are supplied directly in the load data, or pre-filled by the offline `omnigraph embed` pipeline. Query-time `nearest($v, "string")` auto-embeds the query string via the provider-independent embedding client. See [embeddings.md](../user/search/embeddings.md). (Ingest-time `@embed` execution is a planned RFC-012 phase.)
--- a/docs/dev/index.md
+++ b/docs/dev/index.md
@ -81,6 +81,7 @@ Working documents for in-flight feature work. Removed when the work lands.
 | Unify CLI embedded/remote access paths — parity referee, shared wire-DTO crate, `GraphClient` trait, declared plane capabilities | [rfc-009-unify-access-paths.md](rfc-009-unify-access-paths.md) |
 | Restructure the CLI around explicit planes — one graph-addressing model, declared capability surface, plane-grouped help (expands RFC-009 Phase 4) | [rfc-010-cli-planes-restructure.md](rfc-010-cli-planes-restructure.md) |
 | CLI refactoring — one addressing & config model post-`omnigraph.yaml`: scope + `--graph` + derived access path, served-default / privileged-direct, profiles, named queries, capability classifier (completes RFC-008) | [rfc-011-cli-refactoring.md](rfc-011-cli-refactoring.md) |
+| Provider-independent embedding configuration — one resolved `EmbeddingConfig` + sealed provider enum (Gemini/OpenAI/Mock), identity recorded in the schema IR, query-time same-space validation, NFR floor | [rfc-012-embedding-provider-config.md](rfc-012-embedding-provider-config.md) |

 ## Boundary

--- a/docs/dev/rfc-012-embedding-provider-config.md
+++ b/docs/dev/rfc-012-embedding-provider-config.md
@ -0,0 +1,295 @@
+# RFC: Provider-Independent Embedding Configuration
+
+**Status:** Accepted — Phases 1-5 implemented
+**Date:** 2026-06-15
+**Builds on:** the engine embedding client (`crates/omnigraph/src/embedding.rs`), the `@embed` catalog
+annotation (`omnigraph-compiler/src/catalog`), the cluster `providers.embedding` surface
+([cluster-config-specs.md](cluster-config-specs.md), [rfc-007-operator-config.md](rfc-007-operator-config.md)
+for the secret-resolution pattern).
+**Target release:** staged — NFR floor first, then the provider-independent config core; ingest-time `@embed`
+execution is a separate later phase.
+
+## Summary
+
+OmniGraph's embedding subsystem is **hardwired to a single provider (Google Gemini)** and has no recorded
+link between the model that produced a stored vector and the model that embeds a query string. Today that
+happens to be self-consistent (one live client embeds both sides), but it is consistent by accident, not by
+construction: the provider is hardcoded, the model is a moving `-preview` target, nothing validates that a
+query vector and a stored vector share a space, and the one configurable knob (key + base URL) cannot change
+the provider or model.
+
+This RFC makes embedding **provider-independent**: one resolved `EmbeddingConfig { provider, model, base_url,
+api_key, dim, normalize }` behind a sealed provider abstraction, resolved once and shared by every embedder.
+The **primary variant is OpenAI-compatible** — a single request/response shape (`POST {base}/embeddings`,
+`{model, input, dimensions}`) that covers **OpenRouter** (the recommended default gateway, one key for Gemini,
+OpenAI, Mistral, BGE, Qwen, sentence-transformers, …), OpenAI direct, and any self-hosted OpenAI-compatible
+endpoint (vLLM, Ollama, LM Studio, Together). A native **Gemini** (`generativelanguage`) variant is retained
+for shops that want to hit Google directly with its `RETRIEVAL_QUERY`/`RETRIEVAL_DOCUMENT` task-type
+asymmetry, plus a deterministic **Mock**. The embedding *identity* (provider + model + dim) is recorded in the
+schema IR so it travels with the data, and a query whose resolved embedder cannot match the stored vectors'
+recorded identity is **rejected with a typed error instead of silently ranking across vector spaces.**
+Provider/endpoint wiring lands on the already-reserved cluster `providers.embedding` field; secrets follow the
+existing operator-credential pattern; no secret ever enters the schema.
+
+This RFC supersedes the framing in `docs/user/search/embeddings.md` that described "two embedding clients
+with different defaults" — one of those clients was dead code with zero callers and has been removed (see
+Phase 1); the OpenAI request shape returns as a first-class *provider variant* of the one client, not as a
+second parallel client.
+
+## Motivation
+
+This work originated in an external handoff that reported a live cross-provider bug: gemini-3072 stored
+vectors compared against OpenAI-1536 query vectors, silently. Investigation against the current source showed
+the reported mechanism is **inaccurate** — the OpenAI client it blamed (`omnigraph-compiler/src/embedding.rs`)
+was `pub(crate)`, `#![allow(dead_code)]`, and had **zero callers**; the live `nearest("string")` path and the
+offline `omnigraph embed` CLI both use the engine **Gemini** client; and `@embed` does no ingest-time
+embedding at all. So the documented happy path is self-consistent. But the investigation surfaced four real
+problems the handoff's instincts correctly smelled:
+
+- **P1 — Provider is hardwired.** The one live client builds Google `generativelanguage` requests; only key +
+  base URL are configurable, not the provider or model. A non-Gemini shop cannot use `nearest("string")`
+  without a Gemini key, and cannot make it produce non-Gemini vectors. If they store their own vectors and
+  query with `nearest("string")`, the query is embedded with Gemini → a silent cross-space ranking. This is
+  the handoff's failure, reached by a different cause.
+- **P2 — A dead, divergent second client + stale docs** invited exactly the misdiagnosis the handoff made.
+- **P3 — No same-space guarantee recorded with the data.** Nothing stamps which model/dim produced a stored
+  vector, so write-side and read-side embedders can drift with no validation.
+- **P4 — `@embed` is declarative-in-name-only.** It records a source property for the typechecker but never
+  embeds at ingest; the docs claimed otherwise.
+
+Per the project's first principle, the lower-liability shape is **one provider-independent client with the
+identity recorded next to the data**, not N independently-defaulted clients kept in lockstep by discipline.
+Hardcoding one provider mortgages every future "we need OpenAI / a local model / Vertex" against a rewrite;
+recording identity once closes the silent-wrong-results class by construction.
+
+## Current state — which API we actually use
+
+| | Live engine client (`crates/omnigraph/src/embedding.rs`) | Deleted dead client (was `omnigraph-compiler/src/embedding.rs`) |
+|---|---|---|
+| Provider | **Google Gemini Developer API** (`generativelanguage`, *not* Vertex AI) | OpenAI |
+| Endpoint | `POST {base}/models/{model}:embedContent` | `POST {base}/embeddings` |
+| Auth | header `x-goog-api-key`, env `GEMINI_API_KEY` | `Authorization: Bearer`, env `OPENAI_API_KEY` |
+| Model | `gemini-embedding-2-preview` (hardcoded) | `text-embedding-3-small` (env `NANOGRAPH_EMBED_MODEL`) |
+| Base default | `https://generativelanguage.googleapis.com/v1beta` | `https://api.openai.com/v1` |
+| Request body | `{model, content:{parts:[{text}]}, taskType, outputDimensionality}` | `{model, input:[…], dimensions}` |
+| Response | `{embedding:{values:[f32]}}` | `{data:[{index, embedding:[f32]}]}` |
+| Task types | `RETRIEVAL_QUERY` / `RETRIEVAL_DOCUMENT` | none |
+| Status | **live** — used by `nearest("string")` and `omnigraph embed` | **removed in Phase 1** (zero callers) |
+
+Both shapes honour a requested output dimensionality (Gemini `outputDimensionality`, OpenAI `dimensions`)
+driven by the target column width, so dimension is already schema-driven. The two known shapes are exactly the
+two initial provider variants this RFC defines — the OpenAI shape returns from git history as a `Provider`
+variant of the single client.
+
+## Guide-level explanation
+
+### Configuring a provider (operator view)
+
+Pick a provider for the graph in `cluster.yaml` (the team-owned surface), referencing a secret by name. The
+recommended default routes through OpenRouter (OpenAI-compatible, one key for many models):
+
+```yaml
+providers:
+  embedding:
+    default:
+      kind: openai-compatible           # openai-compatible | gemini | mock
+      base_url: https://openrouter.ai/api/v1
+      model: google/gemini-embedding-2  # or openai/text-embedding-3-large, mistralai/mistral-embed, …
+      api_key: ${OPENROUTER_API_KEY}
+graphs:
+  knowledge:
+    schema: knowledge.pg
+    embedding_provider: default
+```
+
+The same `openai-compatible` kind points at OpenAI direct (`base_url: https://api.openai.com/v1`,
+`model: text-embedding-3-large`) or a self-hosted endpoint (vLLM/Ollama/LM Studio) by changing `base_url`. Use
+`kind: gemini` only to reach Google's `generativelanguage` API directly (it keeps the query/document
+task-type asymmetry that the OpenAI-compatible shape does not expose). Dimensions are schema-driven by the
+target `Vector(N)` column, not duplicated in the provider profile.
+
+The zero-config tier keeps working with env only (`OMNIGRAPH_EMBED_PROVIDER`, `OMNIGRAPH_EMBED_BASE_URL`,
+`OMNIGRAPH_EMBED_MODEL`, and the provider api-key env — `OPENROUTER_API_KEY` / `OPENAI_API_KEY` /
+`GEMINI_API_KEY`), so no cluster file is required for a single-graph setup.
+
+### Recording identity in the schema
+
+`@embed` grows optional arguments that pin the embedding identity to the vector column:
+
+```pg
+node Doc {
+  slug: String @key
+  text: String
+  v: Vector(3072) @embed("text", model="gemini-embedding-2", dim=3072) @index
+}
+```
+
+The single-argument form `@embed("text")` keeps working unchanged. The recorded identity persists in the
+schema IR (`_schema.ir.json`) and so travels with `schema apply` and `schema show`.
+
+### What a mismatch looks like
+
+If the resolved read-side embedder cannot produce the recorded identity (wrong model, wrong dim, wrong
+provider), `nearest($v, "string")` fails with a typed error naming both sides, instead of returning a
+plausible-but-meaningless ranking. Changing the recorded identity on an existing column is a loud schema-apply
+refusal (it is a re-embed, a deliberate migration step), reusing the migration planner's existing
+annotation-change rejection.
+
+## Reference-level design
+
+### One client, sealed provider abstraction
+
+Replace the two-variant `EmbeddingTransport` with a resolved config plus a sealed provider enum:
+
+```text
+EmbeddingConfig { provider: Provider, model, base_url, api_key, dim, normalize }
+enum Provider {
+  OpenAiCompatible,   // POST {base}/embeddings, Bearer auth, {model, input, dimensions} → {data:[{embedding,index}]}
+                      //   covers OpenRouter (default gateway), OpenAI direct, vLLM/Ollama/LM Studio/Together
+  Gemini,             // POST {base}/models/{model}:embedContent, x-goog-api-key, with RETRIEVAL_QUERY/DOCUMENT task types
+  Mock,               // deterministic, offline
+}
+struct EmbeddingClient { config, http, retry, deadline }
+```
+
+`Provider` owns the per-API differences (endpoint suffix, auth header, request JSON, response JSON, task-type
+support); the client owns retry/backoff, the deadline, normalization, and tracing — all provider-independent.
+**OpenRouter is not a distinct variant** — it is `OpenAiCompatible` with `base_url =
+https://openrouter.ai/api/v1`, which is the point: one OpenAI-compatible shape gives provider-independence
+across every model OpenRouter fronts, so the gateway does the multi-provider fan-out and OmniGraph carries one
+request shape. The native `Gemini` variant exists only for direct-to-Google with task-type asymmetry. An enum
+(not a trait) is the earned complexity for this small, first-party set; if third-party plug-in providers are
+ever needed, the enum becomes a trait behind the same `EmbeddingConfig` surface without touching callers.
+
+The OpenAI-compatible `input` accepts an **array**, giving batch embedding for free — which the later
+ingest phase needs for throughput, and which removes the open dependency on Gemini's native
+`batchEmbedContents`.
+
+### Config resolution (resolved once, shared)
+
+Precedence, highest first for served cluster graphs: applied cluster `providers.embedding.<name>` profile →
+env (`OMNIGRAPH_EMBED_*`, provider api-key env) → built-in defaults. The cluster `api_key` value is a
+`${NAME}` env reference resolved at server boot; plaintext never lives in the schema, state ledger, or any
+checked-in file. Resolution happens once per graph handle; the resolved client is shared by
+`nearest("string")`. Direct single-graph serving, embedded callers, and the offline CLI keep the env path
+unless they inject an `EmbeddingConfig` directly.
+
+### Identity recorded in the schema IR (not a new store)
+
+The `@embed` args serialize into `PropertyIR.annotations` → `_schema.ir.json`, which `schema apply` already
+persists atomically and which the catalog (the one thing `nearest()` reads at query time) is built from. No
+new metadata store, no manifest column, no extra read on the query path. The migration planner already rejects
+non-description annotation changes as `UnsupportedChange`, so "recorded identity is immutable without a
+deliberate re-embed migration" is the default behaviour, not new code. (A second, optional copy in Lance
+field metadata — co-located with the vectors — is available later by activating the currently no-op
+`UpdatePropertyMetadata` migration step; out of scope here.)
+
+### Query-time validation
+
+`resolve_nearest_query_vec` compares the resolved read-side identity against the column's recorded identity
+before embedding; on mismatch it returns a typed `OmniError` naming recorded vs resolved (model, dim,
+provider). This is the only behaviour that closes P3 by construction.
+
+### NFR floor (independent of the provider work)
+
+- **Deadline:** wrap every embed call (query or document) in a total-operation deadline
+  (`OMNIGRAPH_EMBED_DEADLINE_MS`) so a degraded provider cannot hang the caller for the current ~121 s worst
+  case (4 × 30 s timeout + backoff).
+- **Observability:** `tracing` span per embed call (provider, model, dim, attempts, outcome, elapsed; `warn!`
+  per retry; token usage when the provider returns it). The subsystem has zero instrumentation today.
+- **Single normalization:** one `normalize_vector` (the dead client carried a divergent second copy; removed
+  in Phase 1).
+- **Stable model:** make the model configurable and default to a stable (non-`-preview`) model once the GA
+  name is confirmed.
+
+### Ingest-time `@embed` (later phase, not this RFC's core)
+
+Making `@embed` embed at ingest is a separate phase with a hard constraint: embedding is a slow, external,
+**non-idempotent** side effect, so it must run **entirely before staging** — in the pure in-memory phase,
+before any `stage_*`/Lance HEAD move, alongside the existing constraint validation — so a mid-load provider
+failure aborts with zero drift. It must never sit inside or after the commit protocol, because the recovery
+sweep cannot re-run or undo an external embedding. It also needs a content-hash skip (so `load --mode
+overwrite` does not re-bill every row), batching, and a bounded-concurrency stage. Specified here only to fix
+the design constraint; deferred to its own RFC/phase.
+
+### Phasing (implementation order)
+
+| Phase | Scope | Demo |
+|---|---|---|
+| **1 — NFR floor + dead-client removal** | deadline, observability, single normalize, configurable model, delete dead client + `NANOGRAPH_*` | a hung provider fails at the deadline; embed calls traced; `rg NANOGRAPH_` empty |
+| **2 — Provider-independent config** | `EmbeddingConfig` + `Provider` enum (OpenAiCompatible covering OpenRouter/OpenAI/local, Gemini, Mock); env-first resolution; client reuse | point `base_url` at OpenRouter, run `nearest("string")`, get correct neighbours vs OpenRouter-stored vectors; CLI shares the config |
+| **3 — Record identity in schema IR** | `@embed` args grammar + catalog + IR persistence | `schema show` reflects recorded model/dim |
+| **4 — Query-time validation** | compare resolved vs recorded; typed error; planner refusal on identity change | stored model A vs read model B → loud error, never silent garbage |
+| **5 — Cluster provider wiring** | `providers.embedding` resources; `graphs.<id>.embedding_provider`; `${NAME}` resolution at server boot | provider profile resolved from applied cluster state; legacy `omnigraph.yaml` untouched |
+| later | ingest-time `@embed` (Shape C) | separate RFC |
+
+**Status:** Phases 1–5 are implemented (`@embed("…", model="…")` is recorded in the schema IR and validated at
+query time with a typed same-space error; an unrecorded `@embed` keeps working with no check; cluster-served
+graphs can bind an applied `providers.embedding` profile). Ingest-time `@embed` remains.
+
+## Invariants & deny-list check
+
+- **Invariant 9 (integrity failures are loud):** strengthened — query-time identity mismatch becomes a typed
+  error instead of silent wrong results.
+- **Invariant 10 (query semantics are first-class IR concepts):** embedding identity becomes IR/catalog data,
+  not an out-of-band env guess.
+- **Invariant 11 (transport stays at the boundary):** strengthened — Phase 1 removes the HTTP client + async
+  runtime (`reqwest`, `tokio`) from `omnigraph-compiler`, whose own manifest advertises "Zero Lance
+  dependency"; the embedding HTTP client lives only in the engine.
+- **Invariant 12 / secret handling:** api-keys resolve through the existing credential chain; never in schema
+  or checked-in config.
+- **Invariant 13 (bounded & observable):** addressed — the deadline bounds latency; tracing makes the
+  subsystem observable.
+- **Deny-list — "silent fallback / dropped rows":** the cross-space ranking is exactly a silent-wrong-result;
+  this RFC closes it.
+- **Deny-list — "new write paths that advance Lance HEAD before manifest publish without a recovery
+  sidecar":** the ingest phase (deferred) explicitly keeps embedding *before* staging, so it does not create a
+  new HEAD-advancing write path. No invariant is weakened.
+
+## Drawbacks & alternatives
+
+- **Do nothing.** The happy path works today, so the live risk is narrow (P1 + P3). But the provider hardwiring
+  and missing validation are a latent silent-wrong-results class that bites the first non-Gemini user.
+- **Interim env-only provider switch (no schema record).** Cheaper, but leaves the same-space guarantee to
+  operator discipline (fails P3). Folded in as Phase 2's env-first resolution, with Phases 3–4 adding the
+  record/validate guarantee.
+- **Trait-based provider plug-ins now.** Rejected as unearned complexity for two first-party providers; the
+  enum upgrades to a trait behind the same surface if needed.
+- **Stamp identity in the manifest or Lance field metadata instead of the IR.** The manifest is the wrong
+  granularity; field metadata needs net-new wiring and a query-path dataset open. The IR is where `@embed`
+  already lives and is already read at query time (see spike).
+
+## Reversibility
+
+Mostly reversible. Phases 1–2 and 5 are code/config (env, CLI, cluster keys) and cheap to undo. Phase 3
+(recording identity in the schema IR) is **near-permanent** — it changes the on-disk `_schema.ir.json` shape
+and the schema hash — so it earns the most scrutiny: the single-arg `@embed` form stays byte-compatible, and
+recorded identity is additive (absent identity = today's behaviour). Provider request/response shapes are
+external API contracts, not our format, so adding providers is reversible.
+
+## Gateway tradeoff (OpenRouter)
+
+Routing through OpenRouter (the default) buys provider-independence with one key and one billing relationship,
+batch input, and access to the GA `google/gemini-embedding-2`. Costs to accept, all controllable:
+
+- **Extra network hop** → more query-path latency. The Phase-1 deadline bounds it; the cache mitigates repeats.
+- **Text transits a third party.** OpenRouter's `provider: { data_collection }` routing preference controls
+  retention; shops with strict residency requirements use `kind: gemini`/`openai-compatible` pointed at the
+  provider (or a self-hosted endpoint) directly instead of the gateway. Provider-independence means this is a
+  config change, not a code change.
+- **Loses Gemini's task-type asymmetry** when Gemini is reached via the OpenAI-compatible gateway (both sides
+  embed symmetrically). This is a retrieval-quality cost, **not** a same-space correctness cost — both stored
+  and query vectors take the identical path, so they stay in one space by construction. Shops that want the
+  asymmetry use `kind: gemini`.
+
+## Unresolved questions
+
+- GA Gemini model name — **resolved:** `google/gemini-embedding-2` (via OpenRouter) / `gemini-embedding-2`
+  (direct), 128–3072 dims (recommended 768/1536/3072). Default flips off `-preview` in Phase 2.
+- Gemini `batchEmbedContents` availability — **moot** when going through the OpenAI-compatible gateway (its
+  `input` array batches); still relevant only for the direct `kind: gemini` path.
+- Identity granularity: per-vector-property args vs one graph-level default profile referenced by name.
+- Whether to backfill recorded identity for existing graphs, or treat absent-identity as "unvalidated, legacy"
+  permanently.
+- Default model for the zero-config tier: `google/gemini-embedding-2` vs `openai/text-embedding-3-large`
+  (both 3072-capable) — pick the project default.
--- a/docs/user/clusters/config.md
+++ b/docs/user/clusters/config.md
@ -12,9 +12,9 @@ that ledger, manually remove a held local state lock by exact lock id, and
 catalog writes, **graph creation** (a declared graph that does not exist yet
 is initialized by apply at the derived root), **schema updates** (soft drops
 only), and — behind an explicit, digest-bound **approval** — **graph
-deletion**. It does not perform data-loss schema migrations or start servers:
-a separate `omnigraph-server --cluster <dir>` serves the applied revision on
-its next (re)start.
+deletion**. It does not perform data-loss schema migrations, start servers,
+or run data loads. A server can boot from the applied ledger with
+`omnigraph-server --cluster <config-dir | storage-root>`.

 ## Commands

@ -56,7 +56,7 @@ The exact contract:

 ## Supported `cluster.yaml`

-Stage 3A accepts only this resource subset:
+The current config surface accepts this resource subset:

 ```yaml
 version: 1
@ -67,9 +67,18 @@ state:
  backend: cluster
  lock: true

+providers:
+  embedding:
+    default:
+      kind: openai-compatible
+      base_url: https://openrouter.ai/api/v1
+      model: openai/text-embedding-3-large
+      api_key: ${OPENROUTER_API_KEY}
+
 graphs:
  knowledge:
    schema: knowledge.pg
+    embedding_provider: default
    queries: queries/          # discover every `query <name>` in queries/*.gq

 policies:
@ -98,6 +107,17 @@ updates all of its queries together. Paths are relative to the config
 directory — the cluster is one explicit folder, so no `./` prefixes are
 needed.

+`providers.embedding.<name>` defines a query-time embedding provider profile
+for cluster-served graphs. A graph opts in with `embedding_provider: <name>`;
+bare names normalize to `provider.embedding.<name>`. Supported provider
+`kind` values are `openai-compatible` (default/OpenRouter-compatible),
+`openai` (OpenAI's own host), `gemini`, and `mock`. Real providers require
+`api_key: ${ENV_VAR}`; inline secrets are rejected. The env var is resolved
+only when a `--cluster` server boots, so `cluster validate`, `plan`, and
+`apply` do not need deployment secrets. `mock` is deterministic and does not
+require `api_key`. Vector dimensions stay schema-driven by the target
+`Vector(N)` column, not the provider profile.
+
 `storage:` (optional) is the **storage root URI** for everything the cluster
 stores — the state ledger, lock, content-addressed catalog, recovery
 sidecars, approval artifacts, and the derived graph roots
@ -132,10 +152,12 @@ operation is active.
 - stored-query parsing and query-name matching
 - stored-query type-checking against the desired schema
 - policy `applies_to` graph references
+- embedding provider profiles and graph `embedding_provider` references

-Fields reserved for later phases, such as `pipelines`, `embeddings`, `ui`,
-`aliases`, and `bindings`, fail with a typed diagnostic instead of being
-silently ignored.
+Fields reserved for later phases, such as `pipelines`, top-level
+`embeddings`, `ui`, `aliases`, and `bindings`, fail with a typed diagnostic
+instead of being silently ignored. Under `providers`, only `embedding` is
+supported today; other provider namespaces fail as unsupported config.

 ## Planning

@ -155,9 +177,21 @@ resource is planned as a create. If present, the file must use this shape:
  "applied_revision": {
    "config_digest": "...",
    "resources": {
-      "graph.knowledge": { "digest": "..." },
      "schema.knowledge": { "digest": "..." },
      "query.knowledge.find_experts": { "digest": "..." },
+      "provider.embedding.default": {
+        "digest": "...",
+        "embedding_profile": {
+          "kind": "openai-compatible",
+          "base_url": "https://openrouter.ai/api/v1",
+          "model": "openai/text-embedding-3-large",
+          "api_key": "${OPENROUTER_API_KEY}"
+        }
+      },
+      "graph.knowledge": {
+        "digest": "...",
+        "embedding_provider": "provider.embedding.default"
+      },
      "policy.base": {
        "digest": "...",
        "applies_to": ["cluster", "graph.knowledge"]
--- a/docs/user/reference/constants.md
+++ b/docs/user/reference/constants.md
@ -19,11 +19,13 @@
 | Expand mode override | `OMNIGRAPH_TRAVERSAL_MODE` (`indexed`\|`csr`; unset = cost-based auto) | traversal |
 | Default body limit | `1 MB` | HTTP server |
 | Ingest body limit | `32 MB` | HTTP server |
-| Engine embed model | `gemini-embedding-2-preview` | engine embedding |
-| Compiler embed model | `text-embedding-3-small` | compiler embedding |
-| Embed timeout | `30 000 ms` | both clients |
-| Embed retries | `4` | both clients |
-| Embed retry backoff | `200 ms` | both clients |
+| Default embed provider/model | `openai-compatible` / `openai/text-embedding-3-large` | engine embedding |
+| OpenAI-direct embed model | `text-embedding-3-large` | engine embedding |
+| Gemini-direct embed model | `gemini-embedding-2` | engine embedding |
+| Embed deadline | `OMNIGRAPH_EMBED_DEADLINE_MS=60000` | engine embedding |
+| Embed timeout | `OMNIGRAPH_EMBED_TIMEOUT_MS=30000` | engine embedding |
+| Embed retries | `OMNIGRAPH_EMBED_RETRY_ATTEMPTS=4` | engine embedding |
+| Embed retry backoff | `OMNIGRAPH_EMBED_RETRY_BACKOFF_MS=200` | engine embedding |
 | LANCE memory pool default | `1 GB` (raised in v0.3.0) | runtime |

 **Expand traversal dispatch.** With `OMNIGRAPH_TRAVERSAL_MODE` unset, the engine
--- a/docs/user/schema/index.md
+++ b/docs/user/schema/index.md
@ -45,7 +45,7 @@ Edge bodies only allow `@unique` and `@index`.

 - `@<ident>` or `@<ident>(<literal>)` on any declaration or property.
 - Known annotations:
-  - `@embed` on a Vector property — names the *source* property whose text gets embedded into this vector at ingest.
+  - `@embed("source_property")` on a Vector property — records which String property is the embedding source for query-time `nearest($v, "string")` auto-embedding. It is a catalog annotation; it does **not** populate the vector at ingest (supply vectors in load data, or pre-fill via the offline `omnigraph embed` pipeline). An optional `model="…"` kwarg (`@embed("source_property", model="openai/text-embedding-3-large")`) records the embedding model so a `nearest()` query whose embedder uses a different model is rejected loudly; `model` is the only supported kwarg. See [search/embeddings.md](../search/embeddings.md).
  - `@description("…")`, `@instruction("…")` on query declarations (carried through to clients).
 - Custom annotations are accepted by the parser and surfaced in catalog metadata; unrecognized annotations don't fail compilation.

--- a/docs/user/search/embeddings.md
+++ b/docs/user/search/embeddings.md
@ -1,31 +1,112 @@
 # Embeddings

-OmniGraph has **two** embedding clients with different defaults and purposes.
+OmniGraph embeds text through a **single, provider-independent client** resolved from one
+`EmbeddingConfig { provider, model, base_url, api_key }`. The same resolved config is used by the query-time
+auto-embed of a string in `nearest($v, "string")` and by the offline `omnigraph embed` file pipeline, so
+query vectors and document vectors share one model and one vector space.

-## Compiler-side client — query-time normalization
+## Providers

- Default model: `text-embedding-3-small` (OpenAI-style schema)
- Env: `NANOGRAPH_EMBED_MODEL`, `OPENAI_API_KEY`, `OPENAI_BASE_URL` (default `https://api.openai.com/v1`), `NANOGRAPH_EMBEDDINGS_MOCK`, `NANOGRAPH_EMBED_TIMEOUT_MS=30000`, `NANOGRAPH_EMBED_RETRY_ATTEMPTS=4`, `NANOGRAPH_EMBED_RETRY_BACKOFF_MS=200`
- Methods: `embed_text(input, expected_dim)`, `embed_texts(inputs, expected_dim)`
- Mock mode: deterministic FNV-1a + xorshift64 → L2-normalized vectors
+| `provider` | Wire shape | Use it for |
+|---|---|---|
+| `openai-compatible` (default) | `POST {base}/embeddings`, bearer auth, `{model, input, dimensions}` | **OpenRouter** (the default gateway — one key for many models), OpenAI direct, or a self-hosted endpoint (vLLM / Ollama / LM Studio) |
+| `gemini` | `POST {base}/models/{model}:embedContent`, `x-goog-api-key`, with `RETRIEVAL_QUERY` / `RETRIEVAL_DOCUMENT` task types | Reaching Google's `generativelanguage` API directly |
+| `mock` | none — deterministic offline vectors | Tests and local dev without a key |

-## Engine-side client — runtime ingest
+Vectors are stored L2-normalized as `FixedSizeList(Float32, dim)`; the requested output dimension is driven by
+the target column width and sent as Gemini `outputDimensionality` / OpenAI `dimensions`.

- Model: `gemini-embedding-2-preview`
- Env: `GEMINI_API_KEY`, `OMNIGRAPH_GEMINI_BASE_URL` (default Google generativelanguage v1beta), `OMNIGRAPH_EMBED_TIMEOUT_MS=30000`, `OMNIGRAPH_EMBED_RETRY_ATTEMPTS=4`, `OMNIGRAPH_EMBED_RETRY_BACKOFF_MS=200`, `OMNIGRAPH_EMBEDDINGS_MOCK`
- Two task types: `embed_query_text` (RETRIEVAL_QUERY) and `embed_document_text` (RETRIEVAL_DOCUMENT)
- Exponential backoff with retryable detection (timeouts, 429, 5xx)
+## Configuration (cluster)

-## Schema integration
+Cluster-served graphs can pin their query-time embedder in `cluster.yaml`:

-Mark a Vector property with `@embed("source_text_property")`. At ingest, the engine pulls the source text and writes the embedding into the vector column. Stored as L2-normalized FixedSizeList(Float32, dim).
+```yaml
+providers:
+  embedding:
+    default:
+      kind: openai-compatible
+      base_url: https://openrouter.ai/api/v1
+      model: openai/text-embedding-3-large
+      api_key: ${OPENROUTER_API_KEY}
+
+graphs:
+  knowledge:
+    schema: knowledge.pg
+    embedding_provider: default
+```
+
+`embedding_provider` references `providers.embedding.<name>`; bare names are
+normalized to that typed ref. The server resolves `${ENV_VAR}` only when it
+boots from the applied cluster ledger, so `cluster validate`, `plan`, and
+`apply` do not need provider secrets. Inline API keys are rejected. `mock`
+needs no key. Vector dimensions stay schema-driven by the target `Vector(N)`
+column.
+
+Direct single-graph serving, embedded callers, and the offline
+`omnigraph embed` pipeline use environment configuration unless they inject an
+`EmbeddingConfig` directly.
+
+## Configuration (environment)
+
+| Variable | Meaning |
+|---|---|
+| `OMNIGRAPH_EMBED_PROVIDER` | `openai-compatible` (default, → OpenRouter) \| `openai` (→ OpenAI's own host) \| `gemini` \| `mock` |
+| `OMNIGRAPH_EMBED_BASE_URL` | endpoint base; defaults `https://openrouter.ai/api/v1` (`openai-compatible`/unset), `https://api.openai.com/v1` (`openai`), `https://generativelanguage.googleapis.com/v1beta` (`gemini`) |
+| `OMNIGRAPH_EMBED_MODEL` | model id; defaults `openai/text-embedding-3-large` (OpenRouter), `text-embedding-3-large` (`openai`), `gemini-embedding-2` (`gemini`) |
+| `OPENROUTER_API_KEY` / `OPENAI_API_KEY` | api key for `openai-compatible` (OpenRouter preferred) |
+| `GEMINI_API_KEY` | api key for `gemini` |
+| `OMNIGRAPH_EMBED_DEADLINE_MS` | total wall-clock budget for one embed call across all retries (default `60000`; `0` = unbounded) |
+| `OMNIGRAPH_EMBED_TIMEOUT_MS` | per-request HTTP timeout (default `30000`) |
+| `OMNIGRAPH_EMBED_RETRY_ATTEMPTS` / `OMNIGRAPH_EMBED_RETRY_BACKOFF_MS` | retry policy (defaults `4` / `200`) |
+| `OMNIGRAPH_EMBEDDINGS_MOCK` | set truthy to force the deterministic mock provider |
+
+The default zero-config path is OpenRouter: set `OPENROUTER_API_KEY` and run. Reaching Gemini takes
+`OMNIGRAPH_EMBED_PROVIDER=gemini` plus `GEMINI_API_KEY`.
+
+### Behavior notes
+
+- **Bounded latency.** Each embed call is wrapped in `OMNIGRAPH_EMBED_DEADLINE_MS`, so a degraded
+  provider cannot hang a read for the full retry envelope.
+- **Reuse.** The query path builds the client once per graph handle (on the first `nearest($v, "string")`
+  that needs embedding) and reuses it, keeping the provider connection pool warm. A graph that never embeds
+  needs no provider key.
+- **Observability.** Embed calls emit `tracing` events under `target = "omnigraph::embedding"` (provider,
+  model, dim, attempt, elapsed, outcome).
+
+## `@embed` schema annotation
+
+Mark a Vector property with `@embed("source_text_property")`. This is a **catalog annotation** consumed by the
+query typechecker and linter: it records which String property is the embedding source and lets
+`nearest($v, "string")` auto-embed a query string for comparison against that vector column.
+
+Optionally record the model that produced the stored vectors:
+`@embed("source_text_property", model="openai/text-embedding-3-large")`. When a model is recorded, a
+`nearest($v, "string")` query is **rejected with a typed error** unless the resolved query embedder uses the
+same model — so stored and query vectors are guaranteed same-space instead of silently ranking across spaces.
+To fix a mismatch, set `OMNIGRAPH_EMBED_MODEL` (and the matching provider) to the recorded model, or re-embed.
+The recorded model is the literal string, so `openai/text-embedding-3-large` (via OpenRouter) and
+`text-embedding-3-large` (OpenAI direct) are distinct identities; use the matching string. Changing a recorded
+model is a loud `schema apply` refusal (treat it as a re-embed migration). `@embed` without a model keeps
+working with no validation. `model` is the only supported `@embed` argument; any other is a parse error.
+
+**It does not embed at ingest.** Stored vectors are supplied directly in your load data, or pre-filled by the
+offline `omnigraph embed` pipeline below. (Ingest-time execution of `@embed` is a planned enhancement.)

 ## CLI `omnigraph embed` (offline file pipeline)

-Operates on **JSONL files** (not on a graph). Three modes (mutually exclusive):
+Operates on **JSONL files** (not on a graph), using the same resolved provider config. Three modes (mutually
+exclusive):

 - (default) `fill_missing` — only embed rows whose target field is empty
 - `--reembed-all` — overwrite all
 - `--clean` — strip embeddings

 Inputs are either a single seed manifest YAML or `--input/--output/--spec`. Selectors `--type T`, `--select T:field=value` filter rows. Streams JSONL → JSONL.
+
+## Migration
+
+This release has no backwards-compatibility shim (pre-release). The default provider is now OpenRouter, and
+the legacy `OMNIGRAPH_GEMINI_BASE_URL` is removed. A graph whose vectors were produced with
+`gemini-embedding-2-preview` should either re-embed, or pin the query-time embedder to match by setting
+`OMNIGRAPH_EMBED_PROVIDER=gemini` and `OMNIGRAPH_EMBED_MODEL=gemini-embedding-2-preview` (the stored and query
+vectors must come from the same model to be comparable).