diff --git a/crates/omnigraph-cli/src/embed.rs b/crates/omnigraph-cli/src/embed.rs index b1773f6..a0603b7 100644 --- a/crates/omnigraph-cli/src/embed.rs +++ b/crates/omnigraph-cli/src/embed.rs @@ -83,8 +83,6 @@ impl EmbedMode { #[derive(Debug, Clone, Deserialize)] struct EmbedSpec { - #[serde(default)] - model: String, dimension: usize, types: BTreeMap, } @@ -296,7 +294,14 @@ pub(crate) async fn run_embed_job(job: &EmbedJob) -> Result { cleaned_rows, mode: job.mode.as_str(!job.selectors.is_empty()), dimension: job.spec.dimension, - model: job.spec.model.clone(), + // The embedding model is resolved solely from the provider config; the + // spec carries no model field, so there is no second source of truth to + // silently disagree with the API. Report what was actually used (empty + // for `--clean`, which builds no client). + model: client + .as_ref() + .map(|c| c.config().model.clone()) + .unwrap_or_default(), }) } diff --git a/crates/omnigraph/src/embedding.rs b/crates/omnigraph/src/embedding.rs index 70ac9df..9fbf8c0 100644 --- a/crates/omnigraph/src/embedding.rs +++ b/crates/omnigraph/src/embedding.rs @@ -8,8 +8,10 @@ use tokio::time::sleep; use crate::error::{OmniError, Result}; -const DEFAULT_OPENAI_BASE_URL: &str = "https://openrouter.ai/api/v1"; -const DEFAULT_OPENAI_MODEL: &str = "openai/text-embedding-3-large"; +const DEFAULT_OPENROUTER_BASE_URL: &str = "https://openrouter.ai/api/v1"; +const DEFAULT_OPENROUTER_MODEL: &str = "openai/text-embedding-3-large"; +const DEFAULT_OPENAI_BASE_URL: &str = "https://api.openai.com/v1"; +const DEFAULT_OPENAI_MODEL: &str = "text-embedding-3-large"; const DEFAULT_GEMINI_BASE_URL: &str = "https://generativelanguage.googleapis.com/v1beta"; const DEFAULT_GEMINI_MODEL: &str = "gemini-embedding-2"; const DEFAULT_TIMEOUT_MS: u64 = 30_000; @@ -35,24 +37,6 @@ pub enum Provider { Mock, } -impl Provider { - fn default_base_url(self) -> &'static str { - match self { - Provider::OpenAiCompatible => DEFAULT_OPENAI_BASE_URL, - Provider::Gemini => DEFAULT_GEMINI_BASE_URL, - Provider::Mock => "", - } - } - - fn default_model(self) -> &'static str { - match self { - Provider::OpenAiCompatible => DEFAULT_OPENAI_MODEL, - Provider::Gemini => DEFAULT_GEMINI_MODEL, - Provider::Mock => "", - } - } -} - /// Whether the text being embedded is a search query or a stored document. /// Only Gemini distinguishes these (`RETRIEVAL_QUERY` vs `RETRIEVAL_DOCUMENT`); /// OpenAI-compatible providers and Mock produce the identical request for both, @@ -89,24 +73,39 @@ impl EmbeddingConfig { return Ok(Self::mock()); } - let provider = match env_string("OMNIGRAPH_EMBED_PROVIDER").as_deref() { - None | Some("openai-compatible") | Some("openai") => Provider::OpenAiCompatible, - Some("gemini") => Provider::Gemini, - Some("mock") => return Ok(Self::mock()), - Some(other) => { - return Err(OmniError::manifest_internal(format!( - "unknown OMNIGRAPH_EMBED_PROVIDER '{}' (expected openai-compatible|gemini|mock)", - other - ))); - } - }; + // The default base URL and model depend on the provider *alias*, not just + // the wire shape: `openai-compatible` (and the unset default) point at the + // OpenRouter gateway, while `openai` points at OpenAI's own host. + let (provider, default_base, default_model) = + match env_string("OMNIGRAPH_EMBED_PROVIDER").as_deref() { + None | Some("openai-compatible") => ( + Provider::OpenAiCompatible, + DEFAULT_OPENROUTER_BASE_URL, + DEFAULT_OPENROUTER_MODEL, + ), + Some("openai") => ( + Provider::OpenAiCompatible, + DEFAULT_OPENAI_BASE_URL, + DEFAULT_OPENAI_MODEL, + ), + Some("gemini") => { + (Provider::Gemini, DEFAULT_GEMINI_BASE_URL, DEFAULT_GEMINI_MODEL) + } + Some("mock") => return Ok(Self::mock()), + Some(other) => { + return Err(OmniError::manifest_internal(format!( + "unknown OMNIGRAPH_EMBED_PROVIDER '{}' (expected openai-compatible|openai|gemini|mock)", + other + ))); + } + }; let base_url = env_string("OMNIGRAPH_EMBED_BASE_URL") - .unwrap_or_else(|| provider.default_base_url().to_string()) + .unwrap_or_else(|| default_base.to_string()) .trim_end_matches('/') .to_string(); let model = - env_string("OMNIGRAPH_EMBED_MODEL").unwrap_or_else(|| provider.default_model().to_string()); + env_string("OMNIGRAPH_EMBED_MODEL").unwrap_or_else(|| default_model.to_string()); let api_key = match provider { Provider::OpenAiCompatible => env_string("OPENROUTER_API_KEY") @@ -827,11 +826,25 @@ mod tests { let _guard = cleared_env(&[("OPENROUTER_API_KEY", Some("sk-test"))]); let config = EmbeddingConfig::from_env().unwrap(); assert_eq!(config.provider, Provider::OpenAiCompatible); - assert_eq!(config.base_url, DEFAULT_OPENAI_BASE_URL); - assert_eq!(config.model, DEFAULT_OPENAI_MODEL); + assert_eq!(config.base_url, DEFAULT_OPENROUTER_BASE_URL); + assert_eq!(config.model, DEFAULT_OPENROUTER_MODEL); assert_eq!(config.api_key, "sk-test"); } + #[test] + #[serial] + fn from_env_openai_alias_uses_openai_host_not_openrouter() { + let _guard = cleared_env(&[ + ("OMNIGRAPH_EMBED_PROVIDER", Some("openai")), + ("OPENAI_API_KEY", Some("k")), + ]); + let config = EmbeddingConfig::from_env().unwrap(); + assert_eq!(config.provider, Provider::OpenAiCompatible); + assert_eq!(config.base_url, DEFAULT_OPENAI_BASE_URL); // api.openai.com, not OpenRouter + assert_eq!(config.model, DEFAULT_OPENAI_MODEL); // text-embedding-3-large, no openai/ prefix + assert_eq!(config.api_key, "k"); + } + #[test] #[serial] fn from_env_openai_compatible_prefers_openrouter_key() { diff --git a/docs/dev/execution.md b/docs/dev/execution.md index 237a7af..e9ac9eb 100644 --- a/docs/dev/execution.md +++ b/docs/dev/execution.md @@ -177,4 +177,4 @@ For all three modes, a mid-load failure (RI / cardinality violation, validation ## Embeddings during load -If a node type has `@embed` properties, the loader calls the engine embedding client (Gemini, RETRIEVAL_DOCUMENT) per row to populate the vector column. See [embeddings.md](../user/search/embeddings.md). +The loader does **not** embed `@embed` properties at load time. `@embed` is a catalog annotation consumed by query typecheck/lint; vectors are supplied directly in the load data, or pre-filled by the offline `omnigraph embed` pipeline. Query-time `nearest($v, "string")` auto-embeds the query string via the provider-independent embedding client. See [embeddings.md](../user/search/embeddings.md). (Ingest-time `@embed` execution is a planned RFC-012 phase.) diff --git a/docs/user/schema/index.md b/docs/user/schema/index.md index d0fcd1b..526f25f 100644 --- a/docs/user/schema/index.md +++ b/docs/user/schema/index.md @@ -45,7 +45,7 @@ Edge bodies only allow `@unique` and `@index`. - `@` or `@()` on any declaration or property. - Known annotations: - - `@embed` on a Vector property — names the *source* property whose text gets embedded into this vector at ingest. + - `@embed("source_property")` on a Vector property — records which String property is the embedding source for query-time `nearest($v, "string")` auto-embedding. It is a catalog annotation; it does **not** populate the vector at ingest (supply vectors in load data, or pre-fill via the offline `omnigraph embed` pipeline). - `@description("…")`, `@instruction("…")` on query declarations (carried through to clients). - Custom annotations are accepted by the parser and surfaced in catalog metadata; unrecognized annotations don't fail compilation. diff --git a/docs/user/search/embeddings.md b/docs/user/search/embeddings.md index de80831..c31e25f 100644 --- a/docs/user/search/embeddings.md +++ b/docs/user/search/embeddings.md @@ -20,9 +20,9 @@ the target column width and sent as Gemini `outputDimensionality` / OpenAI `dime | Variable | Meaning | |---|---| -| `OMNIGRAPH_EMBED_PROVIDER` | `openai-compatible` (default) \| `gemini` \| `mock` | -| `OMNIGRAPH_EMBED_BASE_URL` | endpoint base; default `https://openrouter.ai/api/v1` (openai-compatible) or `https://generativelanguage.googleapis.com/v1beta` (gemini) | -| `OMNIGRAPH_EMBED_MODEL` | model id; default `openai/text-embedding-3-large` (openai-compatible) or `gemini-embedding-2` (gemini) | +| `OMNIGRAPH_EMBED_PROVIDER` | `openai-compatible` (default, → OpenRouter) \| `openai` (→ OpenAI's own host) \| `gemini` \| `mock` | +| `OMNIGRAPH_EMBED_BASE_URL` | endpoint base; defaults `https://openrouter.ai/api/v1` (`openai-compatible`/unset), `https://api.openai.com/v1` (`openai`), `https://generativelanguage.googleapis.com/v1beta` (`gemini`) | +| `OMNIGRAPH_EMBED_MODEL` | model id; defaults `openai/text-embedding-3-large` (OpenRouter), `text-embedding-3-large` (`openai`), `gemini-embedding-2` (`gemini`) | | `OPENROUTER_API_KEY` / `OPENAI_API_KEY` | api key for `openai-compatible` (OpenRouter preferred) | | `GEMINI_API_KEY` | api key for `gemini` | | `OMNIGRAPH_EMBED_QUERY_DEADLINE_MS` | total wall-clock budget for one embed call across all retries (default `60000`; `0` = unbounded) |