From 1a06150c33ab45b1fc334c2d0fb7211e9fdac3dc Mon Sep 17 00:00:00 2001 From: Ragnor Comerford Date: Mon, 15 Jun 2026 21:09:34 +0200 Subject: [PATCH] feat(compiler): record @embed model in the catalog (RFC-012 Phase 3) NodeType.embed_sources becomes HashMap<String, EmbedSource>, populated from the @embed source arg + model kwarg; it round-trips through build_catalog_from_ir (the engine's IR-load path), so the recorded model reaches query execution. The migration planner already rejects any @embed change as UnsupportedChange, so changing a recorded model is a loud schema-apply refusal for free. New catalog test. --- crates/omnigraph-compiler/src/catalog/mod.rs | 33 +++++++++++++------ .../omnigraph-compiler/src/catalog/tests.rs | 27 +++++++++++++++ .../omnigraph-compiler/src/query/typecheck.rs | 6 ++-- 3 files changed, 53 insertions(+), 13 deletions(-) diff --git a/crates/omnigraph-compiler/src/catalog/mod.rs b/crates/omnigraph-compiler/src/catalog/mod.rs index 0bb536d..93f8d89 100644 --- a/crates/omnigraph-compiler/src/catalog/mod.rs +++ b/crates/omnigraph-compiler/src/catalog/mod.rs @@ -26,6 +26,15 @@ pub struct InterfaceType { pub properties: HashMap, } +/// The `@embed` binding for a vector property: its source text property and, +/// optionally, the embedding model recorded by `@embed("source", model="…")`. +/// The model is what the query-time same-space check validates against. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct EmbedSource { + pub source: String, + pub model: Option, +} + #[derive(Debug, Clone)] pub struct NodeType { pub name: String, @@ -42,8 +51,8 @@ pub struct NodeType { pub range_constraints: Vec, /// Regex check constraints pub check_constraints: Vec, - /// Maps @embed target property -> source text property - pub embed_sources: HashMap, + /// Maps @embed target property -> its source text property + recorded model. 
+ pub embed_sources: HashMap, pub blob_properties: HashSet, pub arrow_schema: SchemaRef, } @@ -156,14 +165,18 @@ pub fn build_catalog(schema: &SchemaFile) -> Result { if matches!(prop.prop_type.scalar, ScalarType::Blob) { blob_properties.insert(prop.name.clone()); } - // Extract @embed from property annotations (stays as annotation) - if let Some(source_prop) = prop - .annotations - .iter() - .find(|ann| ann.name == "embed") - .and_then(|ann| ann.value.clone()) - { - embed_sources.insert(prop.name.clone(), source_prop); + // Extract @embed: the source text property (positional) and the + // optional recorded model (the `model` kwarg). + if let Some(ann) = prop.annotations.iter().find(|ann| ann.name == "embed") { + if let Some(source) = ann.value.clone() { + embed_sources.insert( + prop.name.clone(), + EmbedSource { + source, + model: ann.kwargs.get("model").cloned(), + }, + ); + } } } diff --git a/crates/omnigraph-compiler/src/catalog/tests.rs b/crates/omnigraph-compiler/src/catalog/tests.rs index 883b4a9..4ab3956 100644 --- a/crates/omnigraph-compiler/src/catalog/tests.rs +++ b/crates/omnigraph-compiler/src/catalog/tests.rs @@ -31,6 +31,33 @@ fn test_build_catalog() { assert!(catalog.node_types.contains_key("Company")); } +#[test] +fn test_embed_source_records_model_kwarg() { + let schema = parse_schema( + r#" +node Doc { +title: String +embedding: Vector(3) @embed("title", model="openai/text-embedding-3-large") +plain: Vector(3) @embed("title") +} +"#, + ) + .unwrap(); + let catalog = build_catalog(&schema).unwrap(); + let doc = catalog.node_types.get("Doc").unwrap(); + + let embedding = doc.embed_sources.get("embedding").unwrap(); + assert_eq!(embedding.source, "title"); + assert_eq!( + embedding.model.as_deref(), + Some("openai/text-embedding-3-large") + ); + + let plain = doc.embed_sources.get("plain").unwrap(); + assert_eq!(plain.source, "title"); + assert_eq!(plain.model, None); +} + #[test] fn test_edge_lookup() { let schema = 
parse_schema(test_schema()).unwrap(); diff --git a/crates/omnigraph-compiler/src/query/typecheck.rs b/crates/omnigraph-compiler/src/query/typecheck.rs index 658f083..b2c235a 100644 --- a/crates/omnigraph-compiler/src/query/typecheck.rs +++ b/crates/omnigraph-compiler/src/query/typecheck.rs @@ -261,13 +261,13 @@ fn typecheck_mutation(catalog: &Catalog, mutation: &Mutation, params: &[Param]) continue; } - if let Some(source_prop) = node_type.embed_sources.get(prop_name) { - if assigned_props.contains(source_prop.as_str()) { + if let Some(embed) = node_type.embed_sources.get(prop_name) { + if assigned_props.contains(embed.source.as_str()) { continue; } return Err(NanoError::Type(format!( "T12: insert for `{}` must provide non-nullable property `{}` or @embed source `{}`", - insert.type_name, prop_name, source_prop + insert.type_name, prop_name, embed.source ))); }