feat(compiler): record @embed model in the catalog (RFC-012 Phase 3)

NodeType.embed_sources becomes HashMap<String, EmbedSource { source, model }>, populated from the @embed source arg + model kwarg; it round-trips through build_catalog_from_ir (the engine's IR-load path), so the recorded model reaches query execution. The migration planner already rejects any @embed change as UnsupportedChange, so changing a recorded model is a loud schema-apply refusal for free. New catalog test.
This commit is contained in:
Ragnor Comerford 2026-06-15 21:09:34 +02:00
parent 74476f7f51
commit 1a06150c33
No known key found for this signature in database
3 changed files with 53 additions and 13 deletions

View file

@ -26,6 +26,15 @@ pub struct InterfaceType {
pub properties: HashMap<String, PropType>,
}
/// The `@embed` binding for a vector property: its source text property and,
/// optionally, the embedding model recorded by `@embed("source", model="…")`.
/// The model is what the query-time same-space check validates against.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EmbedSource {
/// Name of the source text property (the positional `@embed` argument).
pub source: String,
/// Embedding model taken from the `model` kwarg; `None` when the
/// annotation does not supply one.
pub model: Option<String>,
}
#[derive(Debug, Clone)]
pub struct NodeType {
pub name: String,
@ -42,8 +51,8 @@ pub struct NodeType {
pub range_constraints: Vec<RangeConstraint>,
/// Regex check constraints
pub check_constraints: Vec<CheckConstraint>,
/// Maps @embed target property -> source text property
pub embed_sources: HashMap<String, String>,
/// Maps @embed target property -> its source text property + recorded model.
pub embed_sources: HashMap<String, EmbedSource>,
pub blob_properties: HashSet<String>,
pub arrow_schema: SchemaRef,
}
@ -156,14 +165,18 @@ pub fn build_catalog(schema: &SchemaFile) -> Result<Catalog> {
if matches!(prop.prop_type.scalar, ScalarType::Blob) {
blob_properties.insert(prop.name.clone());
}
// Extract @embed from property annotations (stays as annotation)
if let Some(source_prop) = prop
.annotations
.iter()
.find(|ann| ann.name == "embed")
.and_then(|ann| ann.value.clone())
{
embed_sources.insert(prop.name.clone(), source_prop);
// Extract @embed: the source text property (positional) and the
// optional recorded model (the `model` kwarg).
if let Some(ann) = prop.annotations.iter().find(|ann| ann.name == "embed") {
if let Some(source) = ann.value.clone() {
embed_sources.insert(
prop.name.clone(),
EmbedSource {
source,
model: ann.kwargs.get("model").cloned(),
},
);
}
}
}

View file

@ -31,6 +31,33 @@ fn test_build_catalog() {
assert!(catalog.node_types.contains_key("Company"));
}
#[test]
fn test_embed_source_records_model_kwarg() {
    // Two vector properties embed the same source; only `embedding` names a model.
    let schema_text = r#"
node Doc {
title: String
embedding: Vector(3) @embed("title", model="openai/text-embedding-3-large")
plain: Vector(3) @embed("title")
}
"#;
    let schema = parse_schema(schema_text).unwrap();
    let catalog = build_catalog(&schema).unwrap();
    let embeds = &catalog.node_types.get("Doc").unwrap().embed_sources;

    // With the `model` kwarg: both the source and the model are recorded.
    let with_model = embeds.get("embedding").unwrap();
    assert_eq!(with_model.source, "title");
    assert_eq!(
        with_model.model.as_deref(),
        Some("openai/text-embedding-3-large")
    );

    // Without the kwarg: the source is recorded and the model stays unset.
    let without_model = embeds.get("plain").unwrap();
    assert_eq!(without_model.source, "title");
    assert_eq!(without_model.model, None);
}
#[test]
fn test_edge_lookup() {
let schema = parse_schema(test_schema()).unwrap();

View file

@ -261,13 +261,13 @@ fn typecheck_mutation(catalog: &Catalog, mutation: &Mutation, params: &[Param])
continue;
}
if let Some(source_prop) = node_type.embed_sources.get(prop_name) {
if assigned_props.contains(source_prop.as_str()) {
if let Some(embed) = node_type.embed_sources.get(prop_name) {
if assigned_props.contains(embed.source.as_str()) {
continue;
}
return Err(NanoError::Type(format!(
"T12: insert for `{}` must provide non-nullable property `{}` or @embed source `{}`",
insert.type_name, prop_name, source_prop
insert.type_name, prop_name, embed.source
)));
}