From 74476f7f51ae551232502ac2b0c8c830cfc991f7 Mon Sep 17 00:00:00 2001 From: Ragnor Comerford Date: Mon, 15 Jun 2026 21:09:15 +0200 Subject: [PATCH] feat(compiler): @embed model kwarg in grammar/AST/parser (RFC-012 Phase 3) Annotations gain optional comma-separated key=value kwargs. Annotation keeps value (existing consumers unchanged) and adds kwargs: BTreeMap with serde(default, skip_serializing_if) so empty kwargs are omitted and existing schemas' IR JSON/hash stay byte-identical. The parser rejects any @embed kwarg other than model. render_annotations shows kwargs. 3 new parser tests. --- crates/omnigraph-cli/src/output.rs | 16 ++++- .../src/catalog/schema_plan.rs | 1 + crates/omnigraph-compiler/src/schema/ast.rs | 7 +++ .../omnigraph-compiler/src/schema/parser.rs | 41 +++++++++++-- .../src/schema/parser_tests.rs | 60 +++++++++++++++++++ .../omnigraph-compiler/src/schema/schema.pest | 4 +- 6 files changed, 120 insertions(+), 9 deletions(-) diff --git a/crates/omnigraph-cli/src/output.rs b/crates/omnigraph-cli/src/output.rs index 446c6ca..7f2be2d 100644 --- a/crates/omnigraph-cli/src/output.rs +++ b/crates/omnigraph-cli/src/output.rs @@ -696,9 +696,19 @@ pub(crate) fn render_constraint(constraint: &omnigraph_compiler::schema::ast::Co pub(crate) fn render_annotations(annotations: &[omnigraph_compiler::schema::ast::Annotation]) -> String { annotations .iter() - .map(|annotation| match &annotation.value { - Some(value) => format!("@{}({})", annotation.name, value), - None => format!("@{}", annotation.name), + .map(|annotation| { + let mut args: Vec = Vec::new(); + if let Some(value) = &annotation.value { + args.push(value.clone()); + } + for (key, val) in &annotation.kwargs { + args.push(format!("{}={}", key, val)); + } + if args.is_empty() { + format!("@{}", annotation.name) + } else { + format!("@{}({})", annotation.name, args.join(", ")) + } }) .collect::>() .join(", ") diff --git a/crates/omnigraph-compiler/src/catalog/schema_plan.rs b/crates/omnigraph-compiler/src/catalog/schema_plan.rs index a9e26b2..dc9d466 100644 --- a/crates/omnigraph-compiler/src/catalog/schema_plan.rs +++ b/crates/omnigraph-compiler/src/catalog/schema_plan.rs @@ -1137,6 +1137,7 @@ node Person @description("new") { annotations: vec![Annotation { name: "description".to_string(), value: Some("new".to_string()), + kwargs: Default::default(), }], })); } diff --git a/crates/omnigraph-compiler/src/schema/ast.rs b/crates/omnigraph-compiler/src/schema/ast.rs index f8ed18a..9be0e56 100644 --- a/crates/omnigraph-compiler/src/schema/ast.rs +++ b/crates/omnigraph-compiler/src/schema/ast.rs @@ -1,3 +1,5 @@ +use std::collections::BTreeMap; + use crate::types::PropType; use serde::{Deserialize, Serialize}; @@ -50,6 +52,11 @@ pub struct PropDecl { pub struct Annotation { pub name: String, pub value: Option, + /// Keyword arguments, e.g. `model="…"` on `@embed("source", model="…")`. + /// Empty is skipped in serialization so existing schemas' IR JSON (and + /// hash) stay byte-identical; `BTreeMap` keeps the order deterministic. + #[serde(default, skip_serializing_if = "BTreeMap::is_empty")] + pub kwargs: BTreeMap, } /// A typed constraint declared in a node or edge body. diff --git a/crates/omnigraph-compiler/src/schema/parser.rs b/crates/omnigraph-compiler/src/schema/parser.rs index 43e11ed..c5f4355 100644 --- a/crates/omnigraph-compiler/src/schema/parser.rs +++ b/crates/omnigraph-compiler/src/schema/parser.rs @@ -556,12 +556,32 @@ fn parse_type_ref(pair: pest::iterators::Pair) -> Result { fn parse_annotation(pair: pest::iterators::Pair) -> Result { let mut inner = pair.into_inner(); let name = inner.next().unwrap().as_str().to_string(); - let value = inner - .next() - .map(|p| decode_string_literal(p.as_str())) - .transpose()?; + let mut value = None; + let mut kwargs = std::collections::BTreeMap::new(); + if let Some(args) = inner.next() { + // `annotation_args`: one positional arg followed by zero or more + // `key = literal` kwargs (e.g. `@embed("source", model="…")`). + for arg in args.into_inner() { + match arg.as_rule() { + Rule::annotation_arg => { + value = Some(decode_string_literal(arg.as_str())?); + } + Rule::annotation_kwarg => { + let mut kw = arg.into_inner(); + let key = kw.next().unwrap().as_str().to_string(); + let raw = kw.next().unwrap().as_str(); + kwargs.insert(key, decode_string_literal(raw)?); + } + _ => {} + } + } + } - Ok(Annotation { name, value }) + Ok(Annotation { + name, + value, + kwargs, + }) } fn validate_string_annotation( @@ -823,6 +843,17 @@ fn validate_property_annotations( type_name, source_prop ))); } + + // `model` is the only supported kwarg; reject the rest loudly so + // a typo can't be silently ignored (it would never validate). + for key in ann.kwargs.keys() { + if key != "model" { + return Err(NanoError::Parse(format!( + "@embed on {}.{} has unknown argument '{}=' (only 'model' is supported)", + type_name, prop.name, key + ))); + } + } } _ => {} } diff --git a/crates/omnigraph-compiler/src/schema/parser_tests.rs b/crates/omnigraph-compiler/src/schema/parser_tests.rs index 2302cfb..9a2e1ba 100644 --- a/crates/omnigraph-compiler/src/schema/parser_tests.rs +++ b/crates/omnigraph-compiler/src/schema/parser_tests.rs @@ -508,6 +508,66 @@ embedding: Vector(3) @embed(title) } } +#[test] +fn test_parse_embed_annotation_with_model_kwarg() { + let input = r#" +node Doc { +title: String +embedding: Vector(3) @embed("title", model="openai/text-embedding-3-large") +} +"#; + let schema = parse_schema(input).unwrap(); + match &schema.declarations[0] { + SchemaDecl::Node(n) => { + let ann = &n.properties[1].annotations[0]; + assert_eq!(ann.name, "embed"); + assert_eq!(ann.value.as_deref(), Some("title")); + assert_eq!( + ann.kwargs.get("model").map(String::as_str), + Some("openai/text-embedding-3-large") + ); + } + _ => panic!("expected Node"), + } +} + +#[test] +fn test_parse_embed_annotation_without_model_has_empty_kwargs() { + let input = r#" +node Doc { +title: String +embedding: Vector(3) @embed("title") +} +"#; + let schema = parse_schema(input).unwrap(); + match &schema.declarations[0] { + SchemaDecl::Node(n) => { + let ann = &n.properties[1].annotations[0]; + assert!(ann.kwargs.is_empty()); + // Empty kwargs must NOT serialize, so existing schemas' IR JSON (and + // thus the schema hash) stay byte-identical after this field is added. + let json = serde_json::to_string(ann).unwrap(); + assert!(!json.contains("kwargs"), "unexpected kwargs in {json}"); + } + _ => panic!("expected Node"), + } +} + +#[test] +fn test_parse_embed_annotation_rejects_unknown_kwarg() { + let input = r#" +node Doc { +title: String +embedding: Vector(3) @embed("title", provider="openai") +} +"#; + let err = parse_schema(input).unwrap_err(); + assert!( + err.to_string().contains("only 'model' is supported"), + "got: {err}" + ); +} + #[test] fn test_parse_edge_no_body() { let input = "edge WorksAt: Person -> Company\n"; diff --git a/crates/omnigraph-compiler/src/schema/schema.pest b/crates/omnigraph-compiler/src/schema/schema.pest index 395c516..b02724e 100644 --- a/crates/omnigraph-compiler/src/schema/schema.pest +++ b/crates/omnigraph-compiler/src/schema/schema.pest @@ -42,8 +42,10 @@ enum_value = @{ (ASCII_ALPHANUMERIC | "_" | "-")+ } base_type = { "String" | "Blob" | "Bool" | "I32" | "I64" | "U32" | "U64" | "F32" | "F64" | "DateTime" | "Date" } // Annotation rule excludes constraint keywords followed by "(" — those are body_constraints -annotation = { "@" ~ !(constraint_name ~ "(") ~ ident ~ ("(" ~ annotation_arg ~ ")")? } +annotation = { "@" ~ !(constraint_name ~ "(") ~ ident ~ ("(" ~ annotation_args ~ ")")? } +annotation_args = { annotation_arg ~ ("," ~ annotation_kwarg)* } annotation_arg = { literal | ident } +annotation_kwarg = { ident ~ "=" ~ literal } literal = { string_lit | float_lit | integer | bool_lit }