feat(compiler): @embed model kwarg in grammar/AST/parser (RFC-012 Phase 3)

Annotations gain optional comma-separated key=value kwargs. Annotation keeps value (existing consumers unchanged) and adds kwargs: BTreeMap with serde(default, skip_serializing_if) so empty kwargs are omitted and existing schemas' IR JSON/hash stay byte-identical. The parser rejects any @embed kwarg other than model. render_annotations shows kwargs. 3 new parser tests.
This commit is contained in:
Ragnor Comerford 2026-06-15 21:09:15 +02:00
parent 30377c453b
commit 74476f7f51
No known key found for this signature in database
6 changed files with 120 additions and 9 deletions

View file

@ -696,9 +696,19 @@ pub(crate) fn render_constraint(constraint: &omnigraph_compiler::schema::ast::Co
pub(crate) fn render_annotations(annotations: &[omnigraph_compiler::schema::ast::Annotation]) -> String {
annotations
.iter()
.map(|annotation| match &annotation.value {
Some(value) => format!("@{}({})", annotation.name, value),
None => format!("@{}", annotation.name),
.map(|annotation| {
let mut args: Vec<String> = Vec::new();
if let Some(value) = &annotation.value {
args.push(value.clone());
}
for (key, val) in &annotation.kwargs {
args.push(format!("{}={}", key, val));
}
if args.is_empty() {
format!("@{}", annotation.name)
} else {
format!("@{}({})", annotation.name, args.join(", "))
}
})
.collect::<Vec<_>>()
.join(", ")

View file

@ -1137,6 +1137,7 @@ node Person @description("new") {
annotations: vec![Annotation {
name: "description".to_string(),
value: Some("new".to_string()),
kwargs: Default::default(),
}],
}));
}

View file

@ -1,3 +1,5 @@
use std::collections::BTreeMap;
use crate::types::PropType;
use serde::{Deserialize, Serialize};
@ -50,6 +52,11 @@ pub struct PropDecl {
pub struct Annotation {
pub name: String,
pub value: Option<String>,
/// Keyword arguments, e.g. `model="…"` on `@embed("source", model="…")`.
/// Empty is skipped in serialization so existing schemas' IR JSON (and
/// hash) stay byte-identical; `BTreeMap` keeps the order deterministic.
#[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
pub kwargs: BTreeMap<String, String>,
}
/// A typed constraint declared in a node or edge body.

View file

@ -556,12 +556,32 @@ fn parse_type_ref(pair: pest::iterators::Pair<Rule>) -> Result<PropType> {
fn parse_annotation(pair: pest::iterators::Pair<Rule>) -> Result<Annotation> {
let mut inner = pair.into_inner();
let name = inner.next().unwrap().as_str().to_string();
let value = inner
.next()
.map(|p| decode_string_literal(p.as_str()))
.transpose()?;
let mut value = None;
let mut kwargs = std::collections::BTreeMap::new();
if let Some(args) = inner.next() {
// `annotation_args`: one positional arg followed by zero or more
// `key = literal` kwargs (e.g. `@embed("source", model="…")`).
for arg in args.into_inner() {
match arg.as_rule() {
Rule::annotation_arg => {
value = Some(decode_string_literal(arg.as_str())?);
}
Rule::annotation_kwarg => {
let mut kw = arg.into_inner();
let key = kw.next().unwrap().as_str().to_string();
let raw = kw.next().unwrap().as_str();
kwargs.insert(key, decode_string_literal(raw)?);
}
_ => {}
}
}
}
Ok(Annotation { name, value })
Ok(Annotation {
name,
value,
kwargs,
})
}
fn validate_string_annotation(
@ -823,6 +843,17 @@ fn validate_property_annotations(
type_name, source_prop
)));
}
// `model` is the only supported kwarg; reject the rest loudly so
// a typo can't be silently ignored (it would never validate).
for key in ann.kwargs.keys() {
if key != "model" {
return Err(NanoError::Parse(format!(
"@embed on {}.{} has unknown argument '{}=' (only 'model' is supported)",
type_name, prop.name, key
)));
}
}
}
_ => {}
}

View file

@ -508,6 +508,66 @@ embedding: Vector(3) @embed(title)
}
}
#[test]
fn test_parse_embed_annotation_with_model_kwarg() {
let input = r#"
node Doc {
title: String
embedding: Vector(3) @embed("title", model="openai/text-embedding-3-large")
}
"#;
let schema = parse_schema(input).unwrap();
match &schema.declarations[0] {
SchemaDecl::Node(n) => {
let ann = &n.properties[1].annotations[0];
assert_eq!(ann.name, "embed");
assert_eq!(ann.value.as_deref(), Some("title"));
assert_eq!(
ann.kwargs.get("model").map(String::as_str),
Some("openai/text-embedding-3-large")
);
}
_ => panic!("expected Node"),
}
}
#[test]
fn test_parse_embed_annotation_without_model_has_empty_kwargs() {
let input = r#"
node Doc {
title: String
embedding: Vector(3) @embed("title")
}
"#;
let schema = parse_schema(input).unwrap();
match &schema.declarations[0] {
SchemaDecl::Node(n) => {
let ann = &n.properties[1].annotations[0];
assert!(ann.kwargs.is_empty());
// Empty kwargs must NOT serialize, so existing schemas' IR JSON (and
// thus the schema hash) stay byte-identical after this field is added.
let json = serde_json::to_string(ann).unwrap();
assert!(!json.contains("kwargs"), "unexpected kwargs in {json}");
}
_ => panic!("expected Node"),
}
}
#[test]
fn test_parse_embed_annotation_rejects_unknown_kwarg() {
let input = r#"
node Doc {
title: String
embedding: Vector(3) @embed("title", provider="openai")
}
"#;
let err = parse_schema(input).unwrap_err();
assert!(
err.to_string().contains("only 'model' is supported"),
"got: {err}"
);
}
#[test]
fn test_parse_edge_no_body() {
let input = "edge WorksAt: Person -> Company\n";

View file

@ -42,8 +42,10 @@ enum_value = @{ (ASCII_ALPHANUMERIC | "_" | "-")+ }
base_type = { "String" | "Blob" | "Bool" | "I32" | "I64" | "U32" | "U64" | "F32" | "F64" | "DateTime" | "Date" }
// Annotation rule excludes constraint keywords followed by "(" — those are body_constraints
annotation = { "@" ~ !(constraint_name ~ "(") ~ ident ~ ("(" ~ annotation_arg ~ ")")? }
annotation = { "@" ~ !(constraint_name ~ "(") ~ ident ~ ("(" ~ annotation_args ~ ")")? }
annotation_args = { annotation_arg ~ ("," ~ annotation_kwarg)* }
annotation_arg = { literal | ident }
annotation_kwarg = { ident ~ "=" ~ literal }
literal = { string_lit | float_lit | integer | bool_lit }