mirror of
https://github.com/ModernRelay/omnigraph.git
synced 2026-06-18 02:24:27 +02:00
feat(compiler): @embed model kwarg in grammar/AST/parser (RFC-012 Phase 3)
Annotations gain optional comma-separated `key=value` kwargs after the positional argument. `Annotation` keeps its `value` field (so existing consumers are unchanged) and adds `kwargs: BTreeMap<String, String>` with `#[serde(default, skip_serializing_if = "BTreeMap::is_empty")]`, so empty kwargs are omitted from serialization and existing schemas' IR JSON and hash stay byte-identical. The parser rejects any `@embed` kwarg other than `model`. `render_annotations` now includes kwargs in its output. Adds 3 new parser tests.
This commit is contained in:
parent
30377c453b
commit
74476f7f51
6 changed files with 120 additions and 9 deletions
|
|
@ -696,9 +696,19 @@ pub(crate) fn render_constraint(constraint: &omnigraph_compiler::schema::ast::Co
|
|||
pub(crate) fn render_annotations(annotations: &[omnigraph_compiler::schema::ast::Annotation]) -> String {
|
||||
annotations
|
||||
.iter()
|
||||
.map(|annotation| match &annotation.value {
|
||||
Some(value) => format!("@{}({})", annotation.name, value),
|
||||
None => format!("@{}", annotation.name),
|
||||
.map(|annotation| {
|
||||
let mut args: Vec<String> = Vec::new();
|
||||
if let Some(value) = &annotation.value {
|
||||
args.push(value.clone());
|
||||
}
|
||||
for (key, val) in &annotation.kwargs {
|
||||
args.push(format!("{}={}", key, val));
|
||||
}
|
||||
if args.is_empty() {
|
||||
format!("@{}", annotation.name)
|
||||
} else {
|
||||
format!("@{}({})", annotation.name, args.join(", "))
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join(", ")
|
||||
|
|
|
|||
|
|
@ -1137,6 +1137,7 @@ node Person @description("new") {
|
|||
annotations: vec![Annotation {
|
||||
name: "description".to_string(),
|
||||
value: Some("new".to_string()),
|
||||
kwargs: Default::default(),
|
||||
}],
|
||||
}));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,3 +1,5 @@
|
|||
use std::collections::BTreeMap;
|
||||
|
||||
use crate::types::PropType;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
|
|
@ -50,6 +52,11 @@ pub struct PropDecl {
|
|||
pub struct Annotation {
|
||||
pub name: String,
|
||||
pub value: Option<String>,
|
||||
/// Keyword arguments, e.g. `model="…"` on `@embed("source", model="…")`.
|
||||
/// Empty is skipped in serialization so existing schemas' IR JSON (and
|
||||
/// hash) stay byte-identical; `BTreeMap` keeps the order deterministic.
|
||||
#[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
|
||||
pub kwargs: BTreeMap<String, String>,
|
||||
}
|
||||
|
||||
/// A typed constraint declared in a node or edge body.
|
||||
|
|
|
|||
|
|
@ -556,12 +556,32 @@ fn parse_type_ref(pair: pest::iterators::Pair<Rule>) -> Result<PropType> {
|
|||
fn parse_annotation(pair: pest::iterators::Pair<Rule>) -> Result<Annotation> {
|
||||
let mut inner = pair.into_inner();
|
||||
let name = inner.next().unwrap().as_str().to_string();
|
||||
let value = inner
|
||||
.next()
|
||||
.map(|p| decode_string_literal(p.as_str()))
|
||||
.transpose()?;
|
||||
let mut value = None;
|
||||
let mut kwargs = std::collections::BTreeMap::new();
|
||||
if let Some(args) = inner.next() {
|
||||
// `annotation_args`: one positional arg followed by zero or more
|
||||
// `key = literal` kwargs (e.g. `@embed("source", model="…")`).
|
||||
for arg in args.into_inner() {
|
||||
match arg.as_rule() {
|
||||
Rule::annotation_arg => {
|
||||
value = Some(decode_string_literal(arg.as_str())?);
|
||||
}
|
||||
Rule::annotation_kwarg => {
|
||||
let mut kw = arg.into_inner();
|
||||
let key = kw.next().unwrap().as_str().to_string();
|
||||
let raw = kw.next().unwrap().as_str();
|
||||
kwargs.insert(key, decode_string_literal(raw)?);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Annotation { name, value })
|
||||
Ok(Annotation {
|
||||
name,
|
||||
value,
|
||||
kwargs,
|
||||
})
|
||||
}
|
||||
|
||||
fn validate_string_annotation(
|
||||
|
|
@ -823,6 +843,17 @@ fn validate_property_annotations(
|
|||
type_name, source_prop
|
||||
)));
|
||||
}
|
||||
|
||||
// `model` is the only supported kwarg; reject the rest loudly so
|
||||
// a typo can't be silently ignored (it would never validate).
|
||||
for key in ann.kwargs.keys() {
|
||||
if key != "model" {
|
||||
return Err(NanoError::Parse(format!(
|
||||
"@embed on {}.{} has unknown argument '{}=' (only 'model' is supported)",
|
||||
type_name, prop.name, key
|
||||
)));
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -508,6 +508,66 @@ embedding: Vector(3) @embed(title)
|
|||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_embed_annotation_with_model_kwarg() {
|
||||
let input = r#"
|
||||
node Doc {
|
||||
title: String
|
||||
embedding: Vector(3) @embed("title", model="openai/text-embedding-3-large")
|
||||
}
|
||||
"#;
|
||||
let schema = parse_schema(input).unwrap();
|
||||
match &schema.declarations[0] {
|
||||
SchemaDecl::Node(n) => {
|
||||
let ann = &n.properties[1].annotations[0];
|
||||
assert_eq!(ann.name, "embed");
|
||||
assert_eq!(ann.value.as_deref(), Some("title"));
|
||||
assert_eq!(
|
||||
ann.kwargs.get("model").map(String::as_str),
|
||||
Some("openai/text-embedding-3-large")
|
||||
);
|
||||
}
|
||||
_ => panic!("expected Node"),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_embed_annotation_without_model_has_empty_kwargs() {
|
||||
let input = r#"
|
||||
node Doc {
|
||||
title: String
|
||||
embedding: Vector(3) @embed("title")
|
||||
}
|
||||
"#;
|
||||
let schema = parse_schema(input).unwrap();
|
||||
match &schema.declarations[0] {
|
||||
SchemaDecl::Node(n) => {
|
||||
let ann = &n.properties[1].annotations[0];
|
||||
assert!(ann.kwargs.is_empty());
|
||||
// Empty kwargs must NOT serialize, so existing schemas' IR JSON (and
|
||||
// thus the schema hash) stay byte-identical after this field is added.
|
||||
let json = serde_json::to_string(ann).unwrap();
|
||||
assert!(!json.contains("kwargs"), "unexpected kwargs in {json}");
|
||||
}
|
||||
_ => panic!("expected Node"),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_embed_annotation_rejects_unknown_kwarg() {
|
||||
let input = r#"
|
||||
node Doc {
|
||||
title: String
|
||||
embedding: Vector(3) @embed("title", provider="openai")
|
||||
}
|
||||
"#;
|
||||
let err = parse_schema(input).unwrap_err();
|
||||
assert!(
|
||||
err.to_string().contains("only 'model' is supported"),
|
||||
"got: {err}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_edge_no_body() {
|
||||
let input = "edge WorksAt: Person -> Company\n";
|
||||
|
|
|
|||
|
|
@ -42,8 +42,10 @@ enum_value = @{ (ASCII_ALPHANUMERIC | "_" | "-")+ }
|
|||
base_type = { "String" | "Blob" | "Bool" | "I32" | "I64" | "U32" | "U64" | "F32" | "F64" | "DateTime" | "Date" }
|
||||
|
||||
// Annotation rule excludes constraint keywords followed by "(" — those are body_constraints
|
||||
annotation = { "@" ~ !(constraint_name ~ "(") ~ ident ~ ("(" ~ annotation_arg ~ ")")? }
|
||||
annotation = { "@" ~ !(constraint_name ~ "(") ~ ident ~ ("(" ~ annotation_args ~ ")")? }
|
||||
annotation_args = { annotation_arg ~ ("," ~ annotation_kwarg)* }
|
||||
annotation_arg = { literal | ident }
|
||||
annotation_kwarg = { ident ~ "=" ~ literal }
|
||||
|
||||
literal = { string_lit | float_lit | integer | bool_lit }
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue