Initial public Omnigraph repository

This commit is contained in:
andrew 2026-04-10 20:49:41 +03:00
commit 338289656a
110 changed files with 60747 additions and 0 deletions

View file

@ -0,0 +1,594 @@
pub mod schema_ir;
pub mod schema_plan;
use std::collections::{HashMap, HashSet};
use std::sync::Arc;
use arrow_schema::{DataType, Field, Schema, SchemaRef};
use crate::error::{NanoError, Result};
use crate::schema::ast::{Cardinality, Constraint, ConstraintBound, SchemaDecl, SchemaFile};
use crate::types::{PropType, ScalarType};
/// In-memory catalog of a schema's node types, edge types, and interfaces, as
/// assembled by `build_catalog`. Every map is keyed by the declared type name.
#[derive(Debug, Clone)]
pub struct Catalog {
/// Node types keyed by their declared name.
pub node_types: HashMap<String, NodeType>,
/// Edge types keyed by their declared name.
pub edge_types: HashMap<String, EdgeType>,
/// Maps normalized lowercase edge name -> EdgeType key (e.g. "knows" -> "Knows")
pub edge_name_index: HashMap<String, String>,
/// Interface declarations (for Phase 2 polymorphic queries)
pub interfaces: HashMap<String, InterfaceType>,
}
/// An interface declaration: a name plus the properties it declares.
#[derive(Debug, Clone)]
pub struct InterfaceType {
/// Declared interface name (also its key in `Catalog::interfaces`).
pub name: String,
/// Declared properties, keyed by property name.
pub properties: HashMap<String, PropType>,
}
/// Catalog entry for one node declaration: its properties, constraints, and the
/// Arrow schema derived from them.
#[derive(Debug, Clone)]
pub struct NodeType {
/// Declared node name (also its key in `Catalog::node_types`).
pub name: String,
/// Interface names this type implements
pub implements: Vec<String>,
/// Declared properties, keyed by property name.
pub properties: HashMap<String, PropType>,
/// Key property names (from `@key` or `@key(name)`). Usually 0 or 1 element.
pub key: Option<Vec<String>>,
/// Uniqueness constraints (each entry is a list of column names)
pub unique_constraints: Vec<Vec<String>>,
/// Index declarations (each entry is a list of column names)
pub indices: Vec<Vec<String>>,
/// Value range constraints
pub range_constraints: Vec<RangeConstraint>,
/// Regex check constraints
pub check_constraints: Vec<CheckConstraint>,
/// Maps @embed target property -> source text property
pub embed_sources: HashMap<String, String>,
/// Names of the properties whose scalar type is `ScalarType::Blob`.
pub blob_properties: HashSet<String>,
/// Arrow schema for this node: a non-nullable Utf8 `id` field followed by one
/// field per declared property, in declaration order.
pub arrow_schema: SchemaRef,
}
impl NodeType {
    /// Backward-compatible accessor for single-column keys.
    ///
    /// Returns the first entry of `key`, or `None` when the node has no key
    /// (or the key column list is empty).
    pub fn key_property(&self) -> Option<&str> {
        let columns = self.key.as_deref()?;
        columns.first().map(String::as_str)
    }
}
/// A value range constraint on a single property; either bound may be absent.
#[derive(Debug, Clone)]
pub struct RangeConstraint {
/// Name of the constrained property.
pub property: String,
/// Lower bound, if one was declared.
pub min: Option<LiteralValue>,
/// Upper bound, if one was declared.
pub max: Option<LiteralValue>,
}
/// Numeric literal used as a range bound; mirrors the variants of
/// `ConstraintBound` from the schema AST.
#[derive(Debug, Clone)]
pub enum LiteralValue {
/// Integer bound.
Integer(i64),
/// Floating-point bound.
Float(f64),
}
/// A regex check constraint on a single property.
#[derive(Debug, Clone)]
pub struct CheckConstraint {
/// Name of the constrained property.
pub property: String,
/// Pattern text exactly as it appears in the schema constraint.
pub pattern: String,
}
/// Catalog entry for one edge declaration: its endpoints, cardinality,
/// properties, constraints, and the Arrow schema derived from them.
#[derive(Debug, Clone)]
pub struct EdgeType {
/// Declared edge name (also its key in `Catalog::edge_types`).
pub name: String,
/// Name of the source node type; `build_catalog` verifies it is declared.
pub from_type: String,
/// Name of the target node type; `build_catalog` verifies it is declared.
pub to_type: String,
/// Cardinality copied from the edge declaration.
pub cardinality: Cardinality,
/// Declared properties, keyed by property name.
pub properties: HashMap<String, PropType>,
/// Uniqueness constraints on edge columns (e.g. `@unique(src, dst)`)
pub unique_constraints: Vec<Vec<String>>,
/// Index declarations on edge properties
pub indices: Vec<Vec<String>>,
/// Names of the properties whose scalar type is `ScalarType::Blob`.
pub blob_properties: HashSet<String>,
/// Arrow schema for this edge: non-nullable Utf8 `id`, `src`, `dst` fields
/// followed by one field per declared property, in declaration order.
pub arrow_schema: SchemaRef,
}
impl Catalog {
    /// Looks up an edge type by name.
    ///
    /// An exact match on the declared name wins; otherwise the name is
    /// case-folded and resolved through `edge_name_index`, so `"knows"` and
    /// `"KNOWS"` both find an edge declared as `Knows`.
    pub fn lookup_edge_by_name(&self, name: &str) -> Option<&EdgeType> {
        self.edge_types.get(name).or_else(|| {
            let folded = normalize_edge_name(name);
            self.edge_name_index
                .get(&folded)
                .and_then(|canonical| self.edge_types.get(canonical))
        })
    }
}
/// Case-folds an edge name for the case-insensitive lookup index, using the
/// standard library's Unicode-aware lowercase conversion.
fn normalize_edge_name(name: &str) -> String {
    str::to_lowercase(name)
}
/// Converts an AST constraint bound into the catalog's literal representation,
/// variant for variant.
fn bound_to_literal(b: &ConstraintBound) -> LiteralValue {
    match *b {
        ConstraintBound::Integer(value) => LiteralValue::Integer(value),
        ConstraintBound::Float(value) => LiteralValue::Float(value),
    }
}
pub fn build_catalog(schema: &SchemaFile) -> Result<Catalog> {
let mut node_types = HashMap::new();
let mut edge_types = HashMap::new();
let mut edge_name_index = HashMap::new();
let mut interfaces = HashMap::new();
// Pass 0: collect interfaces
for decl in &schema.declarations {
if let SchemaDecl::Interface(iface) = decl {
let mut properties = HashMap::new();
for prop in &iface.properties {
properties.insert(prop.name.clone(), prop.prop_type.clone());
}
interfaces.insert(
iface.name.clone(),
InterfaceType {
name: iface.name.clone(),
properties,
},
);
}
}
// Pass 1: collect node types
for decl in &schema.declarations {
if let SchemaDecl::Node(node) = decl {
if node_types.contains_key(&node.name) {
return Err(NanoError::Catalog(format!(
"duplicate node type: {}",
node.name
)));
}
let mut properties = HashMap::new();
let mut embed_sources = HashMap::new();
let mut blob_properties = HashSet::new();
for prop in &node.properties {
properties.insert(prop.name.clone(), prop.prop_type.clone());
if matches!(prop.prop_type.scalar, ScalarType::Blob) {
blob_properties.insert(prop.name.clone());
}
// Extract @embed from property annotations (stays as annotation)
if let Some(source_prop) = prop
.annotations
.iter()
.find(|ann| ann.name == "embed")
.and_then(|ann| ann.value.clone())
{
embed_sources.insert(prop.name.clone(), source_prop);
}
}
// Extract constraints from the typed Constraint enum
let mut key: Option<Vec<String>> = None;
let mut unique_constraints = Vec::new();
let mut indices = Vec::new();
let mut range_constraints = Vec::new();
let mut check_constraints = Vec::new();
for constraint in &node.constraints {
match constraint {
Constraint::Key(cols) => {
key = Some(cols.clone());
// @key implies index on key columns
indices.push(cols.clone());
}
Constraint::Unique(cols) => {
unique_constraints.push(cols.clone());
}
Constraint::Index(cols) => {
indices.push(cols.clone());
}
Constraint::Range { property, min, max } => {
range_constraints.push(RangeConstraint {
property: property.clone(),
min: min.as_ref().map(bound_to_literal),
max: max.as_ref().map(bound_to_literal),
});
}
Constraint::Check { property, pattern } => {
check_constraints.push(CheckConstraint {
property: property.clone(),
pattern: pattern.clone(),
});
}
}
}
// Build Arrow schema: id: Utf8 + all properties
let mut fields = vec![Field::new("id", DataType::Utf8, false)];
for prop in &node.properties {
fields.push(Field::new(
&prop.name,
prop.prop_type.to_arrow(),
prop.prop_type.nullable,
));
}
let arrow_schema = Arc::new(Schema::new(fields));
node_types.insert(
node.name.clone(),
NodeType {
name: node.name.clone(),
implements: node.implements.clone(),
properties,
key,
unique_constraints,
indices,
range_constraints,
check_constraints,
embed_sources,
blob_properties,
arrow_schema,
},
);
}
}
// Pass 2: collect edge types, validate endpoints
for decl in &schema.declarations {
if let SchemaDecl::Edge(edge) = decl {
if edge_types.contains_key(&edge.name) {
return Err(NanoError::Catalog(format!(
"duplicate edge type: {}",
edge.name
)));
}
if !node_types.contains_key(&edge.from_type) {
return Err(NanoError::Catalog(format!(
"edge {} references unknown source type: {}",
edge.name, edge.from_type
)));
}
if !node_types.contains_key(&edge.to_type) {
return Err(NanoError::Catalog(format!(
"edge {} references unknown target type: {}",
edge.name, edge.to_type
)));
}
let mut properties = HashMap::new();
let mut blob_properties = HashSet::new();
let mut fields = vec![
Field::new("id", DataType::Utf8, false),
Field::new("src", DataType::Utf8, false),
Field::new("dst", DataType::Utf8, false),
];
for prop in &edge.properties {
properties.insert(prop.name.clone(), prop.prop_type.clone());
if matches!(prop.prop_type.scalar, ScalarType::Blob) {
blob_properties.insert(prop.name.clone());
}
fields.push(Field::new(
&prop.name,
prop.prop_type.to_arrow(),
prop.prop_type.nullable,
));
}
// Extract edge constraints
let mut unique_constraints = Vec::new();
let mut edge_indices = Vec::new();
for constraint in &edge.constraints {
match constraint {
Constraint::Unique(cols) => unique_constraints.push(cols.clone()),
Constraint::Index(cols) => edge_indices.push(cols.clone()),
_ => {} // Key/Range/Check validated at parse time to not appear on edges
}
}
let normalized_name = normalize_edge_name(&edge.name);
if let Some(existing) = edge_name_index.get(&normalized_name)
&& existing != &edge.name
{
return Err(NanoError::Catalog(format!(
"edge name collision after case folding: '{}' conflicts with '{}'",
edge.name, existing
)));
}
edge_name_index.insert(normalized_name, edge.name.clone());
edge_types.insert(
edge.name.clone(),
EdgeType {
name: edge.name.clone(),
from_type: edge.from_type.clone(),
to_type: edge.to_type.clone(),
cardinality: edge.cardinality.clone(),
properties,
unique_constraints,
indices: edge_indices,
blob_properties,
arrow_schema: Arc::new(Schema::new(fields)),
},
);
}
}
Ok(Catalog {
node_types,
edge_types,
edge_name_index,
interfaces,
})
}
// Unit tests for catalog construction: type collection, Arrow schema shape,
// case-insensitive edge lookup, and constraint extraction.
#[cfg(test)]
mod tests {
use super::*;
use crate::schema::ast::{EdgeDecl, NodeDecl};
use crate::schema::parser::parse_schema;
use crate::types::PropType;
// Shared fixture: two node types (Person, Company) and two edge types (Knows, WorksAt).
fn test_schema() -> &'static str {
r#"
node Person {
name: String
age: I32?
}
node Company {
name: String
}
edge Knows: Person -> Person {
since: Date?
}
edge WorksAt: Person -> Company {
title: String?
}
"#
}
// Every node and edge declaration of the fixture ends up in the catalog.
#[test]
fn test_build_catalog() {
let schema = parse_schema(test_schema()).unwrap();
let catalog = build_catalog(&schema).unwrap();
assert_eq!(catalog.node_types.len(), 2);
assert_eq!(catalog.edge_types.len(), 2);
assert!(catalog.node_types.contains_key("Person"));
assert!(catalog.node_types.contains_key("Company"));
}
// Edge lookup ignores case and resolves to the declared name.
#[test]
fn test_edge_lookup() {
let schema = parse_schema(test_schema()).unwrap();
let catalog = build_catalog(&schema).unwrap();
let edge = catalog.lookup_edge_by_name("knows").unwrap();
assert_eq!(edge.from_type, "Person");
assert_eq!(edge.to_type, "Person");
let upper = catalog.lookup_edge_by_name("KNOWS").unwrap();
assert_eq!(upper.name, "Knows");
}
// A node's Arrow schema is the implicit `id` column plus one column per property.
#[test]
fn test_node_arrow_schema() {
let schema = parse_schema(test_schema()).unwrap();
let catalog = build_catalog(&schema).unwrap();
let person = &catalog.node_types["Person"];
assert_eq!(person.arrow_schema.fields().len(), 3); // id, name, age
}
// Declaring the same node type twice is rejected.
#[test]
fn test_duplicate_node_error() {
let input = r#"
node Person { name: String }
node Person { age: I32 }
"#;
let schema = parse_schema(input).unwrap();
assert!(build_catalog(&schema).is_err());
}
// An edge whose target node type is not declared is rejected.
#[test]
fn test_bad_edge_endpoint() {
let input = r#"
node Person { name: String }
edge Knows: Person -> Alien
"#;
let schema = parse_schema(input).unwrap();
assert!(build_catalog(&schema).is_err());
}
// The implicit id/src/dst columns are all Utf8.
#[test]
fn test_id_fields_are_utf8() {
let schema = parse_schema(test_schema()).unwrap();
let catalog = build_catalog(&schema).unwrap();
let person = &catalog.node_types["Person"];
assert_eq!(
person
.arrow_schema
.field_with_name("id")
.unwrap()
.data_type(),
&DataType::Utf8
);
let knows = &catalog.edge_types["Knows"];
assert_eq!(
knows
.arrow_schema
.field_with_name("id")
.unwrap()
.data_type(),
&DataType::Utf8
);
assert_eq!(
knows
.arrow_schema
.field_with_name("src")
.unwrap()
.data_type(),
&DataType::Utf8
);
assert_eq!(
knows
.arrow_schema
.field_with_name("dst")
.unwrap()
.data_type(),
&DataType::Utf8
);
}
// `@key` on a property is surfaced through `key_property()`; nodes without one yield None.
#[test]
fn test_key_property_tracking() {
let input = r#"
node Signal {
slug: String @key
title: String
}
node Person {
name: String
}
edge Emits: Person -> Signal
"#;
let schema = parse_schema(input).unwrap();
let catalog = build_catalog(&schema).unwrap();
assert_eq!(catalog.node_types["Signal"].key_property(), Some("slug"));
assert_eq!(catalog.node_types["Person"].key_property(), None);
}
// Case folding must work for non-ASCII names; the AST is built by hand here
// rather than going through the parser.
#[test]
fn test_edge_lookup_handles_non_ascii_leading_character() {
let schema = SchemaFile {
declarations: vec![
SchemaDecl::Node(NodeDecl {
name: "Person".to_string(),
annotations: vec![],
implements: vec![],
properties: vec![crate::schema::ast::PropDecl {
name: "name".to_string(),
prop_type: PropType::scalar(ScalarType::String, false),
annotations: vec![],
}],
constraints: vec![],
}),
SchemaDecl::Edge(EdgeDecl {
name: "Édges".to_string(),
from_type: "Person".to_string(),
to_type: "Person".to_string(),
cardinality: Default::default(),
annotations: vec![],
properties: vec![],
constraints: vec![],
}),
],
};
let catalog = build_catalog(&schema).unwrap();
assert!(catalog.lookup_edge_by_name("édges").is_some());
}
// Two edge names that differ only by case are rejected with a "case folding" error.
#[test]
fn test_edge_lookup_rejects_case_fold_collisions() {
let input = r#"
node Person { name: String }
edge Knows: Person -> Person
edge KNOWS: Person -> Person
"#;
let schema = parse_schema(input).unwrap();
let err = build_catalog(&schema).unwrap_err();
assert!(err.to_string().contains("case folding"));
}
// A composite `@unique(...)` is stored as one multi-column entry.
#[test]
fn test_catalog_composite_unique() {
let input = r#"
node Person {
first: String
last: String
@unique(first, last)
}
"#;
let schema = parse_schema(input).unwrap();
let catalog = build_catalog(&schema).unwrap();
let person = &catalog.node_types["Person"];
assert!(
person
.unique_constraints
.contains(&vec!["first".to_string(), "last".to_string()])
);
}
// A composite `@index(...)` is stored as one multi-column entry.
#[test]
fn test_catalog_composite_index() {
let input = r#"
node Event {
category: String
date: Date
@index(category, date)
}
"#;
let schema = parse_schema(input).unwrap();
let catalog = build_catalog(&schema).unwrap();
let event = &catalog.node_types["Event"];
assert!(
event
.indices
.contains(&vec!["category".to_string(), "date".to_string()])
);
}
// `@card(0..1)` is carried through to the edge's cardinality.
#[test]
fn test_catalog_edge_cardinality() {
let input = r#"
node Person { name: String }
node Company { name: String }
edge WorksAt: Person -> Company @card(0..1)
"#;
let schema = parse_schema(input).unwrap();
let catalog = build_catalog(&schema).unwrap();
let edge = &catalog.edge_types["WorksAt"];
assert_eq!(edge.cardinality.min, 0);
assert_eq!(edge.cardinality.max, Some(1));
}
// Interface declarations and their properties are recorded in the catalog.
#[test]
fn test_catalog_interfaces_stored() {
let input = r#"
interface Named {
name: String
}
node Person implements Named {
age: I32?
}
"#;
let schema = parse_schema(input).unwrap();
let catalog = build_catalog(&schema).unwrap();
assert!(catalog.interfaces.contains_key("Named"));
assert!(catalog.interfaces["Named"].properties.contains_key("name"));
}
// A node's `implements` list is carried through to the catalog.
#[test]
fn test_catalog_node_implements() {
let input = r#"
interface Named {
name: String
}
node Person implements Named {
age: I32?
}
"#;
let schema = parse_schema(input).unwrap();
let catalog = build_catalog(&schema).unwrap();
assert_eq!(catalog.node_types["Person"].implements, vec!["Named"]);
}
// `@key` also registers an index on the key columns.
#[test]
fn test_key_implies_index() {
let input = r#"
node Signal {
slug: String @key
title: String
}
"#;
let schema = parse_schema(input).unwrap();
let catalog = build_catalog(&schema).unwrap();
let signal = &catalog.node_types["Signal"];
assert!(signal.indices.contains(&vec!["slug".to_string()]));
}
}

View file

@ -0,0 +1,393 @@
use std::collections::HashMap;
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
use crate::catalog::{Catalog, build_catalog};
use crate::error::{NanoError, Result};
use crate::schema::ast::{Annotation, Cardinality, Constraint, PropDecl, SchemaDecl, SchemaFile};
use crate::types::PropType;
/// Version stamped into every `SchemaIR` built by this module;
/// `build_catalog_from_ir` rejects an IR carrying any other value.
const SCHEMA_IR_VERSION: u32 = 1;
/// Canonical, serializable form of a schema. `build_schema_ir` sorts each list
/// by name so that equivalent schemas produce equal IR values.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct SchemaIR {
/// IR format version (see `SCHEMA_IR_VERSION`).
pub ir_version: u32,
/// Interface declarations, sorted by name.
pub interfaces: Vec<InterfaceIR>,
/// Node declarations, sorted by name.
pub nodes: Vec<NodeIR>,
/// Edge declarations, sorted by name.
pub edges: Vec<EdgeIR>,
}
/// Canonical form of an interface declaration.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct InterfaceIR {
/// Declared interface name.
pub name: String,
/// Stable id derived by hashing `"interface:<name>"`.
pub type_id: u32,
/// Properties in canonical form, sorted by name.
pub properties: Vec<PropertyIR>,
}
/// Canonical form of a node declaration.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct NodeIR {
/// Declared node name.
pub name: String,
/// Stable id derived by hashing `"node:<name>"`.
pub type_id: u32,
/// Type-level annotations, sorted by name and then value.
pub annotations: Vec<Annotation>,
/// Implemented interface names, sorted and de-duplicated.
pub implements: Vec<String>,
/// Properties in canonical form, sorted by name.
pub properties: Vec<PropertyIR>,
/// Constraints in canonical form (see `canonical_constraints`).
pub constraints: Vec<Constraint>,
}
/// Canonical form of an edge declaration.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct EdgeIR {
/// Declared edge name.
pub name: String,
/// Stable id derived by hashing `"edge:<name>"`.
pub type_id: u32,
/// Source node type name, copied from the declaration.
pub from_type: String,
/// Target node type name, copied from the declaration.
pub to_type: String,
/// Cardinality, copied from the declaration.
pub cardinality: Cardinality,
/// Type-level annotations, sorted by name and then value.
pub annotations: Vec<Annotation>,
/// Properties in canonical form, sorted by name.
pub properties: Vec<PropertyIR>,
/// Constraints in canonical form (see `canonical_constraints`).
pub constraints: Vec<Constraint>,
}
/// Canonical form of a property declaration.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct PropertyIR {
/// Declared property name.
pub name: String,
/// Stable id derived by hashing `"<kind>:<owner>:<name>"`.
pub prop_id: u32,
/// Property type with any enum values sorted and de-duplicated.
pub prop_type: PropType,
/// Property annotations, sorted by name and then value.
pub annotations: Vec<Annotation>,
}
pub fn build_schema_ir(schema: &SchemaFile) -> Result<SchemaIR> {
let mut seen_type_ids = HashMap::<u32, String>::new();
let mut interfaces = Vec::new();
let mut nodes = Vec::new();
let mut edges = Vec::new();
for decl in &schema.declarations {
match decl {
SchemaDecl::Interface(interface) => {
let type_id = stable_type_id("interface", &interface.name);
check_type_id_collision(&mut seen_type_ids, type_id, &interface.name)?;
interfaces.push(InterfaceIR {
name: interface.name.clone(),
type_id,
properties: canonical_properties(
"interface",
&interface.name,
&interface.properties,
)?,
});
}
SchemaDecl::Node(node) => {
let type_id = stable_type_id("node", &node.name);
check_type_id_collision(&mut seen_type_ids, type_id, &node.name)?;
nodes.push(NodeIR {
name: node.name.clone(),
type_id,
annotations: canonical_annotations(&node.annotations),
implements: canonical_strings(&node.implements),
properties: canonical_properties("node", &node.name, &node.properties)?,
constraints: canonical_constraints(&node.constraints),
});
}
SchemaDecl::Edge(edge) => {
let type_id = stable_type_id("edge", &edge.name);
check_type_id_collision(&mut seen_type_ids, type_id, &edge.name)?;
edges.push(EdgeIR {
name: edge.name.clone(),
type_id,
from_type: edge.from_type.clone(),
to_type: edge.to_type.clone(),
cardinality: edge.cardinality.clone(),
annotations: canonical_annotations(&edge.annotations),
properties: canonical_properties("edge", &edge.name, &edge.properties)?,
constraints: canonical_constraints(&edge.constraints),
});
}
}
}
interfaces.sort_by(|a, b| a.name.cmp(&b.name));
nodes.sort_by(|a, b| a.name.cmp(&b.name));
edges.sort_by(|a, b| a.name.cmp(&b.name));
Ok(SchemaIR {
ir_version: SCHEMA_IR_VERSION,
interfaces,
nodes,
edges,
})
}
/// Rebuilds a [`Catalog`] from a previously produced [`SchemaIR`].
///
/// The IR is converted back into schema declarations (interfaces first, then
/// nodes, then edges) and handed to `build_catalog`.
///
/// # Errors
///
/// Returns `NanoError::Catalog` when `ir.ir_version` differs from
/// `SCHEMA_IR_VERSION`, or whatever error `build_catalog` reports.
pub fn build_catalog_from_ir(ir: &SchemaIR) -> Result<Catalog> {
    if ir.ir_version != SCHEMA_IR_VERSION {
        return Err(NanoError::Catalog(format!(
            "unsupported schema ir_version {} (expected {})",
            ir.ir_version, SCHEMA_IR_VERSION
        )));
    }

    let total = ir.interfaces.len() + ir.nodes.len() + ir.edges.len();
    let mut declarations = Vec::with_capacity(total);

    for interface in &ir.interfaces {
        declarations.push(SchemaDecl::Interface(crate::schema::ast::InterfaceDecl {
            name: interface.name.clone(),
            properties: interface
                .properties
                .iter()
                .map(property_decl_from_ir)
                .collect(),
        }));
    }
    for node in &ir.nodes {
        declarations.push(SchemaDecl::Node(crate::schema::ast::NodeDecl {
            name: node.name.clone(),
            annotations: node.annotations.clone(),
            implements: node.implements.clone(),
            properties: node.properties.iter().map(property_decl_from_ir).collect(),
            constraints: node.constraints.clone(),
        }));
    }
    for edge in &ir.edges {
        declarations.push(SchemaDecl::Edge(crate::schema::ast::EdgeDecl {
            name: edge.name.clone(),
            from_type: edge.from_type.clone(),
            to_type: edge.to_type.clone(),
            cardinality: edge.cardinality.clone(),
            annotations: edge.annotations.clone(),
            properties: edge.properties.iter().map(property_decl_from_ir).collect(),
            constraints: edge.constraints.clone(),
        }));
    }

    build_catalog(&SchemaFile { declarations })
}
/// Serializes the IR as compact JSON.
///
/// # Errors
///
/// Returns `NanoError::Catalog` wrapping the serializer's message on failure.
pub fn schema_ir_json(ir: &SchemaIR) -> Result<String> {
    match serde_json::to_string(ir) {
        Ok(json) => Ok(json),
        Err(err) => Err(NanoError::Catalog(format!(
            "serialize schema ir error: {}",
            err
        ))),
    }
}
/// Serializes the IR as pretty-printed JSON.
///
/// # Errors
///
/// Returns `NanoError::Catalog` wrapping the serializer's message on failure.
pub fn schema_ir_pretty_json(ir: &SchemaIR) -> Result<String> {
    match serde_json::to_string_pretty(ir) {
        Ok(json) => Ok(json),
        Err(err) => Err(NanoError::Catalog(format!(
            "serialize schema ir error: {}",
            err
        ))),
    }
}
/// Returns the SHA-256 of the IR's compact JSON form, rendered as
/// `sha256:<lowercase hex>`.
///
/// # Errors
///
/// Propagates the error from `schema_ir_json` when serialization fails.
pub fn schema_ir_hash(ir: &SchemaIR) -> Result<String> {
    let json = schema_ir_json(ir)?;
    let digest = Sha256::digest(json.as_bytes());
    Ok(format!("sha256:{:x}", digest))
}
fn property_decl_from_ir(property: &PropertyIR) -> PropDecl {
PropDecl {
name: property.name.clone(),
prop_type: property.prop_type.clone(),
annotations: property.annotations.clone(),
}
}
/// Returns the input strings sorted ascending with duplicates removed.
fn canonical_strings(values: &[String]) -> Vec<String> {
    // An ordered set yields exactly "sorted + de-duplicated".
    values
        .iter()
        .cloned()
        .collect::<std::collections::BTreeSet<String>>()
        .into_iter()
        .collect()
}
/// Returns a copy of the annotations ordered by name, with ties broken by
/// value. The sort is stable and duplicates are kept.
fn canonical_annotations(annotations: &[Annotation]) -> Vec<Annotation> {
    let mut ordered: Vec<Annotation> = annotations.iter().cloned().collect();
    ordered.sort_by(|lhs, rhs| (&lhs.name, &lhs.value).cmp(&(&rhs.name, &rhs.value)));
    ordered
}
/// Returns a copy of the property type whose enum values, when present, are
/// sorted and de-duplicated; everything else is copied unchanged.
fn canonical_prop_type(prop_type: &PropType) -> PropType {
    let mut canonical = prop_type.clone();
    if let Some(variants) = canonical.enum_values.as_mut() {
        variants.sort();
        variants.dedup();
    }
    canonical
}
/// Converts the properties of one owner (`kind` + `owner_name`) into canonical
/// IR form, sorted by property name.
///
/// Each property receives a stable id hashed from `"<kind>:<owner>:<property>"`.
///
/// # Errors
///
/// Returns `NanoError::Catalog` as soon as two properties of the owner produce
/// the same id.
fn canonical_properties(
    kind: &str,
    owner_name: &str,
    properties: &[PropDecl],
) -> Result<Vec<PropertyIR>> {
    let owner_key = format!("{}:{}", kind, owner_name);
    let mut names_by_id: HashMap<u32, String> = HashMap::new();
    let mut canonical = Vec::with_capacity(properties.len());

    for property in properties {
        let prop_id = stable_prop_id(&owner_key, &property.name);
        if let Some(previous) = names_by_id.insert(prop_id, property.name.clone()) {
            return Err(NanoError::Catalog(format!(
                "property id collision on {}: '{}' and '{}' both hash to {}",
                owner_name, previous, property.name, prop_id
            )));
        }
        canonical.push(PropertyIR {
            name: property.name.clone(),
            prop_id,
            prop_type: canonical_prop_type(&property.prop_type),
            annotations: canonical_annotations(&property.annotations),
        });
    }

    canonical.sort_by(|lhs, rhs| lhs.name.cmp(&rhs.name));
    Ok(canonical)
}
fn canonical_constraints(constraints: &[Constraint]) -> Vec<Constraint> {
let mut constraints = constraints
.iter()
.cloned()
.map(normalize_constraint)
.collect::<Vec<_>>();
constraints.sort_by_key(constraint_sort_key);
constraints
}
/// Sorts the column list of `Key`, `Unique` and `Index` constraints; every
/// other constraint is returned unchanged.
///
/// NOTE(review): sorting discards the declared column order of composite keys
/// and indexes — confirm no consumer of the IR depends on that order.
fn normalize_constraint(constraint: Constraint) -> Constraint {
    fn sorted(mut columns: Vec<String>) -> Vec<String> {
        columns.sort();
        columns
    }

    match constraint {
        Constraint::Key(columns) => Constraint::Key(sorted(columns)),
        Constraint::Unique(columns) => Constraint::Unique(sorted(columns)),
        Constraint::Index(columns) => Constraint::Index(sorted(columns)),
        untouched => untouched,
    }
}
/// Builds the string used to order constraints: a kind prefix followed by the
/// constraint's columns or parameters.
fn constraint_sort_key(constraint: &Constraint) -> String {
    match constraint {
        Constraint::Key(cols) => {
            let joined = cols.join(",");
            format!("key:{}", joined)
        }
        Constraint::Unique(cols) => {
            let joined = cols.join(",");
            format!("unique:{}", joined)
        }
        Constraint::Index(cols) => {
            let joined = cols.join(",");
            format!("index:{}", joined)
        }
        // Bounds are rendered through their Debug form.
        Constraint::Range { property, min, max } => {
            format!("range:{}:{:?}:{:?}", property, min, max)
        }
        Constraint::Check { property, pattern } => {
            format!("check:{}:{}", property, pattern)
        }
    }
}
/// Derives a type id by hashing `"<kind>:<name>"`.
fn stable_type_id(kind: &str, name: &str) -> u32 {
    let seed = format!("{}:{}", kind, name);
    fnv1a_u32(seed.as_str())
}
/// Derives a property id by hashing `"<owner>:<name>"`.
fn stable_prop_id(owner: &str, name: &str) -> u32 {
    let seed = format!("{}:{}", owner, name);
    fnv1a_u32(seed.as_str())
}
/// 32-bit FNV-1a hash over the UTF-8 bytes of `value`.
///
/// A result of `0` is remapped to `1`, so the function never returns zero.
fn fnv1a_u32(value: &str) -> u32 {
    const OFFSET_BASIS: u32 = 2_166_136_261;
    const PRIME: u32 = 16_777_619;

    let digest = value.bytes().fold(OFFSET_BASIS, |acc, byte| {
        (acc ^ u32::from(byte)).wrapping_mul(PRIME)
    });
    match digest {
        0 => 1,
        nonzero => nonzero,
    }
}
/// Records `type_id` as belonging to `name`.
///
/// # Errors
///
/// Returns `NanoError::Catalog` when the id was already registered, naming both
/// the earlier and the current owner.
fn check_type_id_collision(
    seen_type_ids: &mut HashMap<u32, String>,
    type_id: u32,
    name: &str,
) -> Result<()> {
    match seen_type_ids.insert(type_id, name.to_string()) {
        None => Ok(()),
        Some(previous) => Err(NanoError::Catalog(format!(
            "type id collision: '{}' and '{}' both hash to {}",
            previous, name, type_id
        ))),
    }
}
// Unit tests for the schema IR: canonicalization stability and the
// IR -> catalog round trip.
#[cfg(test)]
mod tests {
use super::*;
use crate::catalog::build_catalog;
use crate::schema::parser::parse_schema;
// The two schemas differ only in declaration order and property order, so
// their IR values and hashes must be equal.
#[test]
fn schema_ir_hash_is_stable_across_source_ordering_noise() {
let schema_a = parse_schema(
r#"
node Person {
age: I32?
name: String @key
}
edge Knows: Person -> Person {
since: Date?
}
"#,
)
.unwrap();
let schema_b = parse_schema(
r#"
edge Knows: Person -> Person {
since: Date?
}
node Person {
name: String @key
age: I32?
}
"#,
)
.unwrap();
let ir_a = build_schema_ir(&schema_a).unwrap();
let ir_b = build_schema_ir(&schema_b).unwrap();
assert_eq!(ir_a, ir_b);
assert_eq!(
schema_ir_hash(&ir_a).unwrap(),
schema_ir_hash(&ir_b).unwrap()
);
}
// A catalog rebuilt from the IR agrees with the directly built catalog on
// type counts, key property and edge cardinality.
#[test]
fn build_catalog_from_ir_round_trips_core_catalog_fields() {
let schema = parse_schema(
r#"
node Person @description("person") {
name: String @key
age: I32? @description("age")
}
edge Knows: Person -> Person @instruction("friendship") {
since: Date?
}
"#,
)
.unwrap();
let direct = build_catalog(&schema).unwrap();
let ir = build_schema_ir(&schema).unwrap();
let rebuilt = build_catalog_from_ir(&ir).unwrap();
assert_eq!(direct.node_types.len(), rebuilt.node_types.len());
assert_eq!(direct.edge_types.len(), rebuilt.edge_types.len());
assert_eq!(
direct.node_types["Person"].key_property(),
rebuilt.node_types["Person"].key_property()
);
assert_eq!(
direct.edge_types["Knows"].cardinality,
rebuilt.edge_types["Knows"].cardinality
);
}
}

View file

@ -0,0 +1,895 @@
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
use serde::{Deserialize, Serialize};
use crate::error::Result;
use crate::schema::ast::{Annotation, Constraint};
use crate::types::PropType;
use super::schema_ir::{EdgeIR, InterfaceIR, NodeIR, PropertyIR, SchemaIR};
/// Which kind of schema declaration a migration step refers to.
/// Serialized in snake_case.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum SchemaTypeKind {
/// An interface declaration.
Interface,
/// A node type declaration.
Node,
/// An edge type declaration.
Edge,
}
/// Result of diffing an accepted schema IR against a desired one.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct SchemaMigrationPlan {
/// `true` when `steps` contains no `UnsupportedChange`.
pub supported: bool,
/// Planned steps, in the order the planner emitted them.
pub steps: Vec<SchemaMigrationStep>,
}
/// One step of a schema migration plan. Serialized with a `kind` tag holding
/// the snake_case variant name.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum SchemaMigrationStep {
/// A type present in the desired schema with no accepted counterpart.
AddType {
type_kind: SchemaTypeKind,
name: String,
},
/// An accepted type matched to a differently named desired type via
/// `@rename_from`.
RenameType {
type_kind: SchemaTypeKind,
from: String,
to: String,
},
/// A new property on an existing type; the planner emits this only for
/// nullable properties.
AddProperty {
type_kind: SchemaTypeKind,
type_name: String,
property_name: String,
property_type: PropType,
},
/// An accepted property matched to a differently named desired property
/// via `@rename_from`.
RenameProperty {
type_kind: SchemaTypeKind,
type_name: String,
from: String,
to: String,
},
/// A constraint to add to an existing type.
AddConstraint {
type_kind: SchemaTypeKind,
type_name: String,
constraint: Constraint,
},
/// A change to a type's annotations.
UpdateTypeMetadata {
type_kind: SchemaTypeKind,
name: String,
annotations: Vec<Annotation>,
},
/// A change to a property's annotations.
UpdatePropertyMetadata {
type_kind: SchemaTypeKind,
type_name: String,
property_name: String,
annotations: Vec<Annotation>,
},
/// A difference the planner cannot migrate; its presence makes the whole
/// plan unsupported.
UnsupportedChange {
entity: String,
reason: String,
},
}
pub fn plan_schema_migration(
accepted: &SchemaIR,
desired: &SchemaIR,
) -> Result<SchemaMigrationPlan> {
let mut steps = Vec::new();
let interface_renames = plan_interfaces(&accepted.interfaces, &desired.interfaces, &mut steps);
let node_renames = plan_nodes(
&accepted.nodes,
&desired.nodes,
&interface_renames,
&mut steps,
);
plan_edges(&accepted.edges, &desired.edges, &node_renames, &mut steps);
Ok(SchemaMigrationPlan {
supported: !steps
.iter()
.any(|step| matches!(step, SchemaMigrationStep::UnsupportedChange { .. })),
steps,
})
}
/// Plans interface changes.
///
/// Desired interfaces are matched to accepted ones by name only: a match has
/// its properties diffed, a miss becomes `AddType`, and every accepted
/// interface left unmatched becomes an `UnsupportedChange` (removal).
///
/// Interface renames are not detected, so the returned rename map is always
/// empty.
fn plan_interfaces(
    accepted: &[InterfaceIR],
    desired: &[InterfaceIR],
    steps: &mut Vec<SchemaMigrationStep>,
) -> HashMap<String, String> {
    let accepted_index: HashMap<&str, &InterfaceIR> = accepted
        .iter()
        .map(|interface| (interface.name.as_str(), interface))
        .collect();
    let mut matched_names: HashSet<String> = HashSet::new();

    for interface in desired {
        match accepted_index.get(interface.name.as_str()) {
            Some(existing) => {
                matched_names.insert(existing.name.clone());
                // Property renames are not needed for interfaces; only the
                // emitted steps matter.
                let _ = plan_properties(
                    SchemaTypeKind::Interface,
                    &interface.name,
                    &existing.properties,
                    &interface.properties,
                    steps,
                );
            }
            None => steps.push(SchemaMigrationStep::AddType {
                type_kind: SchemaTypeKind::Interface,
                name: interface.name.clone(),
            }),
        }
    }

    for leftover in accepted {
        if matched_names.contains(&leftover.name) {
            continue;
        }
        steps.push(SchemaMigrationStep::UnsupportedChange {
            entity: format!("interface:{}", leftover.name),
            reason: format!(
                "removing interface '{}' is not supported in schema migration v1",
                leftover.name
            ),
        });
    }

    HashMap::new()
}
/// Plans node type changes and returns the node renames as `old name -> new name`.
///
/// Each desired node is matched to an accepted node by name, falling back to
/// the name given by its `@rename_from` annotation. For a match the planner
/// emits, in order: an optional `RenameType`, an `UnsupportedChange` when the
/// implemented interfaces differ, then the type metadata, property and
/// constraint steps. Unmatched desired nodes become `AddType` (or an
/// `UnsupportedChange` when a `@rename_from` target is missing); accepted nodes
/// that nothing matched become `UnsupportedChange` (removal).
///
/// NOTE(review): an accepted node can be matched by two desired nodes (one by
/// name, one via `@rename_from`); nothing here rejects that — confirm it is
/// validated elsewhere.
fn plan_nodes(
    accepted: &[NodeIR],
    desired: &[NodeIR],
    interface_renames: &HashMap<String, String>,
    steps: &mut Vec<SchemaMigrationStep>,
) -> HashMap<String, String> {
    let accepted_index: HashMap<&str, &NodeIR> = accepted
        .iter()
        .map(|node| (node.name.as_str(), node))
        .collect();
    let mut matched_names: HashSet<String> = HashSet::new();
    let mut renames: HashMap<String, String> = HashMap::new();

    for node in desired {
        let rename_from = rename_from_value(&node.annotations);

        // Same-name match wins; otherwise try the rename source.
        let by_name = accepted_index.get(node.name.as_str()).copied();
        let resolved = by_name.or_else(|| {
            rename_from
                .and_then(|from| accepted_index.get(from).copied())
                .filter(|candidate| candidate.name != node.name)
        });

        let existing = match resolved {
            Some(existing) => existing,
            None => {
                let step = match rename_from {
                    Some(from) => SchemaMigrationStep::UnsupportedChange {
                        entity: format!("node:{}", node.name),
                        reason: format!(
                            "node '{}' declares @rename_from(\"{}\") but no accepted node with that name exists",
                            node.name, from
                        ),
                    },
                    None => SchemaMigrationStep::AddType {
                        type_kind: SchemaTypeKind::Node,
                        name: node.name.clone(),
                    },
                };
                steps.push(step);
                continue;
            }
        };

        matched_names.insert(existing.name.clone());

        let was_renamed = existing.name != node.name;
        if was_renamed {
            renames.insert(existing.name.clone(), node.name.clone());
            steps.push(SchemaMigrationStep::RenameType {
                type_kind: SchemaTypeKind::Node,
                from: existing.name.clone(),
                to: node.name.clone(),
            });
        }

        // Accepted interface names are mapped through the interface renames;
        // desired names are compared as written.
        let accepted_interfaces = normalize_strings(&existing.implements, interface_renames);
        let desired_interfaces = normalize_strings(&node.implements, &HashMap::new());
        if accepted_interfaces != desired_interfaces {
            steps.push(SchemaMigrationStep::UnsupportedChange {
                entity: format!("node:{}", node.name),
                reason: format!(
                    "changing implemented interfaces on node '{}' is not supported in schema migration v1",
                    node.name
                ),
            });
        }

        plan_type_metadata(
            SchemaTypeKind::Node,
            &node.name,
            &existing.annotations,
            &node.annotations,
            steps,
        );
        let property_renames = plan_properties(
            SchemaTypeKind::Node,
            &node.name,
            &existing.properties,
            &node.properties,
            steps,
        );
        plan_constraints(
            SchemaTypeKind::Node,
            &node.name,
            &existing.constraints,
            &node.constraints,
            &property_renames,
            steps,
        );
    }

    for leftover in accepted {
        if matched_names.contains(&leftover.name) {
            continue;
        }
        steps.push(SchemaMigrationStep::UnsupportedChange {
            entity: format!("node:{}", leftover.name),
            reason: format!(
                "removing node type '{}' is not supported in schema migration v1",
                leftover.name
            ),
        });
    }

    renames
}
/// Plans edge type changes.
///
/// Each desired edge is matched to an accepted edge by name, falling back to
/// the name given by its `@rename_from` annotation. For a match the planner
/// emits, in order: an optional `RenameType`, an `UnsupportedChange` when the
/// endpoints differ (after applying `node_renames` to the accepted endpoints),
/// an `UnsupportedChange` when the cardinality differs, then the type metadata,
/// property and constraint steps. Unmatched desired edges become `AddType` (or
/// an `UnsupportedChange` when a `@rename_from` target is missing); accepted
/// edges that nothing matched become `UnsupportedChange` (removal).
///
/// NOTE(review): an accepted edge can be matched by two desired edges (one by
/// name, one via `@rename_from`); nothing here rejects that — confirm it is
/// validated elsewhere.
fn plan_edges(
    accepted: &[EdgeIR],
    desired: &[EdgeIR],
    node_renames: &HashMap<String, String>,
    steps: &mut Vec<SchemaMigrationStep>,
) {
    let accepted_index: HashMap<&str, &EdgeIR> = accepted
        .iter()
        .map(|edge| (edge.name.as_str(), edge))
        .collect();
    let mut matched_names: HashSet<String> = HashSet::new();

    for edge in desired {
        let rename_from = rename_from_value(&edge.annotations);

        // Same-name match wins; otherwise try the rename source.
        let by_name = accepted_index.get(edge.name.as_str()).copied();
        let resolved = by_name.or_else(|| {
            rename_from
                .and_then(|from| accepted_index.get(from).copied())
                .filter(|candidate| candidate.name != edge.name)
        });

        let existing = match resolved {
            Some(existing) => existing,
            None => {
                let step = match rename_from {
                    Some(from) => SchemaMigrationStep::UnsupportedChange {
                        entity: format!("edge:{}", edge.name),
                        reason: format!(
                            "edge '{}' declares @rename_from(\"{}\") but no accepted edge with that name exists",
                            edge.name, from
                        ),
                    },
                    None => SchemaMigrationStep::AddType {
                        type_kind: SchemaTypeKind::Edge,
                        name: edge.name.clone(),
                    },
                };
                steps.push(step);
                continue;
            }
        };

        matched_names.insert(existing.name.clone());

        if existing.name != edge.name {
            steps.push(SchemaMigrationStep::RenameType {
                type_kind: SchemaTypeKind::Edge,
                from: existing.name.clone(),
                to: edge.name.clone(),
            });
        }

        // Map the accepted endpoints through the node renames before comparing,
        // so a renamed node alone does not count as an endpoint change.
        let accepted_from = normalize_type_ref(&existing.from_type, node_renames);
        let accepted_to = normalize_type_ref(&existing.to_type, node_renames);
        if accepted_from != edge.from_type || accepted_to != edge.to_type {
            steps.push(SchemaMigrationStep::UnsupportedChange {
                entity: format!("edge:{}", edge.name),
                reason: format!(
                    "changing edge endpoints on '{}' is not supported in schema migration v1",
                    edge.name
                ),
            });
        }

        if existing.cardinality != edge.cardinality {
            steps.push(SchemaMigrationStep::UnsupportedChange {
                entity: format!("edge:{}", edge.name),
                reason: format!(
                    "changing cardinality on edge '{}' is not supported in schema migration v1",
                    edge.name
                ),
            });
        }

        plan_type_metadata(
            SchemaTypeKind::Edge,
            &edge.name,
            &existing.annotations,
            &edge.annotations,
            steps,
        );
        let property_renames = plan_properties(
            SchemaTypeKind::Edge,
            &edge.name,
            &existing.properties,
            &edge.properties,
            steps,
        );
        plan_constraints(
            SchemaTypeKind::Edge,
            &edge.name,
            &existing.constraints,
            &edge.constraints,
            &property_renames,
            steps,
        );
    }

    for leftover in accepted {
        if matched_names.contains(&leftover.name) {
            continue;
        }
        steps.push(SchemaMigrationStep::UnsupportedChange {
            entity: format!("edge:{}", leftover.name),
            reason: format!(
                "removing edge type '{}' is not supported in schema migration v1",
                leftover.name
            ),
        });
    }
}
/// Diffs the accepted properties of one type against the desired ones and appends the
/// resulting migration steps to `steps`.
///
/// A desired property is paired with the accepted property of the same name, or, failing
/// that, with the accepted property named by its `@rename_from` annotation. Unpaired desired
/// properties become `AddProperty` when nullable and `UnsupportedChange` otherwise; accepted
/// properties that nothing paired with are reported as unsupported removals.
///
/// Returns a map from old property name to new property name for every rename detected.
fn plan_properties(
    type_kind: SchemaTypeKind,
    type_name: &str,
    accepted: &[PropertyIR],
    desired: &[PropertyIR],
    steps: &mut Vec<SchemaMigrationStep>,
) -> HashMap<String, String> {
    // Entity label shared by every unsupported-change step about a property of this type.
    let entity_for = |property_name: &str| {
        format!(
            "{}:{}.{}",
            schema_type_kind_key(type_kind),
            type_name,
            property_name
        )
    };

    let mut by_name: HashMap<&str, &PropertyIR> = HashMap::new();
    for candidate in accepted {
        by_name.insert(candidate.name.as_str(), candidate);
    }

    let mut matched_names = HashSet::new();
    let mut renamed = HashMap::new();

    for wanted in desired {
        let declared_origin = rename_from_value(&wanted.annotations);

        // Same-name match wins; the `@rename_from` source is only a fallback.
        let previous = match by_name.get(wanted.name.as_str()).copied() {
            Some(found) => Some(found),
            None => declared_origin
                .and_then(|origin| by_name.get(origin).copied())
                .filter(|candidate| candidate.name != wanted.name),
        };

        let previous = match previous {
            Some(previous) => previous,
            None => {
                let step = match declared_origin {
                    Some(origin) => SchemaMigrationStep::UnsupportedChange {
                        entity: entity_for(&wanted.name),
                        reason: format!(
                            "property '{}.{}' declares @rename_from(\"{}\") but no accepted property with that name exists",
                            type_name, wanted.name, origin
                        ),
                    },
                    None if wanted.prop_type.nullable => SchemaMigrationStep::AddProperty {
                        type_kind,
                        type_name: type_name.to_string(),
                        property_name: wanted.name.clone(),
                        property_type: wanted.prop_type.clone(),
                    },
                    None => SchemaMigrationStep::UnsupportedChange {
                        entity: entity_for(&wanted.name),
                        reason: format!(
                            "adding required property '{}.{}' requires a backfill and is not supported in schema migration v1",
                            type_name, wanted.name
                        ),
                    },
                };
                steps.push(step);
                continue;
            }
        };

        matched_names.insert(previous.name.clone());

        if previous.name != wanted.name {
            renamed.insert(previous.name.clone(), wanted.name.clone());
            steps.push(SchemaMigrationStep::RenameProperty {
                type_kind,
                type_name: type_name.to_string(),
                from: previous.name.clone(),
                to: wanted.name.clone(),
            });
        }

        if previous.prop_type != wanted.prop_type {
            steps.push(SchemaMigrationStep::UnsupportedChange {
                entity: entity_for(&wanted.name),
                reason: format!(
                    "changing property type for '{}.{}' is not supported in schema migration v1",
                    type_name, wanted.name
                ),
            });
        }

        plan_property_metadata(
            type_kind,
            type_name,
            &wanted.name,
            &previous.annotations,
            &wanted.annotations,
            steps,
        );
    }

    // Whatever was never paired with a desired property has been dropped from the schema.
    for dropped in accepted {
        if matched_names.contains(&dropped.name) {
            continue;
        }
        steps.push(SchemaMigrationStep::UnsupportedChange {
            entity: entity_for(&dropped.name),
            reason: format!(
                "removing property '{}.{}' is not supported in schema migration v1",
                type_name, dropped.name
            ),
        });
    }

    renamed
}
/// Diffs the accepted constraints of one type against the desired ones and appends the
/// resulting migration steps to `steps`.
///
/// Accepted constraints are first rewritten with `property_renames` so that a constraint on
/// a renamed property is compared under its new name. Any accepted constraint missing from
/// the desired set yields a single `UnsupportedChange`; a new `Constraint::Index` yields
/// `AddConstraint`; every other new constraint yields `UnsupportedChange`.
fn plan_constraints(
    type_kind: SchemaTypeKind,
    type_name: &str,
    accepted: &[Constraint],
    desired: &[Constraint],
    property_renames: &HashMap<String, String>,
    steps: &mut Vec<SchemaMigrationStep>,
) {
    let accepted_map = accepted
        .iter()
        .cloned()
        .map(|constraint| rename_constraint_properties(constraint, property_renames))
        .map(|constraint| (constraint_key(&constraint), constraint))
        .collect::<BTreeMap<_, _>>();

    // `rename_constraint_properties` also sorts multi-column lists, so the desired side has
    // to go through the same canonicalisation (with no renames) before keys are compared.
    // Otherwise an unchanged `@unique(b, a)` would be keyed "unique:a,b" on the accepted
    // side and "unique:b,a" on the desired side and be reported as removed and re-added.
    // The constraint stored in the map keeps the declared column order for `AddConstraint`.
    let no_renames = HashMap::new();
    let desired_map = desired
        .iter()
        .cloned()
        .map(|constraint| {
            let canonical = rename_constraint_properties(constraint.clone(), &no_renames);
            (constraint_key(&canonical), constraint)
        })
        .collect::<BTreeMap<_, _>>();

    let any_removed = accepted_map
        .keys()
        .any(|key| !desired_map.contains_key(key));
    if any_removed {
        steps.push(SchemaMigrationStep::UnsupportedChange {
            entity: format!("{}:{}", schema_type_kind_key(type_kind), type_name),
            reason: format!(
                "removing constraints from '{}' is not supported in schema migration v1",
                type_name
            ),
        });
    }

    for (key, constraint) in desired_map {
        if accepted_map.contains_key(&key) {
            continue;
        }
        match constraint {
            // Indexes are the only constraint kind that can be added by a migration.
            Constraint::Index(_) => steps.push(SchemaMigrationStep::AddConstraint {
                type_kind,
                type_name: type_name.to_string(),
                constraint,
            }),
            _ => steps.push(SchemaMigrationStep::UnsupportedChange {
                entity: format!("{}:{}", schema_type_kind_key(type_kind), type_name),
                reason: format!(
                    "adding constraint '{}' to '{}' is not supported in schema migration v1",
                    key, type_name
                ),
            }),
        }
    }
}
/// Compares the annotations of a type and records at most one step in `steps`:
/// `UpdateTypeMetadata` when only metadata annotations differ, `UnsupportedChange` when
/// `annotation_change_kind` reports an unsupported difference, and nothing when it reports
/// no change.
fn plan_type_metadata(
    type_kind: SchemaTypeKind,
    name: &str,
    accepted: &[Annotation],
    desired: &[Annotation],
    steps: &mut Vec<SchemaMigrationStep>,
) {
    let step = match annotation_change_kind(accepted, desired) {
        AnnotationChangeKind::None => return,
        AnnotationChangeKind::MetadataOnly(annotations) => {
            SchemaMigrationStep::UpdateTypeMetadata {
                type_kind,
                name: name.to_string(),
                annotations,
            }
        }
        AnnotationChangeKind::Unsupported(reason) => SchemaMigrationStep::UnsupportedChange {
            entity: format!("{}:{}", schema_type_kind_key(type_kind), name),
            reason,
        },
    };
    steps.push(step);
}
/// Compares the annotations of one property and records at most one step in `steps`:
/// `UpdatePropertyMetadata` when only metadata annotations differ, `UnsupportedChange` when
/// `annotation_change_kind` reports an unsupported difference, and nothing when it reports
/// no change.
fn plan_property_metadata(
    type_kind: SchemaTypeKind,
    type_name: &str,
    property_name: &str,
    accepted: &[Annotation],
    desired: &[Annotation],
    steps: &mut Vec<SchemaMigrationStep>,
) {
    let step = match annotation_change_kind(accepted, desired) {
        AnnotationChangeKind::None => return,
        AnnotationChangeKind::MetadataOnly(annotations) => {
            SchemaMigrationStep::UpdatePropertyMetadata {
                type_kind,
                type_name: type_name.to_string(),
                property_name: property_name.to_string(),
                annotations,
            }
        }
        AnnotationChangeKind::Unsupported(reason) => SchemaMigrationStep::UnsupportedChange {
            entity: format!(
                "{}:{}.{}",
                schema_type_kind_key(type_kind),
                type_name,
                property_name
            ),
            reason,
        },
    };
    steps.push(step);
}
/// Outcome of comparing two annotation lists with `annotation_change_kind`.
enum AnnotationChangeKind {
/// The annotation lists are equivalent for migration purposes.
None,
/// Only `@description` / `@instruction` annotations differ; carries the desired ones.
MetadataOnly(Vec<Annotation>),
/// Some other annotation differs; carries the human-readable reason.
Unsupported(String),
}
/// Classifies the difference between two annotation lists.
///
/// Annotations left over after `strip_metadata_annotations` must be equal on both sides,
/// otherwise the change is `Unsupported`. If they are equal, the `@description` /
/// `@instruction` annotations are compared: equal means `None`, different means
/// `MetadataOnly` carrying the desired metadata annotations.
fn annotation_change_kind(accepted: &[Annotation], desired: &[Annotation]) -> AnnotationChangeKind {
    if strip_metadata_annotations(accepted) != strip_metadata_annotations(desired) {
        return AnnotationChangeKind::Unsupported(String::from(
            "changing annotations beyond @description/@instruction is not supported in schema migration v1",
        ));
    }

    let current = metadata_annotations(accepted);
    let wanted = metadata_annotations(desired);
    if current != wanted {
        AnnotationChangeKind::MetadataOnly(wanted)
    } else {
        AnnotationChangeKind::None
    }
}
/// Returns clones of the annotations that are neither metadata nor handled elsewhere.
///
/// Besides `description` and `instruction`, this also drops `rename_from`, `key`, `unique`
/// and `index`, so none of those names take part in the "unsupported annotation change"
/// comparison.
fn strip_metadata_annotations(annotations: &[Annotation]) -> Vec<Annotation> {
    const IGNORED: [&str; 6] = [
        "description",
        "instruction",
        "rename_from",
        "key",
        "unique",
        "index",
    ];

    let mut kept = Vec::new();
    for annotation in annotations {
        if !IGNORED.contains(&annotation.name.as_str()) {
            kept.push(annotation.clone());
        }
    }
    kept
}
/// Returns clones of the `description` and `instruction` annotations, in their original
/// order.
fn metadata_annotations(annotations: &[Annotation]) -> Vec<Annotation> {
    let mut metadata = Vec::new();
    for annotation in annotations {
        let name = annotation.name.as_str();
        if name == "description" || name == "instruction" {
            metadata.push(annotation.clone());
        }
    }
    metadata
}
/// Applies `normalize_type_ref` to every value and gathers the results into an ordered,
/// de-duplicated set.
fn normalize_strings(values: &[String], renames: &HashMap<String, String>) -> BTreeSet<String> {
    let mut normalized = BTreeSet::new();
    for value in values {
        normalized.insert(normalize_type_ref(value, renames));
    }
    normalized
}
/// Returns the renamed form of `value` when `renames` has an entry for it, otherwise an
/// owned copy of `value` itself.
fn normalize_type_ref(value: &str, renames: &HashMap<String, String>) -> String {
    match renames.get(value) {
        Some(renamed) => renamed.clone(),
        None => value.to_string(),
    }
}
/// Rewrites every property name referenced by `constraint` through `property_renames`.
///
/// Column-list constraints (`Key`, `Unique`, `Index`) go through
/// `rename_constraint_columns`, which also sorts the columns; single-property constraints
/// (`Range`, `Check`) only have their `property` field rewritten.
fn rename_constraint_properties(
    constraint: Constraint,
    property_renames: &HashMap<String, String>,
) -> Constraint {
    let rename_all = |columns: Vec<String>| rename_constraint_columns(columns, property_renames);
    let rename_one = |property: String| normalize_property_ref(&property, property_renames);

    match constraint {
        Constraint::Key(columns) => Constraint::Key(rename_all(columns)),
        Constraint::Unique(columns) => Constraint::Unique(rename_all(columns)),
        Constraint::Index(columns) => Constraint::Index(rename_all(columns)),
        Constraint::Range { property, min, max } => Constraint::Range {
            property: rename_one(property),
            min,
            max,
        },
        Constraint::Check { property, pattern } => Constraint::Check {
            property: rename_one(property),
            pattern,
        },
    }
}
/// Maps each column through `property_renames` (columns without an entry are kept as-is)
/// and returns the result sorted ascending.
fn rename_constraint_columns(
    columns: Vec<String>,
    property_renames: &HashMap<String, String>,
) -> Vec<String> {
    let mut renamed = Vec::with_capacity(columns.len());
    for column in columns {
        let resolved = match property_renames.get(&column) {
            Some(new_name) => new_name.clone(),
            None => column,
        };
        renamed.push(resolved);
    }
    renamed.sort();
    renamed
}
/// Returns the renamed form of property `value` when `renames` has an entry for it,
/// otherwise an owned copy of `value` itself.
fn normalize_property_ref(value: &str, renames: &HashMap<String, String>) -> String {
    if let Some(renamed) = renames.get(value) {
        return renamed.clone();
    }
    value.to_string()
}
/// Builds the string key used to compare constraints: a kind prefix followed by the
/// comma-joined columns, or by the property plus its bounds / pattern.
fn constraint_key(constraint: &Constraint) -> String {
    let column_key =
        |prefix: &str, columns: &[String]| format!("{}:{}", prefix, columns.join(","));

    match constraint {
        Constraint::Key(columns) => column_key("key", columns),
        Constraint::Unique(columns) => column_key("unique", columns),
        Constraint::Index(columns) => column_key("index", columns),
        Constraint::Range { property, min, max } => {
            format!("range:{}:{:?}:{:?}", property, min, max)
        }
        Constraint::Check { property, pattern } => {
            format!("check:{}:{}", property, pattern)
        }
    }
}
/// Returns the value of the first `rename_from` annotation, or `None` when there is no
/// such annotation or the first one carries no value.
fn rename_from_value(annotations: &[Annotation]) -> Option<&str> {
    for annotation in annotations {
        if annotation.name == "rename_from" {
            // Only the first occurrence is consulted, even if it has no value.
            return annotation.value.as_deref();
        }
    }
    None
}
fn schema_type_kind_key(kind: SchemaTypeKind) -> &'static str {
match kind {
SchemaTypeKind::Interface => "interface",
SchemaTypeKind::Node => "node",
SchemaTypeKind::Edge => "edge",
}
}
// Unit tests for the schema migration planner: each test parses an accepted and a desired
// schema, builds their IR, plans the migration and inspects the resulting steps.
#[cfg(test)]
mod tests {
use crate::catalog::schema_ir::build_schema_ir;
use crate::schema::parser::parse_schema;
use super::SchemaMigrationStep::{
AddConstraint, AddProperty, RenameProperty, RenameType, UnsupportedChange,
UpdateTypeMetadata,
};
use super::*;
// Adding a nullable property and an @index on an existing property is a supported plan.
#[test]
fn plan_supports_additive_nullable_property_and_index() {
let accepted = build_schema_ir(
&parse_schema(
r#"
node Person {
name: String @key
age: I32?
}
"#,
)
.unwrap(),
)
.unwrap();
let desired = build_schema_ir(
&parse_schema(
r#"
node Person {
name: String @key
age: I32? @index
nickname: String?
}
"#,
)
.unwrap(),
)
.unwrap();
let plan = plan_schema_migration(&accepted, &desired).unwrap();
assert!(plan.supported);
assert!(plan.steps.contains(&AddProperty {
type_kind: SchemaTypeKind::Node,
type_name: "Person".to_string(),
property_name: "nickname".to_string(),
property_type: PropType::scalar(crate::types::ScalarType::String, true),
}));
assert!(plan.steps.contains(&AddConstraint {
type_kind: SchemaTypeKind::Node,
type_name: "Person".to_string(),
constraint: Constraint::Index(vec!["age".to_string()]),
}));
}
// @rename_from on a type and on a property produces RenameType / RenameProperty steps.
#[test]
fn plan_supports_explicit_type_and_property_rename() {
let accepted = build_schema_ir(
&parse_schema(
r#"
node User {
name: String @key
}
"#,
)
.unwrap(),
)
.unwrap();
let desired = build_schema_ir(
&parse_schema(
r#"
node Account @rename_from("User") {
full_name: String @key @rename_from("name")
}
"#,
)
.unwrap(),
)
.unwrap();
let plan = plan_schema_migration(&accepted, &desired).unwrap();
assert!(plan.supported);
assert!(plan.steps.contains(&RenameType {
type_kind: SchemaTypeKind::Node,
from: "User".to_string(),
to: "Account".to_string(),
}));
assert!(plan.steps.contains(&RenameProperty {
type_kind: SchemaTypeKind::Node,
type_name: "Account".to_string(),
from: "name".to_string(),
to: "full_name".to_string(),
}));
}
// Adding a non-nullable property is reported as an unsupported change.
#[test]
fn plan_rejects_required_property_addition() {
let accepted = build_schema_ir(
&parse_schema(
r#"
node Person {
name: String @key
}
"#,
)
.unwrap(),
)
.unwrap();
let desired = build_schema_ir(
&parse_schema(
r#"
node Person {
name: String @key
age: I32
}
"#,
)
.unwrap(),
)
.unwrap();
let plan = plan_schema_migration(&accepted, &desired).unwrap();
assert!(!plan.supported);
assert!(plan.steps.iter().any(|step| matches!(
step,
UnsupportedChange { entity, reason }
if entity.contains("Person.age")
&& reason.contains("adding required property")
)));
}
// Changing only @description yields an UpdateTypeMetadata step carrying the new value.
#[test]
fn plan_supports_metadata_only_annotation_changes() {
let accepted = build_schema_ir(
&parse_schema(
r#"
node Person @description("old") {
name: String @key
}
"#,
)
.unwrap(),
)
.unwrap();
let desired = build_schema_ir(
&parse_schema(
r#"
node Person @description("new") {
name: String @key
}
"#,
)
.unwrap(),
)
.unwrap();
let plan = plan_schema_migration(&accepted, &desired).unwrap();
assert!(plan.supported);
assert!(plan.steps.contains(&UpdateTypeMetadata {
type_kind: SchemaTypeKind::Node,
name: "Person".to_string(),
annotations: vec![Annotation {
name: "description".to_string(),
value: Some("new".to_string()),
}],
}));
}
}

View file

@ -0,0 +1,379 @@
#![allow(dead_code)]
use std::time::Duration;
use reqwest::Client;
use serde::Deserialize;
use tokio::time::sleep;
use crate::error::{NanoError, Result};
/// Model name used when `NANOGRAPH_EMBED_MODEL` is unset or blank.
const DEFAULT_EMBED_MODEL: &str = "text-embedding-3-small";
/// API base URL used when `OPENAI_BASE_URL` is unset or empty.
const DEFAULT_OPENAI_BASE_URL: &str = "https://api.openai.com/v1";
/// HTTP client timeout, in milliseconds, used when `NANOGRAPH_EMBED_TIMEOUT_MS` is unset or invalid.
const DEFAULT_TIMEOUT_MS: u64 = 30_000;
/// Total attempts per embedding request when `NANOGRAPH_EMBED_RETRY_ATTEMPTS` is unset or invalid.
const DEFAULT_RETRY_ATTEMPTS: usize = 4;
/// Base retry delay, in milliseconds, when `NANOGRAPH_EMBED_RETRY_BACKOFF_MS` is unset or invalid.
const DEFAULT_RETRY_BACKOFF_MS: u64 = 200;
/// Backend that an `EmbeddingClient` uses to produce vectors.
#[derive(Clone)]
enum EmbeddingTransport {
/// Deterministic local vectors derived from a hash of the input; no network access.
Mock,
/// HTTP calls to an OpenAI-style `/embeddings` endpoint.
OpenAi {
/// Bearer token sent with every request.
api_key: String,
/// Base URL to which `/embeddings` is appended.
base_url: String,
/// HTTP client used for the requests.
http: Client,
},
}
/// Client that turns text into embedding vectors through a configured transport.
#[derive(Clone)]
pub(crate) struct EmbeddingClient {
/// Embedding model name sent to the provider.
model: String,
/// Maximum number of attempts per request (treated as at least 1 when retrying).
retry_attempts: usize,
/// Base delay in milliseconds for the exponential retry backoff.
retry_backoff_ms: u64,
/// Backend used to compute embeddings.
transport: EmbeddingTransport,
}
/// Failure of a single embedding HTTP attempt, tagged with whether a retry may help.
struct EmbedCallError {
/// Human-readable description, surfaced as `NanoError::Execution` once retries stop.
message: String,
/// `true` when the retry loop is allowed to try the request again.
retryable: bool,
}
/// Successful response body of the embeddings endpoint (only the fields this client reads).
#[derive(Debug, Deserialize)]
struct OpenAiEmbeddingResponse {
/// One entry per embedded input.
data: Vec<OpenAiEmbeddingDatum>,
}
/// A single embedding entry in the response.
#[derive(Debug, Deserialize)]
struct OpenAiEmbeddingDatum {
/// Position of the corresponding input; used to sort and validate the response.
index: usize,
/// The embedding vector.
embedding: Vec<f32>,
}
/// Error response body of the form `{"error": {...}}`.
#[derive(Debug, Deserialize)]
struct OpenAiErrorEnvelope {
/// The wrapped error details.
error: OpenAiErrorBody,
}
/// Error details inside an `OpenAiErrorEnvelope`.
#[derive(Debug, Deserialize)]
struct OpenAiErrorBody {
/// Provider-supplied error message.
message: String,
}
impl EmbeddingClient {
    /// Builds a client from environment variables.
    ///
    /// * `NANOGRAPH_EMBED_MODEL` – model name (default [`DEFAULT_EMBED_MODEL`]).
    /// * `NANOGRAPH_EMBED_RETRY_ATTEMPTS`, `NANOGRAPH_EMBED_RETRY_BACKOFF_MS` – retry tuning.
    /// * `NANOGRAPH_EMBEDDINGS_MOCK` – when truthy, selects the mock transport and none of
    ///   the variables below are read.
    /// * `OPENAI_API_KEY` – required for the OpenAI transport.
    /// * `OPENAI_BASE_URL` – base URL (default [`DEFAULT_OPENAI_BASE_URL`]); surrounding
    ///   whitespace and trailing slashes are removed.
    /// * `NANOGRAPH_EMBED_TIMEOUT_MS` – HTTP timeout in milliseconds.
    ///
    /// # Errors
    ///
    /// Returns `NanoError::Execution` when the API key is missing or blank, or when the
    /// HTTP client cannot be constructed.
    pub(crate) fn from_env() -> Result<Self> {
        let model = std::env::var("NANOGRAPH_EMBED_MODEL")
            .ok()
            .map(|v| v.trim().to_string())
            .filter(|v| !v.is_empty())
            .unwrap_or_else(|| DEFAULT_EMBED_MODEL.to_string());
        let retry_attempts =
            parse_env_usize("NANOGRAPH_EMBED_RETRY_ATTEMPTS", DEFAULT_RETRY_ATTEMPTS);
        let retry_backoff_ms =
            parse_env_u64("NANOGRAPH_EMBED_RETRY_BACKOFF_MS", DEFAULT_RETRY_BACKOFF_MS);

        if env_flag("NANOGRAPH_EMBEDDINGS_MOCK") {
            return Ok(Self {
                model,
                retry_attempts,
                retry_backoff_ms,
                transport: EmbeddingTransport::Mock,
            });
        }

        let api_key = std::env::var("OPENAI_API_KEY")
            .ok()
            .map(|v| v.trim().to_string())
            .filter(|v| !v.is_empty())
            .ok_or_else(|| {
                NanoError::Execution(
                    "OPENAI_API_KEY is required when an embedding call is needed".to_string(),
                )
            })?;
        // Trim whitespace like the other string settings: a trailing space or newline in
        // the variable would otherwise end up inside the request URL.
        let base_url = std::env::var("OPENAI_BASE_URL")
            .ok()
            .map(|v| v.trim().trim_end_matches('/').to_string())
            .filter(|v| !v.is_empty())
            .unwrap_or_else(|| DEFAULT_OPENAI_BASE_URL.to_string());
        let timeout_ms = parse_env_u64("NANOGRAPH_EMBED_TIMEOUT_MS", DEFAULT_TIMEOUT_MS);
        let http = Client::builder()
            .timeout(Duration::from_millis(timeout_ms))
            .build()
            .map_err(|e| {
                NanoError::Execution(format!("failed to initialize HTTP client: {}", e))
            })?;

        Ok(Self {
            model,
            retry_attempts,
            retry_backoff_ms,
            transport: EmbeddingTransport::OpenAi {
                api_key,
                base_url,
                http,
            },
        })
    }

    /// Mock-transport client with default settings, for unit tests.
    #[cfg(test)]
    pub(crate) fn mock_for_tests() -> Self {
        Self {
            model: DEFAULT_EMBED_MODEL.to_string(),
            retry_attempts: DEFAULT_RETRY_ATTEMPTS,
            retry_backoff_ms: DEFAULT_RETRY_BACKOFF_MS,
            transport: EmbeddingTransport::Mock,
        }
    }

    /// Name of the embedding model this client requests.
    pub(crate) fn model(&self) -> &str {
        &self.model
    }

    /// Embeds a single string; see [`Self::embed_texts`] for validation and errors.
    pub(crate) async fn embed_text(&self, input: &str, expected_dim: usize) -> Result<Vec<f32>> {
        let mut vectors = self.embed_texts(&[input.to_string()], expected_dim).await?;
        vectors.pop().ok_or_else(|| {
            NanoError::Execution("embedding provider returned no vector".to_string())
        })
    }

    /// Embeds `inputs`, returning one vector of length `expected_dim` per input, in input
    /// order.
    ///
    /// An empty `inputs` slice returns an empty vector without contacting the provider.
    ///
    /// # Errors
    ///
    /// Returns `NanoError::Execution` when `expected_dim` is zero or when the OpenAI
    /// transport fails after its retries.
    pub(crate) async fn embed_texts(
        &self,
        inputs: &[String],
        expected_dim: usize,
    ) -> Result<Vec<Vec<f32>>> {
        if expected_dim == 0 {
            return Err(NanoError::Execution(
                "embedding dimension must be greater than zero".to_string(),
            ));
        }
        if inputs.is_empty() {
            return Ok(Vec::new());
        }
        match &self.transport {
            EmbeddingTransport::Mock => Ok(inputs
                .iter()
                .map(|input| mock_embedding(input, expected_dim))
                .collect()),
            EmbeddingTransport::OpenAi { .. } => {
                self.embed_texts_openai_with_retry(inputs, expected_dim)
                    .await
            }
        }
    }

    /// Calls [`Self::embed_texts_openai_once`] until it succeeds, fails with a
    /// non-retryable error, or the attempt budget is used up.
    ///
    /// The delay before retry `k` is `retry_backoff_ms * 2^(k-1)`, with the exponent capped
    /// at 10 and the multiplication saturating.
    async fn embed_texts_openai_with_retry(
        &self,
        inputs: &[String],
        expected_dim: usize,
    ) -> Result<Vec<Vec<f32>>> {
        let max_attempt = self.retry_attempts.max(1);
        let mut attempt = 0usize;
        loop {
            attempt += 1;
            match self.embed_texts_openai_once(inputs, expected_dim).await {
                Ok(vectors) => return Ok(vectors),
                Err(err) => {
                    if !err.retryable || attempt >= max_attempt {
                        return Err(NanoError::Execution(err.message));
                    }
                    let shift = (attempt - 1).min(10) as u32;
                    let delay = self.retry_backoff_ms.saturating_mul(1u64 << shift);
                    sleep(Duration::from_millis(delay)).await;
                }
            }
        }
    }

    /// Performs one `POST {base_url}/embeddings` request and validates the response.
    ///
    /// Marked retryable: transport errors that are timeouts, connection failures or
    /// request errors, and HTTP 5xx / 429 responses. Not retryable: other HTTP statuses,
    /// undecodable bodies, and responses whose size, indices or vector dimensions do not
    /// match the request.
    async fn embed_texts_openai_once(
        &self,
        inputs: &[String],
        expected_dim: usize,
    ) -> std::result::Result<Vec<Vec<f32>>, EmbedCallError> {
        let (api_key, base_url, http) = match &self.transport {
            EmbeddingTransport::OpenAi {
                api_key,
                base_url,
                http,
            } => (api_key, base_url, http),
            EmbeddingTransport::Mock => unreachable!("mock transport should not call OpenAI"),
        };

        let request = serde_json::json!({
            "model": self.model,
            "input": inputs,
            "dimensions": expected_dim,
        });
        let url = format!("{}/embeddings", base_url);
        let response = http
            .post(&url)
            .bearer_auth(api_key)
            .json(&request)
            .send()
            .await;
        let response = match response {
            Ok(resp) => resp,
            Err(err) => {
                let retryable = err.is_timeout() || err.is_connect() || err.is_request();
                return Err(EmbedCallError {
                    message: format!("embedding request failed: {}", err),
                    retryable,
                });
            }
        };

        let status = response.status();
        let body = match response.text().await {
            Ok(body) => body,
            Err(err) => {
                return Err(EmbedCallError {
                    message: format!(
                        "embedding response read failed (status {}): {}",
                        status, err
                    ),
                    retryable: status.is_server_error() || status.as_u16() == 429,
                });
            }
        };

        if !status.is_success() {
            // Prefer the provider's structured error message; fall back to the raw body.
            let message = parse_openai_error_message(&body).unwrap_or_else(|| body.clone());
            return Err(EmbedCallError {
                message: format!(
                    "embedding request failed with status {}: {}",
                    status, message
                ),
                retryable: status.is_server_error() || status.as_u16() == 429,
            });
        }

        let mut parsed: OpenAiEmbeddingResponse =
            serde_json::from_str(&body).map_err(|err| EmbedCallError {
                message: format!("embedding response decode failed: {}", err),
                retryable: false,
            })?;
        if parsed.data.len() != inputs.len() {
            return Err(EmbedCallError {
                message: format!(
                    "embedding response size mismatch: expected {}, got {}",
                    inputs.len(),
                    parsed.data.len()
                ),
                retryable: false,
            });
        }

        // Restore input order, then require the indices to be exactly 0..n.
        parsed.data.sort_by_key(|item| item.index);
        let mut vectors = Vec::with_capacity(parsed.data.len());
        for (idx, item) in parsed.data.into_iter().enumerate() {
            if item.index != idx {
                return Err(EmbedCallError {
                    message: format!(
                        "embedding response index mismatch at position {}: got {}",
                        idx, item.index
                    ),
                    retryable: false,
                });
            }
            if item.embedding.len() != expected_dim {
                return Err(EmbedCallError {
                    message: format!(
                        "embedding dimension mismatch: expected {}, got {}",
                        expected_dim,
                        item.embedding.len()
                    ),
                    retryable: false,
                });
            }
            vectors.push(item.embedding);
        }
        Ok(vectors)
    }
}
/// Extracts `error.message` from an error response body.
///
/// Returns `None` when the body is not an `{"error": {"message": ...}}` document or when
/// the message is blank.
fn parse_openai_error_message(body: &str) -> Option<String> {
    let envelope: OpenAiErrorEnvelope = serde_json::from_str(body).ok()?;
    let message = envelope.error.message;
    if message.trim().is_empty() {
        None
    } else {
        Some(message)
    }
}
/// Reads environment variable `name` as a positive `usize`.
///
/// Surrounding whitespace is ignored, so a value such as `" 7 "` or one with a trailing
/// newline parses as intended. Returns `default` when the variable is unset, not valid
/// Unicode, not a number, or zero.
fn parse_env_usize(name: &str, default: usize) -> usize {
    std::env::var(name)
        .ok()
        // `str::parse` rejects padded numbers, which would silently select the default.
        .and_then(|v| v.trim().parse::<usize>().ok())
        .filter(|v| *v > 0)
        .unwrap_or(default)
}
/// Reads environment variable `name` as a positive `u64`.
///
/// Surrounding whitespace is ignored, so a value such as `" 500 "` or one with a trailing
/// newline parses as intended. Returns `default` when the variable is unset, not valid
/// Unicode, not a number, or zero.
fn parse_env_u64(name: &str, default: u64) -> u64 {
    std::env::var(name)
        .ok()
        // `str::parse` rejects padded numbers, which would silently select the default.
        .and_then(|v| v.trim().parse::<u64>().ok())
        .filter(|v| *v > 0)
        .unwrap_or(default)
}
/// Returns `true` when environment variable `name` is set to `1`, `true`, `yes` or `on`
/// (case-insensitive, surrounding whitespace ignored); `false` otherwise, including when
/// the variable is unset.
fn env_flag(name: &str) -> bool {
    match std::env::var(name) {
        Ok(raw) => matches!(
            raw.trim().to_ascii_lowercase().as_str(),
            "1" | "true" | "yes" | "on"
        ),
        Err(_) => false,
    }
}
/// Produces a deterministic pseudo-random vector of length `dim` for `input`.
///
/// The generator is seeded with `fnv1a64` of the input bytes and advanced with
/// `xorshift64`; each state is mapped to a component in `[-1, 1]`. The vector is then
/// scaled to unit length unless its norm is at most `f32::EPSILON`.
fn mock_embedding(input: &str, dim: usize) -> Vec<f32> {
    let mut state = fnv1a64(input.as_bytes());
    let mut vector = Vec::with_capacity(dim);
    let mut remaining = dim;
    while remaining > 0 {
        state = xorshift64(state);
        let unit = (state as f64 / u64::MAX as f64) as f32;
        vector.push((unit * 2.0) - 1.0);
        remaining -= 1;
    }

    // Accumulate the squared length in f64 before narrowing back to f32.
    let length = vector
        .iter()
        .map(|component| (*component as f64) * (*component as f64))
        .sum::<f64>()
        .sqrt() as f32;
    if length > f32::EPSILON {
        vector.iter_mut().for_each(|component| *component /= length);
    }
    vector
}
/// 64-bit FNV-1a hash of `bytes`: XOR each byte into the hash, then multiply by the FNV
/// prime with wrapping arithmetic.
fn fnv1a64(bytes: &[u8]) -> u64 {
    const OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const PRIME: u64 = 0x0000_0100_0000_01b3;

    bytes.iter().fold(OFFSET_BASIS, |hash, byte| {
        (hash ^ u64::from(*byte)).wrapping_mul(PRIME)
    })
}
/// One step of a xorshift generator with shifts 13 (left), 7 (right) and 17 (left).
/// An input of zero maps to zero.
fn xorshift64(x: u64) -> u64 {
    let first = x ^ (x << 13);
    let second = first ^ (first >> 7);
    second ^ (second << 17)
}
// Unit tests for the embedding client, using the mock transport only.
#[cfg(test)]
mod tests {
use super::*;
// The mock transport returns the same vector for the same input, different vectors for
// different inputs, and honours the requested dimension.
#[tokio::test]
async fn mock_embeddings_are_deterministic() {
let client = EmbeddingClient::mock_for_tests();
let a = client.embed_text("alpha", 8).await.unwrap();
let b = client.embed_text("alpha", 8).await.unwrap();
let c = client.embed_text("beta", 8).await.unwrap();
assert_eq!(a, b);
assert_ne!(a, c);
assert_eq!(a.len(), 8);
}
}

View file

@ -0,0 +1,146 @@
use thiserror::Error;
/// A `start..end` range of positions in source text.
/// NOTE(review): presumably byte offsets with an exclusive `end` — confirm against the parser.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SourceSpan {
/// Position where the span begins.
pub start: usize,
/// Position where the span ends; may equal `start` for a zero-width span.
pub end: usize,
}
impl SourceSpan {
/// Creates a span from its two endpoints without any validation or widening.
pub fn new(start: usize, end: usize) -> Self {
Self { start, end }
}
}
/// A parse error message with an optional source location.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseDiagnostic {
/// Human-readable description of the problem.
pub message: String,
/// Location the message refers to, when known.
pub span: Option<SourceSpan>,
}
impl ParseDiagnostic {
/// Creates a diagnostic from a message and an optional span.
pub fn new(message: String, span: Option<SourceSpan>) -> Self {
Self { message, span }
}
}
/// Displays only the diagnostic message; the span is not rendered.
impl std::fmt::Display for ParseDiagnostic {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.message)
    }
}

/// Lets a `ParseDiagnostic` be used wherever a standard error is expected.
impl std::error::Error for ParseDiagnostic {}
/// Returns `span` with its end raised to at least `start + 1` (saturating), so that a
/// zero-width or inverted span covers at least one position when rendered.
pub fn render_span(span: SourceSpan) -> SourceSpan {
    let SourceSpan { start, end } = span;
    let minimum_end = start.saturating_add(1);
    SourceSpan {
        start,
        end: if end < minimum_end { minimum_end } else { end },
    }
}
/// Decodes a string literal into the text it denotes.
///
/// If `raw` both starts and ends with a double quote (as two separate characters), that
/// pair is removed; otherwise `raw` is decoded as-is. The escapes `\"`, `\\`, `\n`, `\r`
/// and `\t` are recognised.
///
/// # Errors
///
/// Returns `NanoError::Parse` for a trailing lone backslash or any other escape character.
pub fn decode_string_literal(raw: &str) -> Result<String> {
    let body = match raw
        .strip_prefix('"')
        .and_then(|rest| rest.strip_suffix('"'))
    {
        Some(unquoted) => unquoted,
        None => raw,
    };

    let mut out = String::with_capacity(body.len());
    let mut pending = body.chars();
    loop {
        let ch = match pending.next() {
            Some(ch) => ch,
            None => break,
        };
        if ch != '\\' {
            out.push(ch);
            continue;
        }
        // A backslash must be followed by one of the supported escape characters.
        let replacement = match pending.next() {
            None => {
                return Err(NanoError::Parse(
                    "unterminated escape sequence".to_string(),
                ));
            }
            Some('"') => '"',
            Some('\\') => '\\',
            Some('n') => '\n',
            Some('r') => '\r',
            Some('t') => '\t',
            Some(other) => {
                return Err(NanoError::Parse(format!(
                    "unsupported escape sequence: \\{}",
                    other
                )));
            }
        };
        out.push(replacement);
    }
    Ok(out)
}
/// Crate-wide error type; the `#[error]` attributes define each variant's display text.
#[derive(Debug, Error)]
pub enum NanoError {
/// Failure while parsing source text.
#[error("parse error: {0}")]
Parse(String),
/// Catalog-related failure.
#[error("catalog error: {0}")]
Catalog(String),
/// Type-related failure.
#[error("type error: {0}")]
Type(String),
/// Storage-related failure.
#[error("storage error: {0}")]
Storage(String),
/// Two rows carry the same value for a property declared `@unique`.
#[error(
"@unique constraint violation on {type_name}.{property}: duplicate value '{value}' at rows {first_row} and {second_row}"
)]
UniqueConstraint {
/// Type whose constraint was violated.
type_name: String,
/// Property declared unique.
property: String,
/// The duplicated value, rendered as text.
value: String,
/// Row of the first occurrence.
first_row: usize,
/// Row of the conflicting occurrence.
second_row: usize,
},
/// Failure while planning or lowering a query.
#[error("plan error: {0}")]
Plan(String),
/// Failure while executing an operation.
#[error("execution error: {0}")]
Execution(String),
/// Arrow error, converted via `From` and displayed as the underlying error.
#[error(transparent)]
Arrow(#[from] arrow_schema::ArrowError),
/// I/O error, converted via `From`.
#[error("io error: {0}")]
Io(#[from] std::io::Error),
/// Lance-related failure, carried as text.
#[error("lance error: {0}")]
Lance(String),
/// Manifest-related failure.
#[error("manifest error: {0}")]
Manifest(String),
}
/// Result alias whose error type is [`NanoError`].
pub type Result<T> = std::result::Result<T, NanoError>;
// Unit tests for span handling and string-literal decoding.
#[cfg(test)]
mod tests {
use super::{SourceSpan, decode_string_literal, render_span};
// `SourceSpan::new` stores the endpoints untouched, even when they are equal.
#[test]
fn source_span_preserves_zero_width() {
let span = SourceSpan::new(7, 7);
assert_eq!(span.start, 7);
assert_eq!(span.end, 7);
}
// `render_span` widens a zero-width span to cover one position.
#[test]
fn render_span_widens_zero_width_for_diagnostics() {
let rendered = render_span(SourceSpan::new(7, 7));
assert_eq!(rendered.start, 7);
assert_eq!(rendered.end, 8);
}
// All five supported escapes decode, and the surrounding quotes are removed.
#[test]
fn decode_string_literal_supports_common_escapes() {
let decoded = decode_string_literal("\"a\\n\\r\\t\\\\\\\"b\"").unwrap();
assert_eq!(decoded, "a\n\r\t\\\"b");
}
}

View file

@ -0,0 +1,657 @@
use std::collections::HashSet;
use crate::catalog::Catalog;
use crate::error::Result;
use crate::query::ast::*;
use crate::query::typecheck::TypeContext;
use crate::types::Direction;
use super::*;
/// Lowers a type-checked read query into its IR form.
///
/// The match clauses become the operator pipeline (via `lower_clauses`), and the return
/// and order clauses are lowered expression by expression. Identifiers that name a query
/// parameter are lowered as parameters rather than variables.
///
/// # Errors
///
/// Returns `NanoError::Plan` when `query` has a mutation body, and propagates any error
/// from `lower_clauses`.
pub fn lower_query(
    catalog: &Catalog,
    query: &QueryDecl,
    type_ctx: &TypeContext,
) -> Result<QueryIR> {
    if query.mutation.is_some() {
        return Err(crate::error::NanoError::Plan(
            "cannot lower mutation query with read-query lowerer".to_string(),
        ));
    }

    let mut param_names: HashSet<String> = HashSet::new();
    for param in &query.params {
        param_names.insert(param.name.clone());
    }

    let mut pipeline = Vec::new();
    let mut bound_vars = HashSet::new();
    lower_clauses(
        catalog,
        &query.match_clause,
        type_ctx,
        &mut pipeline,
        &mut bound_vars,
        &param_names,
    )?;

    let mut return_exprs: Vec<IRProjection> = Vec::new();
    for projection in query.return_clause.iter() {
        return_exprs.push(IRProjection {
            expr: lower_expr(&projection.expr, &param_names),
            alias: projection.alias.clone(),
        });
    }

    let mut order_by: Vec<IROrdering> = Vec::new();
    for ordering in query.order_clause.iter() {
        order_by.push(IROrdering {
            expr: lower_expr(&ordering.expr, &param_names),
            descending: ordering.descending,
        });
    }

    Ok(QueryIR {
        name: query.name.clone(),
        params: query.params.clone(),
        pipeline,
        return_exprs,
        order_by,
        limit: query.limit,
    })
}
pub fn lower_mutation_query(query: &QueryDecl) -> Result<MutationIR> {
let mutation = query.mutation.as_ref().ok_or_else(|| {
crate::error::NanoError::Plan("query does not contain a mutation body".to_string())
})?;
let param_names: HashSet<String> = query.params.iter().map(|p| p.name.clone()).collect();
let op = match mutation {
Mutation::Insert(insert) => MutationOpIR::Insert {
type_name: insert.type_name.clone(),
assignments: insert
.assignments
.iter()
.map(|a| IRAssignment {
property: a.property.clone(),
value: lower_match_value(&a.value, &param_names),
})
.collect(),
},
Mutation::Update(update) => MutationOpIR::Update {
type_name: update.type_name.clone(),
assignments: update
.assignments
.iter()
.map(|a| IRAssignment {
property: a.property.clone(),
value: lower_match_value(&a.value, &param_names),
})
.collect(),
predicate: IRMutationPredicate {
property: update.predicate.property.clone(),
op: update.predicate.op,
value: lower_match_value(&update.predicate.value, &param_names),
},
},
Mutation::Delete(delete) => MutationOpIR::Delete {
type_name: delete.type_name.clone(),
predicate: IRMutationPredicate {
property: delete.predicate.property.clone(),
op: delete.predicate.op,
value: lower_match_value(&delete.predicate.value, &param_names),
},
},
};
Ok(MutationIR {
name: query.name.clone(),
params: query.params.clone(),
op,
})
}
/// Lowers a list of match clauses into IR operators appended to `pipeline`.
///
/// Clauses are emitted grouped by kind, regardless of the order they were written in:
/// bindings (`NodeScan`), then traversals (`Expand`), then explicit filters (`Filter`),
/// then negations (`AntiJoin`, lowered recursively). `bound_vars` is updated with every
/// variable that becomes bound along the way.
///
/// # Errors
///
/// Returns `NanoError::Plan` when a binding names a node type or property that the catalog
/// does not contain, or when a traversal names an unknown edge. Type checking is expected
/// to have rejected such queries already, so these errors indicate an inconsistency
/// between the type checker and the catalog rather than a user mistake.
fn lower_clauses(
    catalog: &Catalog,
    clauses: &[Clause],
    type_ctx: &TypeContext,
    pipeline: &mut Vec<IROp>,
    bound_vars: &mut HashSet<String>,
    param_names: &HashSet<String>,
) -> Result<()> {
    // Separate clause types for ordering: bindings first, then traversals, then filters
    let mut bindings = Vec::new();
    let mut traversals = Vec::new();
    let mut filters = Vec::new();
    let mut negations = Vec::new();

    for clause in clauses {
        match clause {
            Clause::Binding(b) => bindings.push(b),
            Clause::Traversal(t) => traversals.push(t),
            Clause::Filter(f) => filters.push(f),
            Clause::Negation(inner) => negations.push(inner),
        }
    }

    // Lower bindings into NodeScan ops
    for binding in &bindings {
        // Report a plan error instead of panicking, matching how a missing edge is
        // handled for traversals below.
        let node_type = catalog.node_types.get(&binding.type_name).ok_or_else(|| {
            crate::error::NanoError::Plan(format!(
                "lowering binding referenced missing node type '{}' after typecheck",
                binding.type_name
            ))
        })?;

        // Collect inline filters from prop matches
        let mut scan_filters = Vec::new();
        for pm in &binding.prop_matches {
            let prop = node_type.properties.get(&pm.prop_name).ok_or_else(|| {
                crate::error::NanoError::Plan(format!(
                    "lowering binding referenced missing property '{}.{}' after typecheck",
                    binding.type_name, pm.prop_name
                ))
            })?;
            // Matching against a list property tests membership rather than equality.
            let op = if prop.list {
                CompOp::Contains
            } else {
                CompOp::Eq
            };
            let right = match &pm.value {
                MatchValue::Literal(lit) => IRExpr::Literal(lit.clone()),
                MatchValue::Now => IRExpr::Param(NOW_PARAM_NAME.to_string()),
                MatchValue::Variable(v) => {
                    if param_names.contains(v) {
                        IRExpr::Param(v.clone())
                    } else {
                        IRExpr::Variable(v.clone())
                    }
                }
            };
            scan_filters.push(IRFilter {
                left: IRExpr::PropAccess {
                    variable: binding.variable.clone(),
                    property: pm.prop_name.clone(),
                },
                op,
                right,
            });
        }

        pipeline.push(IROp::NodeScan {
            variable: binding.variable.clone(),
            type_name: binding.type_name.clone(),
            filters: scan_filters,
        });
        bound_vars.insert(binding.variable.clone());
    }

    // Lower traversals into Expand ops
    // Handle "cycle closing" — if both src and dst are already bound, use a filter
    for traversal in &traversals {
        let edge = catalog
            .lookup_edge_by_name(&traversal.edge_name)
            .ok_or_else(|| {
                crate::error::NanoError::Plan(format!(
                    "lowering traversal referenced missing edge '{}' after typecheck",
                    traversal.edge_name
                ))
            })?;

        // Determine direction from type context; default to outgoing when the type
        // context has no record of this traversal.
        let direction = type_ctx
            .traversals
            .iter()
            .find(|rt| {
                rt.src == traversal.src && rt.dst == traversal.dst && rt.edge_type == edge.name
            })
            .map(|rt| rt.direction)
            .unwrap_or(Direction::Out);

        let dst_type = match direction {
            Direction::Out => edge.to_type.clone(),
            Direction::In => edge.from_type.clone(),
        };

        if bound_vars.contains(&traversal.src) && bound_vars.contains(&traversal.dst) {
            // Cycle closing: emit expand to a temp var, then filter temp.id = dst.id
            let temp_var = format!("__temp_{}", traversal.dst);
            pipeline.push(IROp::Expand {
                src_var: traversal.src.clone(),
                dst_var: temp_var.clone(),
                edge_type: edge.name.clone(),
                direction,
                dst_type,
                min_hops: traversal.min_hops,
                max_hops: traversal.max_hops,
            });
            pipeline.push(IROp::Filter(IRFilter {
                left: IRExpr::PropAccess {
                    variable: temp_var,
                    property: "id".to_string(),
                },
                op: CompOp::Eq,
                right: IRExpr::PropAccess {
                    variable: traversal.dst.clone(),
                    property: "id".to_string(),
                },
            }));
        } else if !bound_vars.contains(&traversal.src) && bound_vars.contains(&traversal.dst) {
            // Reverse expand: dst is bound, src is not.
            // Swap direction and expand from dst to discover src.
            let reverse_dir = match direction {
                Direction::Out => Direction::In,
                Direction::In => Direction::Out,
            };
            let src_type = match direction {
                Direction::Out => edge.from_type.clone(),
                Direction::In => edge.to_type.clone(),
            };
            pipeline.push(IROp::Expand {
                src_var: traversal.dst.clone(),
                dst_var: traversal.src.clone(),
                edge_type: edge.name.clone(),
                direction: reverse_dir,
                dst_type: src_type,
                min_hops: traversal.min_hops,
                max_hops: traversal.max_hops,
            });
            // "_" is never recorded as a bound variable.
            if traversal.src != "_" {
                bound_vars.insert(traversal.src.clone());
            }
        } else {
            pipeline.push(IROp::Expand {
                src_var: traversal.src.clone(),
                dst_var: traversal.dst.clone(),
                edge_type: edge.name.clone(),
                direction,
                dst_type,
                min_hops: traversal.min_hops,
                max_hops: traversal.max_hops,
            });
            if traversal.dst != "_" {
                bound_vars.insert(traversal.dst.clone());
            }
        }
    }

    // Lower explicit filters
    for filter in &filters {
        pipeline.push(IROp::Filter(IRFilter {
            left: lower_expr(&filter.left, param_names),
            op: filter.op,
            right: lower_expr(&filter.right, param_names),
        }));
    }

    // Lower negations into AntiJoin ops
    for neg_clauses in &negations {
        // Find outer-bound variable referenced in the negation
        let outer_var = find_outer_var(neg_clauses, bound_vars);

        // The inner pipeline starts from a copy of the outer bindings so that variables
        // bound inside the negation do not leak into the enclosing scope.
        let mut inner_pipeline = Vec::new();
        let mut inner_bound = bound_vars.clone();
        lower_clauses(
            catalog,
            neg_clauses,
            type_ctx,
            &mut inner_pipeline,
            &mut inner_bound,
            param_names,
        )?;

        pipeline.push(IROp::AntiJoin {
            // An empty name is emitted when the negation references no outer variable.
            outer_var: outer_var.unwrap_or_default(),
            inner: inner_pipeline,
        });
    }

    Ok(())
}
/// Finds the first variable referenced by `clauses` that is already bound in the enclosing
/// scope.
///
/// Clauses are scanned in order. A traversal contributes its source before its
/// destination, a filter its left operand before its right, and a binding its own
/// variable. Nested negations are not searched.
fn find_outer_var(clauses: &[Clause], outer_bound: &HashSet<String>) -> Option<String> {
    for clause in clauses {
        let hit = match clause {
            Clause::Traversal(t) => [&t.src, &t.dst]
                .iter()
                .find(|name| outer_bound.contains(name.as_str()))
                .map(|name| name.to_string()),
            Clause::Filter(f) => [&f.left, &f.right]
                .iter()
                .filter_map(|operand| expr_var(operand))
                .find(|name| outer_bound.contains(name)),
            Clause::Binding(b) => {
                if outer_bound.contains(&b.variable) {
                    Some(b.variable.clone())
                } else {
                    None
                }
            }
            Clause::Negation(_) => None,
        };
        if hit.is_some() {
            return hit;
        }
    }
    None
}
/// Returns the first variable name mentioned by `expr`, if any.
///
/// Composite expressions are searched left to right (field, then query, then any optional
/// trailing argument). `Now`, literals and alias references mention no variable.
fn expr_var(expr: &Expr) -> Option<String> {
    // First variable found in `a`, falling back to `b`.
    fn first_of(a: &Expr, b: &Expr) -> Option<String> {
        match expr_var(a) {
            Some(found) => Some(found),
            None => expr_var(b),
        }
    }

    match expr {
        Expr::PropAccess { variable, .. } => Some(variable.clone()),
        Expr::Nearest { variable, .. } => Some(variable.clone()),
        Expr::Variable(name) => Some(name.clone()),
        Expr::Search { field, query } => first_of(field, query),
        Expr::MatchText { field, query } => first_of(field, query),
        Expr::Bm25 { field, query } => first_of(field, query),
        Expr::Fuzzy {
            field,
            query,
            max_edits,
        } => first_of(field, query).or_else(|| max_edits.as_deref().and_then(expr_var)),
        Expr::Rrf {
            primary,
            secondary,
            k,
        } => first_of(primary, secondary).or_else(|| k.as_deref().and_then(expr_var)),
        Expr::Aggregate { arg, .. } => expr_var(arg),
        Expr::Now | Expr::Literal(_) | Expr::AliasRef(_) => None,
    }
}
/// Lowers a parsed query expression into its IR counterpart.
///
/// The translation is structural: every AST variant maps onto the IR variant
/// of the same name, with child expressions lowered recursively. Two cases
/// are rewritten rather than copied:
/// - `now()` becomes `IRExpr::Param(NOW_PARAM_NAME)`;
/// - a variable whose name is listed in `param_names` becomes
///   `IRExpr::Param`, any other variable stays `IRExpr::Variable`.
fn lower_expr(expr: &Expr, param_names: &HashSet<String>) -> IRExpr {
    // Lowers a child expression and boxes the result in one step.
    fn boxed(child: &Expr, param_names: &HashSet<String>) -> Box<IRExpr> {
        Box::new(lower_expr(child, param_names))
    }

    match expr {
        Expr::Now => IRExpr::Param(NOW_PARAM_NAME.to_string()),
        Expr::Literal(value) => IRExpr::Literal(value.clone()),
        Expr::AliasRef(alias) => IRExpr::AliasRef(alias.clone()),
        Expr::Variable(name) if param_names.contains(name) => IRExpr::Param(name.clone()),
        Expr::Variable(name) => IRExpr::Variable(name.clone()),
        Expr::PropAccess { variable, property } => IRExpr::PropAccess {
            variable: variable.clone(),
            property: property.clone(),
        },
        Expr::Nearest {
            variable,
            property,
            query,
        } => IRExpr::Nearest {
            variable: variable.clone(),
            property: property.clone(),
            query: boxed(query, param_names),
        },
        Expr::Search { field, query } => IRExpr::Search {
            field: boxed(field, param_names),
            query: boxed(query, param_names),
        },
        Expr::Fuzzy {
            field,
            query,
            max_edits,
        } => IRExpr::Fuzzy {
            field: boxed(field, param_names),
            query: boxed(query, param_names),
            max_edits: max_edits
                .as_deref()
                .map(|limit| boxed(limit, param_names)),
        },
        Expr::MatchText { field, query } => IRExpr::MatchText {
            field: boxed(field, param_names),
            query: boxed(query, param_names),
        },
        Expr::Bm25 { field, query } => IRExpr::Bm25 {
            field: boxed(field, param_names),
            query: boxed(query, param_names),
        },
        Expr::Rrf {
            primary,
            secondary,
            k,
        } => IRExpr::Rrf {
            primary: boxed(primary, param_names),
            secondary: boxed(secondary, param_names),
            k: k.as_deref().map(|rank_k| boxed(rank_k, param_names)),
        },
        Expr::Aggregate { func, arg } => IRExpr::Aggregate {
            func: *func,
            arg: boxed(arg, param_names),
        },
    }
}
/// Lowers a property-match / mutation value into an IR expression.
///
/// `now()` maps to the reserved `NOW_PARAM_NAME` parameter, literals are
/// copied, and a variable becomes `IRExpr::Param` when its name is listed in
/// `param_names` and `IRExpr::Variable` otherwise.
fn lower_match_value(value: &MatchValue, param_names: &HashSet<String>) -> IRExpr {
    match value {
        MatchValue::Now => IRExpr::Param(NOW_PARAM_NAME.to_string()),
        MatchValue::Literal(lit) => IRExpr::Literal(lit.clone()),
        MatchValue::Variable(name) if param_names.contains(name) => IRExpr::Param(name.clone()),
        MatchValue::Variable(name) => IRExpr::Variable(name.clone()),
    }
}
// Unit tests for lowering: each test parses a small query, type-checks it
// against a catalog, lowers it, and inspects the resulting IR.
#[cfg(test)]
mod tests {
use super::*;
use crate::catalog::build_catalog;
use crate::query::parser::parse_query;
use crate::query::typecheck::{CheckedQuery, typecheck_query, typecheck_query_decl};
use crate::schema::parser::parse_schema;
// Shared fixture: Person and Company nodes with Knows and WorksAt edges.
fn setup() -> Catalog {
let schema = parse_schema(
r#"
node Person { name: String age: I32? }
node Company { name: String }
edge Knows: Person -> Person { since: Date? }
edge WorksAt: Person -> Company
"#,
)
.unwrap();
build_catalog(&schema).unwrap()
}
// A binding plus one traversal lowers to two pipeline ops and keeps both projections.
#[test]
fn test_lower_basic() {
let catalog = setup();
let qf = parse_query(
r#"
query q($name: String) {
match {
$p: Person { name: $name }
$p knows $f
}
return { $f.name, $f.age }
}
"#,
)
.unwrap();
let tc = typecheck_query(&catalog, &qf.queries[0]).unwrap();
let ir = lower_query(&catalog, &qf.queries[0], &tc).unwrap();
assert_eq!(ir.pipeline.len(), 2); // NodeScan + Expand
assert_eq!(ir.return_exprs.len(), 2);
}
// A `not { ... }` group is emitted as an AntiJoin after the outer scan.
#[test]
fn test_lower_negation() {
let catalog = setup();
let qf = parse_query(
r#"
query q() {
match {
$p: Person
not { $p worksAt $_ }
}
return { $p.name }
}
"#,
)
.unwrap();
let tc = typecheck_query(&catalog, &qf.queries[0]).unwrap();
let ir = lower_query(&catalog, &qf.queries[0], &tc).unwrap();
assert_eq!(ir.pipeline.len(), 2); // NodeScan + AntiJoin
assert!(matches!(&ir.pipeline[1], IROp::AntiJoin { .. }));
}
// An `update` statement lowers to MutationOpIR::Update with its type name,
// assignments and predicate carried over.
#[test]
fn test_lower_mutation_update() {
let catalog = setup();
let qf = parse_query(
r#"
query q($name: String, $age: I32) {
update Person set { age: $age } where name = $name
}
"#,
)
.unwrap();
let checked = typecheck_query_decl(&catalog, &qf.queries[0]).unwrap();
assert!(matches!(checked, CheckedQuery::Mutation(_)));
let ir = lower_mutation_query(&qf.queries[0]).unwrap();
match ir.op {
MutationOpIR::Update {
type_name,
assignments,
predicate,
} => {
assert_eq!(type_name, "Person");
assert_eq!(assignments.len(), 1);
assert_eq!(assignments[0].property, "age");
assert_eq!(predicate.property, "name");
}
_ => panic!("expected update mutation op"),
}
}
// Hop bounds written as `{1,3}` reach the Expand op as min_hops = 1, max_hops = Some(3).
#[test]
fn test_lower_bounded_traversal() {
let catalog = setup();
let qf = parse_query(
r#"
query q() {
match {
$p: Person
$p knows{1,3} $f
}
return { $f.name }
}
"#,
)
.unwrap();
let tc = typecheck_query(&catalog, &qf.queries[0]).unwrap();
let ir = lower_query(&catalog, &qf.queries[0], &tc).unwrap();
let expand = ir
.pipeline
.iter()
.find_map(|op| match op {
IROp::Expand {
min_hops, max_hops, ..
} => Some((*min_hops, *max_hops)),
_ => None,
})
.expect("expected expand op");
assert_eq!(expand.0, 1);
assert_eq!(expand.1, Some(3));
}
// `now()` in a projection lowers to the reserved NOW_PARAM_NAME parameter.
#[test]
fn test_lower_now_uses_reserved_runtime_param() {
let catalog = setup();
let qf = parse_query(
r#"
query stamp() {
match { $p: Person }
return { now() as ts }
}
"#,
)
.unwrap();
let tc = typecheck_query(&catalog, &qf.queries[0]).unwrap();
let ir = lower_query(&catalog, &qf.queries[0], &tc).unwrap();
assert!(matches!(
ir.return_exprs[0].expr,
IRExpr::Param(ref name) if name == NOW_PARAM_NAME
));
}
// `now()` in both the assignment and the predicate of a mutation lowers to
// the reserved NOW_PARAM_NAME parameter.
#[test]
fn test_lower_mutation_now_uses_reserved_runtime_param() {
let catalog = build_catalog(
&parse_schema(
r#"
node Event {
slug: String @key
updated_at: DateTime?
}
"#,
)
.unwrap(),
)
.unwrap();
let qf = parse_query(
r#"
query stamp() {
update Event set { updated_at: now() } where updated_at = now()
}
"#,
)
.unwrap();
let checked = typecheck_query_decl(&catalog, &qf.queries[0]).unwrap();
assert!(matches!(checked, CheckedQuery::Mutation(_)));
let ir = lower_mutation_query(&qf.queries[0]).unwrap();
match ir.op {
MutationOpIR::Update {
assignments,
predicate,
..
} => {
assert!(matches!(
assignments[0].value,
IRExpr::Param(ref name) if name == NOW_PARAM_NAME
));
assert!(matches!(
predicate.value,
IRExpr::Param(ref name) if name == NOW_PARAM_NAME
));
}
_ => panic!("expected update mutation op"),
}
}
}

View file

@ -0,0 +1,143 @@
pub(crate) mod lower;
use std::collections::HashMap;
use crate::query::ast::{AggFunc, CompOp, Literal, Param};
use crate::types::Direction;
/// Intermediate representation of a read query.
#[derive(Debug, Clone)]
pub struct QueryIR {
/// Query name.
pub name: String,
/// Declared query parameters.
pub params: Vec<Param>,
/// Ordered operators (scans, expansions, filters, anti-joins).
pub pipeline: Vec<IROp>,
/// Projections listed in the `return` clause.
pub return_exprs: Vec<IRProjection>,
/// Sort keys from the `order` clause; empty when the query has none.
pub order_by: Vec<IROrdering>,
/// Optional row limit.
pub limit: Option<u64>,
}
/// Intermediate representation of a mutation query (insert, update or delete).
#[derive(Debug, Clone)]
pub struct MutationIR {
/// Query name.
pub name: String,
/// Declared query parameters.
pub params: Vec<Param>,
/// The single mutation operation this query performs.
pub op: MutationOpIR,
}
/// The operation carried by a [`MutationIR`].
#[derive(Debug, Clone)]
pub enum MutationOpIR {
/// Insert a record of `type_name` with the given property assignments.
Insert {
type_name: String,
assignments: Vec<IRAssignment>,
},
/// Apply `assignments` to records of `type_name` selected by `predicate`.
Update {
type_name: String,
assignments: Vec<IRAssignment>,
predicate: IRMutationPredicate,
},
/// Delete records of `type_name` selected by `predicate`.
Delete {
type_name: String,
predicate: IRMutationPredicate,
},
}
/// A `property: value` pair inside an insert or update mutation.
#[derive(Debug, Clone)]
pub struct IRAssignment {
/// Name of the property being assigned.
pub property: String,
/// Lowered value expression.
pub value: IRExpr,
}
/// The `where property <op> value` condition of an update or delete mutation.
#[derive(Debug, Clone)]
pub struct IRMutationPredicate {
/// Name of the property being compared.
pub property: String,
/// Comparison operator.
pub op: CompOp,
/// Lowered right-hand value.
pub value: IRExpr,
}
/// Resolved runtime parameters, keyed by parameter name, each mapped to the
/// literal value supplied for it.
pub type ParamMap = HashMap<String, Literal>;
/// One step of a [`QueryIR`] pipeline.
#[derive(Debug, Clone)]
pub enum IROp {
/// Bind `variable` to nodes of `type_name`, with attached filters.
NodeScan {
variable: String,
type_name: String,
filters: Vec<IRFilter>,
},
/// Follow edges of `edge_type` from `src_var` to bind `dst_var`.
Expand {
src_var: String,
dst_var: String,
edge_type: String,
direction: Direction,
dst_type: String,
/// Lower hop bound.
min_hops: u32,
/// Upper hop bound, if one was given.
// NOTE(review): `None` presumably means "no upper bound" — confirm with the executor.
max_hops: Option<u32>,
},
/// Standalone comparison applied to the rows produced so far.
Filter(IRFilter),
/// Negation (`not { ... }`) expressed as an anti-join.
AntiJoin {
/// The outer variable whose id is used for the join key
outer_var: String,
/// The inner pipeline that produces rows to anti-join against
inner: Vec<IROp>,
},
}
/// A binary comparison `left <op> right` between two lowered expressions.
#[derive(Debug, Clone)]
pub struct IRFilter {
/// Left operand.
pub left: IRExpr,
/// Comparison operator.
pub op: CompOp,
/// Right operand.
pub right: IRExpr,
}
/// Lowered expression tree.
///
/// Mirrors the query AST's `Expr`, except that there is no `Now` variant and
/// there is an extra `Param` variant for parameter references.
#[derive(Debug, Clone)]
pub enum IRExpr {
/// `$variable.property`.
PropAccess {
variable: String,
property: String,
},
/// `nearest($variable.property, query)`.
Nearest {
variable: String,
property: String,
query: Box<IRExpr>,
},
/// `search(field, query)`.
Search {
field: Box<IRExpr>,
query: Box<IRExpr>,
},
/// `fuzzy(field, query[, max_edits])`.
Fuzzy {
field: Box<IRExpr>,
query: Box<IRExpr>,
max_edits: Option<Box<IRExpr>>,
},
/// `match_text(field, query)`.
MatchText {
field: Box<IRExpr>,
query: Box<IRExpr>,
},
/// `bm25(field, query)`.
Bm25 {
field: Box<IRExpr>,
query: Box<IRExpr>,
},
/// `rrf(primary, secondary[, k])`.
Rrf {
primary: Box<IRExpr>,
secondary: Box<IRExpr>,
k: Option<Box<IRExpr>>,
},
/// Reference to a variable that is not a query parameter.
Variable(String),
/// Reference to a query parameter by name.
Param(String),
/// Constant value.
Literal(Literal),
/// Aggregate function applied to `arg`.
Aggregate {
func: AggFunc,
arg: Box<IRExpr>,
},
/// Reference to an alias by name.
AliasRef(String),
}
/// One entry of the `return` clause: an expression and its optional alias.
#[derive(Debug, Clone)]
pub struct IRProjection {
/// Expression to project.
pub expr: IRExpr,
/// Alias given with `as`, if any.
pub alias: Option<String>,
}
/// One sort key of the `order` clause.
#[derive(Debug, Clone)]
pub struct IROrdering {
/// Expression to sort by.
pub expr: IRExpr,
/// `true` for descending order.
pub descending: bool,
}

View file

@ -0,0 +1,352 @@
use arrow_array::{
Array, ArrayRef, BooleanArray, Date32Array, Date64Array, FixedSizeListArray, Float32Array,
Float64Array, Int32Array, Int64Array, ListArray, RecordBatch, StringArray, StructArray,
UInt32Array, UInt64Array,
};
use arrow_schema::DataType;
/// Largest integer magnitude treated as JavaScript-safe: 2^53 - 1, as an `i64`.
pub const JS_MAX_SAFE_INTEGER_I64: i64 = 9_007_199_254_740_991;
/// The same bound (2^53 - 1) as a `u64`, for unsigned comparisons.
pub const JS_MAX_SAFE_INTEGER_U64: u64 = 9_007_199_254_740_991;
/// Controls how 64-bit integers are rendered in JSON output.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum JsonIntegerMode {
/// 64-bit integers outside the JS-safe range are emitted as strings.
JavaScript,
/// 64-bit integers are always emitted as JSON numbers.
Native,
}
pub fn is_js_safe_integer_i64(value: i64) -> bool {
(-JS_MAX_SAFE_INTEGER_I64..=JS_MAX_SAFE_INTEGER_I64).contains(&value)
}
/// Convert Arrow RecordBatches into a Vec of JSON objects (one per row).
///
/// Uses the JavaScript integer mode: 64-bit integers outside the JS-safe
/// range are emitted as strings instead of numbers.
pub fn record_batches_to_json_rows(results: &[RecordBatch]) -> Vec<serde_json::Value> {
record_batches_to_json_rows_with_mode(results, JsonIntegerMode::JavaScript)
}
/// Convert Arrow RecordBatches into JSON rows without JS-safe integer coercion.
///
/// Uses the native integer mode: every 64-bit integer is emitted as a JSON
/// number regardless of magnitude.
pub fn record_batches_to_rust_json_rows(results: &[RecordBatch]) -> Vec<serde_json::Value> {
record_batches_to_json_rows_with_mode(results, JsonIntegerMode::Native)
}
/// Flattens `results` into one JSON object per row, in batch order.
///
/// Each object maps the batch schema's field names to the converted cell
/// values; `integer_mode` is forwarded to every cell conversion.
fn record_batches_to_json_rows_with_mode(
    results: &[RecordBatch],
    integer_mode: JsonIntegerMode,
) -> Vec<serde_json::Value> {
    let capacity: usize = results.iter().map(|batch| batch.num_rows()).sum();
    let mut rows = Vec::with_capacity(capacity);
    for batch in results {
        let schema = batch.schema();
        rows.extend((0..batch.num_rows()).map(|row_idx| {
            let object: serde_json::Map<String, serde_json::Value> = schema
                .fields()
                .iter()
                .enumerate()
                .map(|(col_idx, field)| {
                    let cell = array_value_to_json_with_mode(
                        batch.column(col_idx),
                        row_idx,
                        integer_mode,
                    );
                    (field.name().clone(), cell)
                })
                .collect();
            serde_json::Value::Object(object)
        }));
    }
    rows
}
/// Convert a single cell from an Arrow array to a serde_json::Value.
///
/// Uses the JavaScript integer mode, so 64-bit integers outside the JS-safe
/// range come back as strings.
pub fn array_value_to_json(array: &ArrayRef, row: usize) -> serde_json::Value {
array_value_to_json_with_mode(array, row, JsonIntegerMode::JavaScript)
}
/// Converts the cell at `row` of `array` into a JSON value.
///
/// Null cells become JSON `null`. Otherwise the conversion is chosen by the
/// array's Arrow data type; a failed downcast for a recognised type also
/// yields `null`. Types without a dedicated arm are rendered through Arrow's
/// display formatting and returned as a JSON string.
///
/// `integer_mode` only affects `Int64` and `UInt64` cells, including those
/// nested inside lists and structs.
fn array_value_to_json_with_mode(
array: &ArrayRef,
row: usize,
integer_mode: JsonIntegerMode,
) -> serde_json::Value {
if array.is_null(row) {
return serde_json::Value::Null;
}
match array.data_type() {
DataType::Utf8 => array
.as_any()
.downcast_ref::<StringArray>()
.map(|a| serde_json::Value::String(a.value(row).to_string()))
.unwrap_or(serde_json::Value::Null),
DataType::Boolean => array
.as_any()
.downcast_ref::<BooleanArray>()
.map(|a| serde_json::Value::Bool(a.value(row)))
.unwrap_or(serde_json::Value::Null),
DataType::Int32 => array
.as_any()
.downcast_ref::<Int32Array>()
.map(|a| serde_json::Value::Number((a.value(row) as i64).into()))
.unwrap_or(serde_json::Value::Null),
DataType::Int64 => array
.as_any()
.downcast_ref::<Int64Array>()
.map(|a| {
let value = a.value(row);
match integer_mode {
// Outside the JS-safe range: emit as a decimal string.
JsonIntegerMode::JavaScript if !is_js_safe_integer_i64(value) => {
serde_json::Value::String(value.to_string())
}
JsonIntegerMode::JavaScript | JsonIntegerMode::Native => {
serde_json::Value::Number(value.into())
}
}
})
.unwrap_or(serde_json::Value::Null),
DataType::UInt32 => array
.as_any()
.downcast_ref::<UInt32Array>()
.map(|a| serde_json::Value::Number((a.value(row) as u64).into()))
.unwrap_or(serde_json::Value::Null),
DataType::UInt64 => array
.as_any()
.downcast_ref::<UInt64Array>()
.map(|a| {
let value = a.value(row);
match integer_mode {
// Above the JS-safe maximum: emit as a decimal string.
JsonIntegerMode::JavaScript if value > JS_MAX_SAFE_INTEGER_U64 => {
serde_json::Value::String(value.to_string())
}
JsonIntegerMode::JavaScript | JsonIntegerMode::Native => {
serde_json::Value::Number(value.into())
}
}
})
.unwrap_or(serde_json::Value::Null),
DataType::Float32 => array
.as_any()
.downcast_ref::<Float32Array>()
.map(|a| json_float_value(a.value(row) as f64))
.unwrap_or(serde_json::Value::Null),
DataType::Float64 => array
.as_any()
.downcast_ref::<Float64Array>()
.map(|a| json_float_value(a.value(row)))
.unwrap_or(serde_json::Value::Null),
DataType::Date32 => array
.as_any()
.downcast_ref::<Date32Array>()
.map(|a| {
let days = a.value(row);
// Formatted as YYYY-MM-DD; falls back to the raw day count when
// the value cannot be converted to a datetime.
arrow_array::temporal_conversions::date32_to_datetime(days)
.map(|dt| serde_json::Value::String(dt.format("%Y-%m-%d").to_string()))
.unwrap_or_else(|| serde_json::Value::Number((days as i64).into()))
})
.unwrap_or(serde_json::Value::Null),
DataType::Date64 => array
.as_any()
.downcast_ref::<Date64Array>()
.map(|a| {
let ms = a.value(row);
// Formatted with millisecond precision and a trailing `Z`; falls
// back to the raw millisecond count when conversion fails.
arrow_array::temporal_conversions::date64_to_datetime(ms)
.map(|dt| {
serde_json::Value::String(dt.format("%Y-%m-%dT%H:%M:%S%.3fZ").to_string())
})
.unwrap_or_else(|| serde_json::Value::Number(ms.into()))
})
.unwrap_or(serde_json::Value::Null),
DataType::List(_) => array
.as_any()
.downcast_ref::<ListArray>()
.map(|a| {
// Convert every element of this row's sub-array recursively.
let values = a.value(row);
serde_json::Value::Array(
(0..values.len())
.map(|idx| array_value_to_json_with_mode(&values, idx, integer_mode))
.collect(),
)
})
.unwrap_or(serde_json::Value::Null),
DataType::FixedSizeList(_, _) => array
.as_any()
.downcast_ref::<FixedSizeListArray>()
.map(|a| fixed_size_list_value_to_json(a, row, integer_mode))
.unwrap_or(serde_json::Value::Null),
DataType::Struct(_) => array
.as_any()
.downcast_ref::<StructArray>()
.map(|struct_arr| {
// One JSON object per struct cell: child column i supplies field i at the same row.
let mut obj = serde_json::Map::new();
for (i, field) in struct_arr.fields().iter().enumerate() {
let col = struct_arr.column(i);
obj.insert(
field.name().clone(),
array_value_to_json_with_mode(col, row, integer_mode),
);
}
serde_json::Value::Object(obj)
})
.unwrap_or(serde_json::Value::Null),
_ => {
// Any other type: use Arrow's textual rendering (empty string if that fails).
let display =
arrow_cast::display::array_value_to_string(array, row).unwrap_or_default();
serde_json::Value::String(display)
}
}
}
/// Converts a float to JSON, spelling out the values JSON numbers cannot hold.
///
/// NaN, positive infinity and negative infinity become the strings `"NaN"`,
/// `"Infinity"` and `"-Infinity"`. Every other value becomes a JSON number,
/// or `null` should `serde_json` refuse it.
fn json_float_value(value: f64) -> serde_json::Value {
    let label = if value.is_nan() {
        "NaN"
    } else if value == f64::INFINITY {
        "Infinity"
    } else if value == f64::NEG_INFINITY {
        "-Infinity"
    } else {
        return match serde_json::Number::from_f64(value) {
            Some(number) => serde_json::Value::Number(number),
            None => serde_json::Value::Null,
        };
    };
    serde_json::Value::String(label.to_string())
}
/// Converts the fixed-size list stored at `row` into a JSON array.
///
/// When the child values are `Float32`, the elements are read directly from
/// the flat child array starting at `row * value_length`. Any other element
/// type is converted one element at a time through
/// `array_value_to_json_with_mode`.
fn fixed_size_list_value_to_json(
    array: &FixedSizeListArray,
    row: usize,
    integer_mode: JsonIntegerMode,
) -> serde_json::Value {
    match array.values().as_any().downcast_ref::<Float32Array>() {
        Some(floats) => {
            let width = array.value_length() as usize;
            float32_json_array(floats, row.saturating_mul(width), width)
        }
        None => {
            let items = array.value(row);
            let mut out = Vec::with_capacity(items.len());
            for idx in 0..items.len() {
                out.push(array_value_to_json_with_mode(&items, idx, integer_mode));
            }
            serde_json::Value::Array(out)
        }
    }
}
fn float32_json_array(values: &Float32Array, start: usize, len: usize) -> serde_json::Value {
let mut out = Vec::with_capacity(len);
let end = start.saturating_add(len).min(values.len());
for idx in start..end {
if values.is_null(idx) {
out.push(serde_json::Value::Null);
continue;
}
let value = values.value(idx) as f64;
out.push(
serde_json::Number::from_f64(value)
.map(serde_json::Value::Number)
.unwrap_or(serde_json::Value::Null),
);
}
serde_json::Value::Array(out)
}
// Unit tests for Arrow-to-JSON conversion: integer coercion modes, the
// Float32 fixed-size-list path, and non-finite floats.
#[cfg(test)]
mod tests {
use super::{array_value_to_json, record_batches_to_rust_json_rows};
use std::sync::Arc;
use arrow_array::builder::{FixedSizeListBuilder, Float32Builder};
use arrow_array::{ArrayRef, Float64Array, Int64Array, RecordBatch, UInt64Array};
use arrow_schema::{DataType, Field, Schema};
// 2^53 is one past the JS-safe maximum, so it must come back as a string.
#[test]
fn int64_outside_js_safe_range_is_stringified() {
let values: ArrayRef = Arc::new(Int64Array::from(vec![Some(9_007_199_254_740_992)]));
assert_eq!(
array_value_to_json(&values, 0),
serde_json::Value::String("9007199254740992".to_string())
);
}
// Same boundary for unsigned values.
#[test]
fn uint64_outside_js_safe_range_is_stringified() {
let values: ArrayRef = Arc::new(UInt64Array::from(vec![Some(9_007_199_254_740_992)]));
assert_eq!(
array_value_to_json(&values, 0),
serde_json::Value::String("9007199254740992".to_string())
);
}
// 2^53 - 1 is the largest value that stays a JSON number.
#[test]
fn uint64_within_js_safe_range_stays_numeric() {
let values: ArrayRef = Arc::new(UInt64Array::from(vec![Some(9_007_199_254_740_991)]));
assert_eq!(
array_value_to_json(&values, 0),
serde_json::json!(9_007_199_254_740_991u64)
);
}
// Native mode keeps even i64::MIN and u64::MAX as JSON numbers.
#[test]
fn rust_json_rows_preserve_full_width_integers() {
let schema = Arc::new(Schema::new(vec![
Field::new("signed", DataType::Int64, false),
Field::new("unsigned", DataType::UInt64, false),
]));
let batch = RecordBatch::try_new(
schema,
vec![
Arc::new(Int64Array::from(vec![i64::MIN])),
Arc::new(UInt64Array::from(vec![u64::MAX])),
],
)
.expect("batch");
assert_eq!(
record_batches_to_rust_json_rows(&[batch]),
vec![serde_json::json!({
"signed": i64::MIN,
"unsigned": u64::MAX,
})]
);
}
// Three rows of width 3: a vector, a null row, and another vector. Checks
// that each row reads its own window of the flat child array.
#[test]
fn fixed_size_float32_vectors_serialize_without_recursive_dispatch() {
let mut builder = FixedSizeListBuilder::new(Float32Builder::new(), 3);
builder.values().append_value(0.25);
builder.values().append_value(0.5);
builder.values().append_value(0.75);
builder.append(true);
for _ in 0..3 {
builder.values().append_null();
}
builder.append(false);
builder.values().append_value(1.0);
builder.values().append_value(2.0);
builder.values().append_value(3.0);
builder.append(true);
let values: ArrayRef = Arc::new(builder.finish());
assert_eq!(
array_value_to_json(&values, 0),
serde_json::json!([0.25, 0.5, 0.75])
);
assert_eq!(array_value_to_json(&values, 1), serde_json::Value::Null);
assert_eq!(
array_value_to_json(&values, 2),
serde_json::json!([1.0, 2.0, 3.0])
);
}
// NaN and the infinities are emitted as descriptive strings.
#[test]
fn non_finite_floats_are_stringified() {
let values: ArrayRef = Arc::new(Float64Array::from(vec![
Some(f64::NAN),
Some(f64::INFINITY),
Some(f64::NEG_INFINITY),
]));
assert_eq!(array_value_to_json(&values, 0), serde_json::json!("NaN"));
assert_eq!(
array_value_to_json(&values, 1),
serde_json::json!("Infinity")
);
assert_eq!(
array_value_to_json(&values, 2),
serde_json::json!("-Infinity")
);
}
}

View file

@ -0,0 +1,28 @@
pub mod catalog;
pub mod embedding;
pub mod error;
pub mod ir;
pub mod json_output;
pub mod query;
pub mod query_input;
pub mod result;
pub mod schema;
pub mod types;
pub use catalog::build_catalog;
pub use catalog::schema_ir::{
SchemaIR, build_catalog_from_ir, build_schema_ir, schema_ir_hash, schema_ir_json,
schema_ir_pretty_json,
};
pub use catalog::schema_plan::{
SchemaMigrationPlan, SchemaMigrationStep, SchemaTypeKind, plan_schema_migration,
};
pub use ir::ParamMap;
pub use ir::lower::{lower_mutation_query, lower_query};
pub use query::ast::Literal;
pub use query_input::{
JsonParamMode, RunInputError, RunInputResult, ToParam, find_named_query,
json_params_to_param_map,
};
pub use result::{MutationExecResult, MutationResult, QueryResult, RunResult};
pub use types::{Direction, PropType, ScalarType};

View file

@ -0,0 +1,221 @@
/// Reserved parameter name that `now()` is lowered to in the IR.
// NOTE(review): presumably the executor supplies a value under this key at run time — confirm.
pub const NOW_PARAM_NAME: &str = "__nanograph_now";
/// A parsed query source: the query declarations it contains.
#[derive(Debug, Clone)]
pub struct QueryFile {
/// Query declarations contained in the file.
pub queries: Vec<QueryDecl>,
}
/// A single `query name(params) { ... }` declaration.
///
/// The struct carries fields for both a read query (`match_clause`,
/// `return_clause`, `order_clause`, `limit`) and a mutation (`mutation`).
#[derive(Debug, Clone)]
pub struct QueryDecl {
/// Query name.
pub name: String,
/// Optional description text.
pub description: Option<String>,
/// Optional instruction text.
pub instruction: Option<String>,
/// Declared parameters.
pub params: Vec<Param>,
/// Clauses of the `match` block.
pub match_clause: Vec<Clause>,
/// Projections of the `return` block.
pub return_clause: Vec<Projection>,
/// Sort keys of the `order` block.
pub order_clause: Vec<Ordering>,
/// Optional row limit.
pub limit: Option<u64>,
/// The mutation statement, when the query is a mutation.
pub mutation: Option<Mutation>,
}
/// A declared query parameter.
#[derive(Debug, Clone)]
pub struct Param {
/// Parameter name.
pub name: String,
/// Declared type, kept as text (for example `String` or `I32`).
pub type_name: String,
/// Whether the parameter is nullable.
pub nullable: bool,
}
/// One clause inside a `match` block or a `not { ... }` group.
#[derive(Debug, Clone)]
pub enum Clause {
/// Variable binding to a node type, e.g. `$p: Person { name: "Alice" }`.
Binding(Binding),
/// Edge traversal between two variables, e.g. `$p knows $f`.
Traversal(Traversal),
/// Comparison between two expressions, e.g. `$f.age > 25`.
Filter(Filter),
/// Negated group of clauses: `not { ... }`.
Negation(Vec<Clause>),
}
/// Binds a variable to a node type, optionally constraining properties.
#[derive(Debug, Clone)]
pub struct Binding {
/// Variable being bound.
pub variable: String,
/// Node type name.
pub type_name: String,
/// Property matches from the `{ ... }` block; empty when none are given.
pub prop_matches: Vec<PropMatch>,
}
/// A single `prop: value` entry inside a binding's property block.
#[derive(Debug, Clone)]
pub struct PropMatch {
/// Property name.
pub prop_name: String,
/// Value the property is matched against.
pub value: MatchValue,
}
/// The restricted value forms allowed in property matches and mutations.
#[derive(Debug, Clone)]
pub enum MatchValue {
/// Constant value.
Literal(Literal),
/// Variable reference, by name.
Variable(String),
/// The `now()` call.
Now,
}
/// An edge traversal such as `$p knows $f` or `$p knows{1,3} $f`.
#[derive(Debug, Clone)]
pub struct Traversal {
/// Source variable.
pub src: String,
/// Edge name as written in the query.
pub edge_name: String,
/// Destination variable.
pub dst: String,
/// Lower hop bound.
pub min_hops: u32,
/// Upper hop bound, if one was written.
pub max_hops: Option<u32>,
}
/// A comparison `left <op> right` used as a match clause.
#[derive(Debug, Clone)]
pub struct Filter {
/// Left operand.
pub left: Expr,
/// Comparison operator.
pub op: CompOp,
/// Right operand.
pub right: Expr,
}
/// Comparison operators usable in filters and mutation predicates.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompOp {
    Eq,
    Ne,
    Gt,
    Lt,
    Ge,
    Le,
    Contains,
}

/// Renders the operator as it is written in query source.
impl std::fmt::Display for CompOp {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let symbol = match self {
            Self::Eq => "=",
            Self::Ne => "!=",
            Self::Gt => ">",
            Self::Lt => "<",
            Self::Ge => ">=",
            Self::Le => "<=",
            Self::Contains => "contains",
        };
        f.write_str(symbol)
    }
}
/// Query expression as produced by the parser.
#[derive(Debug, Clone)]
pub enum Expr {
/// The `now()` call.
Now,
/// `$variable.property`.
PropAccess {
variable: String,
property: String,
},
/// `nearest($variable.property, query)`.
Nearest {
variable: String,
property: String,
query: Box<Expr>,
},
/// `search(field, query)`.
Search {
field: Box<Expr>,
query: Box<Expr>,
},
/// `fuzzy(field, query[, max_edits])`.
Fuzzy {
field: Box<Expr>,
query: Box<Expr>,
max_edits: Option<Box<Expr>>,
},
/// `match_text(field, query)`.
MatchText {
field: Box<Expr>,
query: Box<Expr>,
},
/// `bm25(field, query)`.
Bm25 {
field: Box<Expr>,
query: Box<Expr>,
},
/// `rrf(primary, secondary[, k])`.
Rrf {
primary: Box<Expr>,
secondary: Box<Expr>,
k: Option<Box<Expr>>,
},
/// Variable reference; lowering decides whether it is a query parameter.
Variable(String),
/// Constant value.
Literal(Literal),
/// Aggregate function applied to `arg`.
Aggregate {
func: AggFunc,
arg: Box<Expr>,
},
/// Reference to an alias by name.
AliasRef(String),
}
/// Aggregate functions available in query expressions.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AggFunc {
    Count,
    Sum,
    Avg,
    Min,
    Max,
}

/// Renders the function under its lowercase query-language name.
impl std::fmt::Display for AggFunc {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let keyword = match self {
            Self::Count => "count",
            Self::Sum => "sum",
            Self::Avg => "avg",
            Self::Min => "min",
            Self::Max => "max",
        };
        f.write_str(keyword)
    }
}
/// A constant value appearing in a query or supplied as a parameter.
#[derive(Debug, Clone)]
pub enum Literal {
/// Text value.
String(String),
/// Integer value; all integer widths are carried as `i64`.
Integer(i64),
/// Floating-point value.
Float(f64),
/// Boolean value.
Bool(bool),
/// Date value, kept as text.
Date(String),
/// Date-time value, kept as text.
DateTime(String),
/// List of literals.
List(Vec<Literal>),
}
/// One entry of the `return` block: an expression with an optional alias.
#[derive(Debug, Clone)]
pub struct Projection {
/// Expression to return.
pub expr: Expr,
/// Alias given with `as`, if any.
pub alias: Option<String>,
}
/// One sort key of the `order` block.
#[derive(Debug, Clone)]
pub struct Ordering {
/// Expression to sort by.
pub expr: Expr,
/// `true` for descending order.
pub descending: bool,
}
/// The statement of a mutation query.
#[derive(Debug, Clone)]
pub enum Mutation {
/// `insert Type { ... }`.
Insert(InsertMutation),
/// `update Type set { ... } where ...`.
Update(UpdateMutation),
/// `delete Type where ...`.
Delete(DeleteMutation),
}
/// `insert Type { prop: value ... }`.
#[derive(Debug, Clone)]
pub struct InsertMutation {
/// Target type name.
pub type_name: String,
/// Property assignments for the new record.
pub assignments: Vec<MutationAssignment>,
}
/// `update Type set { prop: value ... } where prop <op> value`.
#[derive(Debug, Clone)]
pub struct UpdateMutation {
/// Target type name.
pub type_name: String,
/// Property assignments to apply.
pub assignments: Vec<MutationAssignment>,
/// Condition from the `where` part.
pub predicate: MutationPredicate,
}
/// `delete Type where prop <op> value`.
#[derive(Debug, Clone)]
pub struct DeleteMutation {
/// Target type name.
pub type_name: String,
/// Condition from the `where` part.
pub predicate: MutationPredicate,
}
/// A `prop: value` pair inside an insert or update statement.
#[derive(Debug, Clone)]
pub struct MutationAssignment {
/// Property being assigned.
pub property: String,
/// Value to assign.
pub value: MatchValue,
}
/// The `where prop <op> value` condition of an update or delete statement.
#[derive(Debug, Clone)]
pub struct MutationPredicate {
/// Property being compared.
pub property: String,
/// Comparison operator.
pub op: CompOp,
/// Value compared against.
pub value: MatchValue,
}

View file

@ -0,0 +1,3 @@
pub mod ast;
pub mod parser;
pub mod typecheck;

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,114 @@
// NanoGraph Query Grammar (.gq files)
//
// PEG grammar in pest syntax. Choices (`|`) are ordered: the first
// alternative that matches wins, so the order of alternatives below matters.

// Silent rules (`_`): whitespace and comments are skipped between tokens of
// non-atomic rules and produce no parse nodes.
WHITESPACE = _{ " " | "\t" | "\r" | "\n" }
COMMENT = _{ LINE_COMMENT | BLOCK_COMMENT }
LINE_COMMENT = _{ "//" ~ (!"\n" ~ ANY)* }
BLOCK_COMMENT = _{ "/*" ~ (!"*/" ~ ANY)* ~ "*/" }
// A file is zero or more query declarations.
query_file = { SOI ~ query_decl* ~ EOI }
query_decl = {
"query" ~ ident ~ "(" ~ param_list? ~ ")" ~ query_annotation* ~ "{"
~ query_body
~ "}"
}
// Annotations sit between the parameter list and the opening brace.
query_annotation = { description_annotation | instruction_annotation }
description_annotation = { "@description" ~ "(" ~ string_lit ~ ")" }
instruction_annotation = { "@instruction" ~ "(" ~ string_lit ~ ")" }
// A query body is either a read query or exactly one mutation statement.
query_body = { read_query_body | mutation_stmt }
read_query_body = {
match_clause
~ return_clause
~ order_clause?
~ limit_clause?
}
mutation_stmt = { insert_stmt | update_stmt | delete_stmt }
insert_stmt = { "insert" ~ type_name ~ "{" ~ mutation_assignment+ ~ "}" }
update_stmt = { "update" ~ type_name ~ "set" ~ "{" ~ mutation_assignment+ ~ "}" ~ "where" ~ mutation_predicate }
delete_stmt = { "delete" ~ type_name ~ "where" ~ mutation_predicate }
// The trailing comma after an assignment is optional.
mutation_assignment = { ident ~ ":" ~ match_value ~ ","? }
mutation_predicate = { ident ~ comp_op ~ match_value }
param_list = { param ~ ("," ~ param)* }
param = { variable ~ ":" ~ type_ref }
// A trailing `?` marks the type as nullable.
type_ref = { (list_type | base_type | vector_type) ~ "?"? }
list_type = { "[" ~ base_type ~ "]" }
vector_type = { "Vector" ~ "(" ~ integer ~ ")" }
// "DateTime" is listed before "Date" so the longer keyword is tried first.
base_type = { "String" | "Blob" | "Bool" | "I32" | "I64" | "U32" | "U64" | "F32" | "F64" | "DateTime" | "Date" }
match_clause = { "match" ~ "{" ~ clause+ ~ "}" }
clause = { negation | binding | traversal | filter | text_search_clause }
text_search_clause = { search_call | fuzzy_call | match_text_call }
// Binding: $p: Person { name: "Alice" }
binding = { variable ~ ":" ~ type_name ~ ("{" ~ prop_match_list ~ "}")? }
prop_match_list = { prop_match ~ ("," ~ prop_match)* ~ ","? }
prop_match = { ident ~ ":" ~ match_value }
match_value = { literal | variable | now_call }
// Traversal: $p knows $f
// With hop bounds: $p knows{1,3} $f (the upper bound may be omitted: {1,})
traversal = { variable ~ edge_ident ~ traversal_bounds? ~ variable }
traversal_bounds = { "{" ~ integer ~ "," ~ integer? ~ "}" }
// Filter: $f.age > 25
filter = { expr ~ filter_op ~ expr }
// Negation: not { ... }
negation = { "not" ~ "{" ~ clause+ ~ "}" }
// Return clause — projections separated by commas or newlines
return_clause = { "return" ~ "{" ~ projection+ ~ "}" }
projection = { expr ~ ("as" ~ ident)? ~ ","? }
// Order clause
order_clause = { "order" ~ "{" ~ ordering ~ ("," ~ ordering)* ~ "}" }
ordering = { nearest_ordering | (expr ~ order_dir?) }
nearest_ordering = { "nearest" ~ "(" ~ prop_access ~ "," ~ expr ~ ")" }
order_dir = { "asc" | "desc" }
// Limit clause
limit_clause = { "limit" ~ integer }
// Expressions
// Function-call forms are tried before prop_access, variable, literal and ident.
expr = { now_call | nearest_ordering | search_call | fuzzy_call | match_text_call | bm25_call | rrf_call | agg_call | prop_access | variable | literal | ident }
now_call = { "now" ~ "(" ~ ")" }
search_call = { "search" ~ "(" ~ expr ~ "," ~ expr ~ ")" }
fuzzy_call = { "fuzzy" ~ "(" ~ expr ~ "," ~ expr ~ ("," ~ expr)? ~ ")" }
match_text_call = { "match_text" ~ "(" ~ expr ~ "," ~ expr ~ ")" }
bm25_call = { "bm25" ~ "(" ~ expr ~ "," ~ expr ~ ")" }
// rrf accepts only ranking expressions as its first two arguments.
rank_expr = { nearest_ordering | bm25_call }
rrf_call = { "rrf" ~ "(" ~ rank_expr ~ "," ~ rank_expr ~ ("," ~ expr)? ~ ")" }
prop_access = { variable ~ "." ~ ident }
agg_call = { agg_func ~ "(" ~ expr ~ ")" }
agg_func = { "count" | "sum" | "avg" | "min" | "max" }
// Two-character operators come first so ">=" is not read as ">" followed by "=".
comp_op = { ">=" | "<=" | "!=" | ">" | "<" | "=" }
filter_op = { "contains" | comp_op }
// Terminals
// Atomic rules (`@`): no implicit whitespace is allowed inside them.
variable = @{ "$" ~ (ident_chars | "_") }
ident_chars = @{ (ASCII_ALPHA_LOWER | "_") ~ (ASCII_ALPHANUMERIC | "_")* }
// Edge identifier — lowercase start, same as ident but used in traversal context
// Must not match keywords
// The lookahead rejects the word "not" unless an alphanumeric character follows it.
edge_ident = @{ !("not" ~ !ASCII_ALPHANUMERIC) ~ (ASCII_ALPHA_LOWER | "_") ~ (ASCII_ALPHANUMERIC | "_")* }
type_name = @{ ASCII_ALPHA_UPPER ~ (ASCII_ALPHANUMERIC | "_")* }
ident = @{ (ASCII_ALPHA_LOWER | "_") ~ (ASCII_ALPHANUMERIC | "_")* }
// float_lit is tried before integer so "1.5" is not consumed as the integer 1.
literal = { list_lit | datetime_lit | date_lit | string_lit | float_lit | integer | bool_lit }
date_lit = { "date" ~ "(" ~ string_lit ~ ")" }
datetime_lit = { "datetime" ~ "(" ~ string_lit ~ ")" }
list_lit = { "[" ~ (literal ~ ("," ~ literal)*)? ~ "]" }
// Strings are double-quoted; a backslash escapes the character that follows it.
string_lit = @{ "\"" ~ string_char* ~ "\"" }
string_char = @{ !("\"" | "\\") ~ ANY | "\\" ~ ANY }
// Numeric literals are unsigned digit sequences; no sign is accepted here.
float_lit = @{ ASCII_DIGIT+ ~ "." ~ ASCII_DIGIT+ }
integer = @{ ASCII_DIGIT+ }
bool_lit = { "true" | "false" }

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,892 @@
use std::error::Error;
use std::fmt;
use serde_json::Value;
use crate::error::NanoError;
use crate::ir::ParamMap;
use crate::json_output::{JS_MAX_SAFE_INTEGER_U64, is_js_safe_integer_i64};
use crate::query::ast::{Literal, Param, QueryDecl};
use crate::query::parser::parse_query;
/// Selects how JSON parameter values are interpreted and how errors are worded.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum JsonParamMode {
/// Standard JSON callers.
Standard,
/// JavaScript callers.
// NOTE(review): presumably applies JS-safe integer rules to numeric params — confirm in the parse helpers.
JavaScript,
}
/// Error raised while preparing the inputs of a query run.
#[derive(Debug)]
pub enum RunInputError {
/// An error propagated from the core engine.
Core(NanoError),
/// A plain message describing invalid caller input.
Message(String),
}
impl RunInputError {
/// Builds a `Message` error from anything convertible into a `String`.
fn message(message: impl Into<String>) -> Self {
Self::Message(message.into())
}
}
/// Core errors delegate to the wrapped error's formatting; message errors
/// print their text as is.
impl fmt::Display for RunInputError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Core(err) => err.fmt(f),
Self::Message(message) => f.write_str(message),
}
}
}
/// Exposes the wrapped core error as the source; message errors have none.
impl Error for RunInputError {
fn source(&self) -> Option<&(dyn Error + 'static)> {
match self {
Self::Core(err) => Some(err),
Self::Message(_) => None,
}
}
}
/// Lets `?` lift a core `NanoError` into `RunInputError::Core`.
impl From<NanoError> for RunInputError {
fn from(value: NanoError) -> Self {
Self::Core(value)
}
}
/// Result alias whose error type is [`RunInputError`].
pub type RunInputResult<T> = std::result::Result<T, RunInputError>;
/// Conversion of a Rust value into a query-parameter [`Literal`].
pub trait ToParam {
/// Consumes `self` and returns the literal, or an error when the value
/// cannot be represented (for example an out-of-range integer or a
/// non-finite float).
fn to_param(self) -> crate::error::Result<Literal>;
}
/// A literal is already a parameter value; it is returned unchanged.
impl ToParam for Literal {
fn to_param(self) -> crate::error::Result<Literal> {
Ok(self)
}
}
impl ToParam for &Literal {
fn to_param(self) -> crate::error::Result<Literal> {
Ok(self.clone())
}
}
/// An owned string becomes `Literal::String` without copying.
impl ToParam for String {
fn to_param(self) -> crate::error::Result<Literal> {
Ok(Literal::String(self))
}
}
/// A borrowed `String` is copied into `Literal::String`.
impl ToParam for &String {
    fn to_param(self) -> crate::error::Result<Literal> {
        let text = self.to_owned();
        Ok(Literal::String(text))
    }
}
/// A string slice is copied into `Literal::String`.
impl ToParam for &str {
    fn to_param(self) -> crate::error::Result<Literal> {
        let text = self.to_owned();
        Ok(Literal::String(text))
    }
}
/// A boolean becomes `Literal::Bool`.
impl ToParam for bool {
fn to_param(self) -> crate::error::Result<Literal> {
Ok(Literal::Bool(self))
}
}
/// `i8` widens losslessly to the `i64` held by `Literal::Integer`.
impl ToParam for i8 {
    fn to_param(self) -> crate::error::Result<Literal> {
        let widened: i64 = self.into();
        Ok(Literal::Integer(widened))
    }
}
/// `i16` widens losslessly to the `i64` held by `Literal::Integer`.
impl ToParam for i16 {
    fn to_param(self) -> crate::error::Result<Literal> {
        let widened: i64 = self.into();
        Ok(Literal::Integer(widened))
    }
}
/// `i32` widens losslessly to the `i64` held by `Literal::Integer`.
impl ToParam for i32 {
    fn to_param(self) -> crate::error::Result<Literal> {
        let widened: i64 = self.into();
        Ok(Literal::Integer(widened))
    }
}
/// `i64` is the native width of `Literal::Integer`; no conversion is needed.
impl ToParam for i64 {
fn to_param(self) -> crate::error::Result<Literal> {
Ok(Literal::Integer(self))
}
}
/// `isize` is converted with a checked cast; a value that does not fit in
/// `i64` is reported as an execution error.
impl ToParam for isize {
    fn to_param(self) -> crate::error::Result<Literal> {
        match i64::try_from(self) {
            Ok(fitted) => Ok(Literal::Integer(fitted)),
            Err(_) => Err(NanoError::Execution(format!(
                "param value {} exceeds current engine range for numeric literals (max {})",
                self,
                i64::MAX
            ))),
        }
    }
}
/// `u8` widens losslessly to the `i64` held by `Literal::Integer`.
impl ToParam for u8 {
    fn to_param(self) -> crate::error::Result<Literal> {
        let widened: i64 = self.into();
        Ok(Literal::Integer(widened))
    }
}
/// `u16` widens losslessly to the `i64` held by `Literal::Integer`.
impl ToParam for u16 {
    fn to_param(self) -> crate::error::Result<Literal> {
        let widened: i64 = self.into();
        Ok(Literal::Integer(widened))
    }
}
/// `u32` widens losslessly to the `i64` held by `Literal::Integer`.
impl ToParam for u32 {
    fn to_param(self) -> crate::error::Result<Literal> {
        let widened: i64 = self.into();
        Ok(Literal::Integer(widened))
    }
}
/// `u64` is converted with a checked cast; values above `i64::MAX` are
/// reported as an execution error.
impl ToParam for u64 {
    fn to_param(self) -> crate::error::Result<Literal> {
        match i64::try_from(self) {
            Ok(fitted) => Ok(Literal::Integer(fitted)),
            Err(_) => Err(NanoError::Execution(format!(
                "param value {} exceeds current engine range for numeric literals (max {})",
                self,
                i64::MAX
            ))),
        }
    }
}
/// `usize` is converted with a checked cast; values above `i64::MAX` are
/// reported as an execution error.
impl ToParam for usize {
    fn to_param(self) -> crate::error::Result<Literal> {
        match i64::try_from(self) {
            Ok(fitted) => Ok(Literal::Integer(fitted)),
            Err(_) => Err(NanoError::Execution(format!(
                "param value {} exceeds current engine range for numeric literals (max {})",
                self,
                i64::MAX
            ))),
        }
    }
}
/// A finite `f32` widens to `Literal::Float`; NaN and infinities are rejected.
impl ToParam for f32 {
    fn to_param(self) -> crate::error::Result<Literal> {
        if self.is_finite() {
            Ok(Literal::Float(self.into()))
        } else {
            Err(NanoError::Execution(format!(
                "invalid float parameter {}",
                self
            )))
        }
    }
}
/// A finite `f64` becomes `Literal::Float`; NaN and infinities are rejected.
impl ToParam for f64 {
    fn to_param(self) -> crate::error::Result<Literal> {
        if self.is_finite() {
            Ok(Literal::Float(self))
        } else {
            Err(NanoError::Execution(format!(
                "invalid float parameter {}",
                self
            )))
        }
    }
}
impl<T> ToParam for Vec<T>
where
T: ToParam,
{
fn to_param(self) -> crate::error::Result<Literal> {
let mut out = Vec::with_capacity(self.len());
for value in self {
out.push(value.to_param()?);
}
Ok(Literal::List(out))
}
}
impl<T> ToParam for &[T]
where
T: Clone + ToParam,
{
fn to_param(self) -> crate::error::Result<Literal> {
let mut out = Vec::with_capacity(self.len());
for value in self {
out.push(value.clone().to_param()?);
}
Ok(Literal::List(out))
}
}
impl<T, const N: usize> ToParam for [T; N]
where
T: ToParam,
{
fn to_param(self) -> crate::error::Result<Literal> {
let mut out = Vec::with_capacity(N);
for value in self {
out.push(value.to_param()?);
}
Ok(Literal::List(out))
}
}
/// Builds a `ParamMap` from `key => value` pairs.
///
/// Evaluates to a `Result`: with no arguments it is `Ok` of an empty map;
/// otherwise each key is converted with `Into<String>` and each value with
/// `ToParam::to_param`, and the first conversion error is returned.
/// A trailing comma is accepted.
#[macro_export]
macro_rules! params {
() => {
::std::result::Result::Ok($crate::ParamMap::new())
};
($($key:expr => $value:expr),+ $(,)?) => {{
// The immediately-invoked closure gives `?` a Result-returning scope, so
// the macro can be used in any expression position.
(|| -> $crate::error::Result<$crate::ParamMap> {
let mut map = $crate::ParamMap::new();
$(
map.insert(::std::convert::Into::<String>::into($key), $crate::ToParam::to_param($value)?);
)+
Ok(map)
})()
}};
}
/// Parses `query_source` and returns the first query declaration whose name
/// equals `query_name`.
///
/// # Errors
///
/// Propagates any parse error from `parse_query`, and returns
/// `query '<name>' not found` when no declaration matches.
pub fn find_named_query(query_source: &str, query_name: &str) -> RunInputResult<QueryDecl> {
    let parsed = parse_query(query_source)?;
    for candidate in parsed.queries {
        if candidate.name == query_name {
            return Ok(candidate);
        }
    }
    Err(RunInputError::message(format!(
        "query '{}' not found",
        query_name
    )))
}
/// Converts a JSON parameter object into a `ParamMap`.
///
/// * `params` — `None` or JSON `null` yields an empty map; a JSON object is
///   converted entry by entry; any other JSON value is an error.
/// * `query_params` — declared query parameters. An entry whose key matches a
///   declared name is converted according to the declared type; other entries
///   have their literal type inferred from the JSON value.
/// * `mode` — selects the wording of error messages and, for JavaScript
///   callers, the additional safe-integer checks done by the helpers.
///
/// # Errors
///
/// Returns the first conversion error encountered.
pub fn json_params_to_param_map(
    params: Option<&Value>,
    query_params: &[Param],
    mode: JsonParamMode,
) -> RunInputResult<ParamMap> {
    let entries = match params {
        None | Some(Value::Null) => return Ok(ParamMap::new()),
        Some(Value::Object(entries)) => entries,
        Some(unexpected) => {
            let message = match mode {
                JsonParamMode::JavaScript => format!(
                    "params must be an object, got {}",
                    json_type_name(unexpected)
                ),
                JsonParamMode::Standard => "params must be a JSON object".to_string(),
            };
            return Err(RunInputError::message(message));
        }
    };

    let mut converted = ParamMap::new();
    for (name, raw) in entries {
        let declared = query_params.iter().find(|param| param.name == *name);
        let literal = match declared {
            Some(decl) => json_value_to_literal_typed(name, raw, &decl.type_name, mode)?,
            None => json_value_to_literal_inferred(name, raw, mode)?,
        };
        converted.insert(name.clone(), literal);
    }
    Ok(converted)
}
/// Converts one JSON value into a `Literal` according to a declared parameter
/// type name.
///
/// * `key` — parameter name, used only in error messages.
/// * `value` — the JSON value supplied for the parameter.
/// * `type_name` — declared type as text: a scalar name (`String`, `I32`,
///   `I64`, `U32`, `U64`, `F32`, `F64`, `Bool`, `Date`, `DateTime`, `Blob`), a
///   bracketed list type such as `[String]`, or `Vector(N)`. Any other name is
///   treated as a string-valued type.
/// * `mode` — selects the wording of error messages; the integer helpers
///   additionally apply JS safe-integer checks in `JavaScript` mode.
///
/// Returns the literal, or a `RunInputError` describing the type mismatch or
/// range violation.
fn json_value_to_literal_typed(
key: &str,
value: &Value,
type_name: &str,
mode: JsonParamMode,
) -> RunInputResult<Literal> {
match type_name {
"String" => match value {
Value::String(value) => Ok(Literal::String(value.clone())),
other => Err(RunInputError::message(format!(
"param '{}': expected string, got {}",
key,
json_type_name(other)
))),
},
// I32: parse as i64 first, then narrow. The two modes differ only in the
// wording of the out-of-range message.
"I32" => match mode {
JsonParamMode::Standard => {
let value = parse_i64_param(key, value, mode)?;
let value = i32::try_from(value).map_err(|_| {
RunInputError::message(format!("param '{}': value {} exceeds I32", key, value))
})?;
Ok(Literal::Integer(i64::from(value)))
}
JsonParamMode::JavaScript => {
let value = parse_i64_param(key, value, mode)?;
let value = i32::try_from(value).map_err(|_| {
RunInputError::message(format!(
"param '{}': value {} exceeds I32 range",
key, value
))
})?;
Ok(Literal::Integer(i64::from(value)))
}
},
"I64" => Ok(Literal::Integer(parse_i64_param(key, value, mode)?)),
// U32: parse as u64, narrow to u32, then store as a signed integer literal.
"U32" => {
let value = parse_u64_param(key, value, mode)?;
let value = match mode {
JsonParamMode::Standard => u32::try_from(value).map_err(|_| {
RunInputError::message(format!("param '{}': value {} exceeds U32", key, value))
})?,
JsonParamMode::JavaScript => u32::try_from(value).map_err(|_| {
RunInputError::message(format!(
"param '{}': value {} exceeds U32 range",
key, value
))
})?,
};
Ok(Literal::Integer(i64::from(value)))
}
// U64: integer literals are i64, so values above i64::MAX are rejected.
"U64" => {
let value = parse_u64_param(key, value, mode)?;
let value = match mode {
JsonParamMode::Standard => i64::try_from(value).map_err(|_| {
RunInputError::message(format!(
"param '{}': value {} exceeds current engine range for U64 (max {})",
key,
value,
i64::MAX
))
})?,
JsonParamMode::JavaScript => i64::try_from(value).map_err(|_| {
RunInputError::message(format!(
"param '{}': value {} exceeds current engine range for U64 parameters (max {})",
key,
value,
i64::MAX
))
})?,
};
Ok(Literal::Integer(value))
}
// F32 and F64 share one path: the value is kept as f64 with no
// F32-specific range check here.
"F32" | "F64" => {
let value = value.as_f64().ok_or_else(|| match mode {
JsonParamMode::Standard => {
RunInputError::message(format!("param '{}': expected float", key))
}
JsonParamMode::JavaScript => RunInputError::message(format!(
"param '{}': expected float, got {}",
key,
json_type_name(value)
)),
})?;
Ok(Literal::Float(value))
}
"Bool" => {
let value = value.as_bool().ok_or_else(|| match mode {
JsonParamMode::Standard => {
RunInputError::message(format!("param '{}': expected boolean", key))
}
JsonParamMode::JavaScript => RunInputError::message(format!(
"param '{}': expected boolean, got {}",
key,
json_type_name(value)
)),
})?;
Ok(Literal::Bool(value))
}
// Date / DateTime: the string is carried through unchanged; its format is
// not validated in this function.
"Date" => match value {
Value::String(value) => Ok(Literal::Date(value.clone())),
other => Err(match mode {
JsonParamMode::Standard => {
RunInputError::message(format!("param '{}': expected date string", key))
}
JsonParamMode::JavaScript => RunInputError::message(format!(
"param '{}': expected date string, got {}",
key,
json_type_name(other)
)),
}),
},
"DateTime" => match value {
Value::String(value) => Ok(Literal::DateTime(value.clone())),
other => Err(match mode {
JsonParamMode::Standard => {
RunInputError::message(format!("param '{}': expected datetime string", key))
}
JsonParamMode::JavaScript => RunInputError::message(format!(
"param '{}': expected datetime string, got {}",
key,
json_type_name(other)
)),
}),
},
// Blob parameters are passed as a URI string and stored as a string literal.
"Blob" => match value {
Value::String(value) => Ok(Literal::String(value.clone())),
other => Err(RunInputError::message(format!(
"param '{}': expected blob URI string, got {}",
key,
json_type_name(other)
))),
},
// List types (`[T]`): every array element is converted recursively with the
// item type `T`; element errors are reported under the same `key`.
other if parse_list_item_type(other).is_some() => {
// The guard above guarantees this `unwrap` cannot fail.
let item_type = parse_list_item_type(other).unwrap();
let items = value.as_array().ok_or_else(|| match mode {
JsonParamMode::Standard => {
RunInputError::message(format!("param '{}': expected array for {}", key, other))
}
JsonParamMode::JavaScript => RunInputError::message(format!(
"param '{}': expected array for {}, got {}",
key,
other,
json_type_name(value)
)),
})?;
let mut out = Vec::with_capacity(items.len());
for item in items {
out.push(json_value_to_literal_typed(key, item, item_type, mode)?);
}
Ok(Literal::List(out))
}
// Vector(N): requires an array of exactly N numeric elements and produces a
// list of float literals.
other if other.starts_with("Vector(") => {
let expected_dim = parse_vector_dim(other).ok_or_else(|| match mode {
JsonParamMode::Standard => RunInputError::message(format!(
"param '{}': invalid vector type '{}'",
key, other
)),
JsonParamMode::JavaScript => RunInputError::message(format!(
"param '{}': invalid vector type '{}' (expected Vector(N))",
key, other
)),
})?;
let items = value.as_array().ok_or_else(|| match mode {
JsonParamMode::Standard => {
RunInputError::message(format!("param '{}': expected array for {}", key, other))
}
JsonParamMode::JavaScript => RunInputError::message(format!(
"param '{}': expected array for {}, got {}",
key,
other,
json_type_name(value)
)),
})?;
if items.len() != expected_dim {
return Err(RunInputError::message(format!(
"param '{}': expected {} values for {}, got {}",
key,
expected_dim,
other,
items.len()
)));
}
let mut out = Vec::with_capacity(items.len());
for item in items {
let value = item.as_f64().ok_or_else(|| match mode {
JsonParamMode::Standard => RunInputError::message(format!(
"param '{}': vector element is not numeric",
key
)),
JsonParamMode::JavaScript => RunInputError::message(format!(
"param '{}': vector element '{}' is not numeric",
key, item
)),
})?;
out.push(Literal::Float(value));
}
Ok(Literal::List(out))
}
// Any other declared type name: only a JSON string is accepted.
_ => match value {
Value::String(value) => Ok(Literal::String(value.clone())),
other => Err(RunInputError::message(format!(
"param '{}': expected string for type '{}', got {}",
key,
type_name,
json_type_name(other)
))),
},
}
}
/// Converts a JSON value into a `Literal` when no declared parameter type is
/// available, choosing the literal kind from the JSON value itself.
///
/// Strings, booleans and numbers map to the matching literal; arrays are
/// converted element by element into a list; `null` and objects are rejected.
/// In `JavaScript` mode, integers outside the JS safe-integer range are
/// rejected as well. `key` is used only in error messages.
fn json_value_to_literal_inferred(
    key: &str,
    value: &Value,
    mode: JsonParamMode,
) -> RunInputResult<Literal> {
    // Handle every non-numeric shape first; numbers fall through below.
    let number = match value {
        Value::Number(number) => number,
        Value::String(text) => return Ok(Literal::String(text.clone())),
        Value::Bool(flag) => return Ok(Literal::Bool(*flag)),
        Value::Array(elements) => {
            return elements
                .iter()
                .map(|element| json_value_to_literal_inferred(key, element, mode))
                .collect::<RunInputResult<Vec<_>>>()
                .map(Literal::List);
        }
        Value::Null => {
            let message = match mode {
                JsonParamMode::Standard => {
                    format!("param '{}': null is not supported", key)
                }
                JsonParamMode::JavaScript => format!(
                    "param '{}': null values are not supported as query parameters",
                    key
                ),
            };
            return Err(RunInputError::message(message));
        }
        Value::Object(_) => {
            let message = match mode {
                JsonParamMode::Standard => {
                    format!("param '{}': object is not supported", key)
                }
                JsonParamMode::JavaScript => format!(
                    "param '{}': object values are not supported as query parameters",
                    key
                ),
            };
            return Err(RunInputError::message(message));
        }
    };

    match mode {
        JsonParamMode::Standard => number
            .as_i64()
            .map(Literal::Integer)
            .or_else(|| number.as_f64().map(Literal::Float))
            .ok_or_else(|| {
                RunInputError::message(format!(
                    "param '{}': unsupported numeric value",
                    key
                ))
            }),
        JsonParamMode::JavaScript => {
            if let Some(signed) = number.as_i64() {
                return if is_js_safe_integer_i64(signed) {
                    Ok(Literal::Integer(signed))
                } else {
                    Err(RunInputError::message(format!(
                        "param '{}': integer {} exceeds JS safe integer range; use a decimal string and a typed query parameter for exact values",
                        key, signed
                    )))
                };
            }
            if let Some(unsigned) = number.as_u64() {
                if unsigned > JS_MAX_SAFE_INTEGER_U64 {
                    return Err(RunInputError::message(format!(
                        "param '{}': integer {} exceeds JS safe integer range; use a decimal string and a typed query parameter for exact values",
                        key, unsigned
                    )));
                }
                return match i64::try_from(unsigned) {
                    Ok(narrowed) => Ok(Literal::Integer(narrowed)),
                    Err(_) => Err(RunInputError::message(format!(
                        "param '{}': integer {} exceeds supported range (max {})",
                        key,
                        unsigned,
                        i64::MAX
                    ))),
                };
            }
            match number.as_f64() {
                Some(float) => Ok(Literal::Float(float)),
                None => Err(RunInputError::message(format!(
                    "param '{}': unsupported number value",
                    key
                ))),
            }
        }
    }
}
/// Parses a JSON value as a signed 64-bit integer parameter.
///
/// Both modes accept a JSON number or a decimal string.
///
/// * `Standard` — a number must be representable as `i64`.
/// * `JavaScript` — a number may also be a float with no fractional part; it
///   must lie inside the `i64` range and inside the JS safe-integer range.
///   Strings are parsed as `i64` without the safe-integer restriction, which
///   is how callers pass exact large values.
///
/// `key` is used only in error messages.
///
/// # Errors
///
/// Returns a `RunInputError` when the value is not an integer, is out of
/// range, or has an unsupported JSON type.
fn parse_i64_param(key: &str, value: &Value, mode: JsonParamMode) -> RunInputResult<i64> {
    match mode {
        JsonParamMode::Standard => match value {
            Value::Number(number) => number.as_i64().ok_or_else(|| {
                RunInputError::message(format!("param '{}': expected integer number", key))
            }),
            Value::String(value) => value.parse::<i64>().map_err(|_| {
                RunInputError::message(format!(
                    "param '{}': expected integer string, got '{}'",
                    key, value
                ))
            }),
            _ => Err(RunInputError::message(format!(
                "param '{}': expected integer",
                key
            ))),
        },
        JsonParamMode::JavaScript => match value {
            Value::Number(number) => {
                let parsed = if let Some(parsed) = number.as_i64() {
                    parsed
                } else if let Some(parsed) = number.as_f64() {
                    if !parsed.is_finite() || parsed.fract() != 0.0 {
                        return Err(RunInputError::message(format!(
                            "param '{}': expected integer, got number",
                            key
                        )));
                    }
                    // `i64::MAX as f64` rounds up to 2^63, which is itself out of
                    // range, so the upper comparison must be inclusive. With `>`
                    // the value 2^63 slipped through and the cast below saturated
                    // it to `i64::MAX`, making later errors report the wrong
                    // number. `i64::MIN as f64` is exactly -2^63 and is in range.
                    if parsed < i64::MIN as f64 || parsed >= i64::MAX as f64 {
                        return Err(RunInputError::message(format!(
                            "param '{}': integer {} is outside i64 range",
                            key, parsed
                        )));
                    }
                    // Exact: `parsed` is integral and strictly inside the i64 range.
                    parsed as i64
                } else {
                    return Err(RunInputError::message(format!(
                        "param '{}': expected integer, got number",
                        key
                    )));
                };
                if !is_js_safe_integer_i64(parsed) {
                    return Err(RunInputError::message(format!(
                        "param '{}': integer {} exceeds JS safe integer range; pass a decimal string for exact values",
                        key, parsed
                    )));
                }
                Ok(parsed)
            }
            Value::String(value) => value.parse::<i64>().map_err(|_| {
                RunInputError::message(format!(
                    "param '{}': expected integer string, got '{}'",
                    key, value
                ))
            }),
            other => Err(RunInputError::message(format!(
                "param '{}': expected integer, got {}",
                key,
                json_type_name(other)
            ))),
        },
    }
}
/// Parses a JSON value as an unsigned 64-bit integer parameter.
///
/// Both modes accept a JSON number or a decimal string.
///
/// * `Standard` — a number must be representable as `u64`.
/// * `JavaScript` — a number may also be a non-negative float with no
///   fractional part; it must lie inside the `u64` range and must not exceed
///   the JS safe-integer maximum. Strings are parsed as `u64` without the
///   safe-integer restriction, which is how callers pass exact large values.
///
/// `key` is used only in error messages.
///
/// # Errors
///
/// Returns a `RunInputError` when the value is not an unsigned integer, is out
/// of range, or has an unsupported JSON type.
fn parse_u64_param(key: &str, value: &Value, mode: JsonParamMode) -> RunInputResult<u64> {
    match mode {
        JsonParamMode::Standard => match value {
            Value::Number(number) => number.as_u64().ok_or_else(|| {
                RunInputError::message(format!(
                    "param '{}': expected unsigned integer number",
                    key
                ))
            }),
            Value::String(value) => value.parse::<u64>().map_err(|_| {
                RunInputError::message(format!(
                    "param '{}': expected unsigned integer string, got '{}'",
                    key, value
                ))
            }),
            _ => Err(RunInputError::message(format!(
                "param '{}': expected unsigned integer",
                key
            ))),
        },
        JsonParamMode::JavaScript => match value {
            Value::Number(number) => {
                let parsed = if let Some(parsed) = number.as_u64() {
                    parsed
                } else if let Some(parsed) = number.as_f64() {
                    if !parsed.is_finite() || parsed.fract() != 0.0 || parsed < 0.0 {
                        return Err(RunInputError::message(format!(
                            "param '{}': expected unsigned integer, got number",
                            key
                        )));
                    }
                    // `u64::MAX as f64` rounds up to 2^64, which is itself out of
                    // range, so the comparison must be inclusive. With `>` the
                    // value 2^64 slipped through and the cast below saturated it
                    // to `u64::MAX`, making later errors report the wrong number.
                    if parsed >= u64::MAX as f64 {
                        return Err(RunInputError::message(format!(
                            "param '{}': integer {} is outside u64 range",
                            key, parsed
                        )));
                    }
                    // Exact: `parsed` is integral, non-negative and below 2^64.
                    parsed as u64
                } else {
                    return Err(RunInputError::message(format!(
                        "param '{}': expected unsigned integer, got number",
                        key
                    )));
                };
                if parsed > JS_MAX_SAFE_INTEGER_U64 {
                    return Err(RunInputError::message(format!(
                        "param '{}': integer {} exceeds JS safe integer range; pass a decimal string for exact values",
                        key, parsed
                    )));
                }
                Ok(parsed)
            }
            Value::String(value) => value.parse::<u64>().map_err(|_| {
                RunInputError::message(format!(
                    "param '{}': expected unsigned integer string, got '{}'",
                    key, value
                ))
            }),
            other => Err(RunInputError::message(format!(
                "param '{}': expected unsigned integer, got {}",
                key,
                json_type_name(other)
            ))),
        },
    }
}
/// Extracts `N` from a type name of the form `Vector(N)`.
///
/// Returns `None` when the text is not wrapped in `Vector(` … `)`, when the
/// inner text does not parse as `usize`, or when the dimension is zero.
fn parse_vector_dim(type_name: &str) -> Option<usize> {
    let inner = type_name.strip_prefix("Vector(")?.strip_suffix(')')?;
    match inner.parse::<usize>() {
        Ok(0) | Err(_) => None,
        Ok(dim) => Some(dim),
    }
}
/// Returns the item type of a bracketed list type name such as `[String]`.
///
/// The text between the outer `[` and `]` is returned with surrounding
/// whitespace trimmed; `None` is returned when either bracket is missing.
fn parse_list_item_type(type_name: &str) -> Option<&str> {
    let after_open = type_name.strip_prefix('[')?;
    let inner = after_open.strip_suffix(']')?;
    Some(inner.trim())
}
/// Names the JSON type of `value` for use in error messages.
fn json_type_name(value: &Value) -> &'static str {
    match *value {
        // Containers.
        Value::Object(_) => "object",
        Value::Array(_) => "array",
        // Scalars.
        Value::String(_) => "string",
        Value::Number(_) => "number",
        Value::Bool(_) => "boolean",
        Value::Null => "null",
    }
}
// Unit tests for parameter conversion: the `ToParam` impls, the `params!`
// macro, and JSON-to-`ParamMap` conversion in both `JsonParamMode`s.
#[cfg(test)]
mod tests {
use serde_json::json;
use super::{JsonParamMode, ToParam, find_named_query, json_params_to_param_map};
use crate::query::ast::Literal;
// 9_007_199_254_740_992 is 2^53, one above the JS safe-integer maximum, so a
// typed U64 parameter supplied as a JSON number must be rejected in JS mode.
#[test]
fn js_mode_rejects_unsafe_integer_numbers() {
let query = find_named_query(
"query find($id: U64) { match { $u: User } return { $u } }",
"find",
)
.expect("query should parse");
let error = json_params_to_param_map(
Some(&json!({ "id": 9_007_199_254_740_992u64 })),
&query.params,
JsonParamMode::JavaScript,
)
.expect_err("unsafe integer should fail");
assert_eq!(
error.to_string(),
"param 'id': integer 9007199254740992 exceeds JS safe integer range; pass a decimal string for exact values"
);
}
// Pins the exact Standard-mode message for a non-object `params` value.
#[test]
fn standard_mode_preserves_ffi_param_object_error() {
let error = json_params_to_param_map(Some(&json!(["nope"])), &[], JsonParamMode::Standard)
.expect_err("non-object params should fail");
assert_eq!(error.to_string(), "params must be a JSON object");
}
// A `Vec` converts to a list literal, and a `Literal` passed to `to_param`
// comes back as the same variant.
#[test]
fn to_param_supports_lists_and_explicit_date_literals() {
let vector = vec![1_i32, 2_i32, 3_i32].to_param().expect("vector param");
match vector {
Literal::List(values) => {
assert!(matches!(values.first(), Some(Literal::Integer(1))));
assert!(matches!(values.get(1), Some(Literal::Integer(2))));
assert!(matches!(values.get(2), Some(Literal::Integer(3))));
}
other => panic!("expected list param, got {:?}", other),
}
let date = Literal::Date("2026-03-06".to_string())
.to_param()
.expect("date param");
assert!(matches!(date, Literal::Date(ref value) if value == "2026-03-06"));
}
// `u64::MAX` does not fit in the i64-backed integer literal.
#[test]
fn to_param_rejects_unsigned_values_outside_engine_range() {
let error = u64::MAX.to_param().expect_err("oversized u64 should fail");
assert_eq!(
error.to_string(),
format!(
"execution error: param value {} exceeds current engine range for numeric literals (max {})",
u64::MAX,
i64::MAX
)
);
}
// Exercises the non-empty arm of `params!`, including a trailing comma and
// mixed value types (string, integer, array, explicit literal).
#[test]
fn params_macro_builds_param_map() {
let params = params! {
"name" => "Alice",
"age" => 41_i32,
"scores" => [1_u8, 2_u8, 3_u8],
"published_at" => Literal::DateTime("2026-03-06T12:00:00Z".to_string()),
}
.expect("params");
assert!(matches!(
params.get("name"),
Some(Literal::String(value)) if value == "Alice"
));
assert!(matches!(params.get("age"), Some(Literal::Integer(41))));
match params.get("scores") {
Some(Literal::List(values)) => {
assert!(matches!(values.first(), Some(Literal::Integer(1))));
assert!(matches!(values.get(1), Some(Literal::Integer(2))));
assert!(matches!(values.get(2), Some(Literal::Integer(3))));
}
other => panic!("expected list param, got {:?}", other),
}
assert!(matches!(
params.get("published_at"),
Some(Literal::DateTime(value)) if value == "2026-03-06T12:00:00Z"
));
}
// Typed conversion: list parameters convert each element with the declared
// item type, so `[Date]` yields `Literal::Date` elements rather than strings.
#[test]
fn typed_json_params_support_list_and_datetime_types() {
let query = find_named_query(
r#"
query q($tags: [String], $days: [Date]?, $due_at: DateTime) {
match { $t: Task }
return { $t.slug }
}
"#,
"q",
)
.expect("query");
let params = json_params_to_param_map(
Some(&json!({
"tags": ["launch", "priority"],
"days": ["2026-04-01", "2026-04-02"],
"due_at": "2026-04-03T10:15:00Z"
})),
&query.params,
JsonParamMode::Standard,
)
.expect("typed params");
assert!(matches!(
params.get("due_at"),
Some(Literal::DateTime(value)) if value == "2026-04-03T10:15:00Z"
));
match params.get("tags") {
Some(Literal::List(values)) => {
assert!(
matches!(values.first(), Some(Literal::String(value)) if value == "launch")
);
assert!(
matches!(values.get(1), Some(Literal::String(value)) if value == "priority")
);
}
other => panic!("expected string list param, got {:?}", other),
}
match params.get("days") {
Some(Literal::List(values)) => {
assert!(
matches!(values.first(), Some(Literal::Date(value)) if value == "2026-04-01")
);
assert!(
matches!(values.get(1), Some(Literal::Date(value)) if value == "2026-04-02")
);
}
other => panic!("expected date list param, got {:?}", other),
}
}
}

View file

@ -0,0 +1,286 @@
use std::sync::Arc;
use arrow_array::{RecordBatch, UInt64Array};
use arrow_ipc::writer::StreamWriter;
use arrow_schema::{DataType, Field, Schema, SchemaRef};
use serde::de::DeserializeOwned;
use crate::error::{NanoError, Result};
use crate::json_output::{record_batches_to_json_rows, record_batches_to_rust_json_rows};
/// Counts of nodes and edges affected by a mutation.
///
/// Converts into [`MutationResult`] via `From`, which copies both counters.
#[derive(Debug, Clone, Copy, Default)]
pub struct MutationExecResult {
/// Number of nodes affected.
pub affected_nodes: usize,
/// Number of edges affected.
pub affected_edges: usize,
}
/// Result of a query: an Arrow schema plus zero or more record batches.
///
/// The schema is stored separately so it stays available when `batches` is
/// empty.
#[derive(Debug, Clone)]
pub struct QueryResult {
// Schema used when concatenating batches and when writing the IPC stream.
schema: SchemaRef,
// Result rows, in the order the batches were supplied to `new`.
batches: Vec<RecordBatch>,
}
impl QueryResult {
pub fn new(schema: SchemaRef, batches: Vec<RecordBatch>) -> Self {
Self { schema, batches }
}
pub fn schema(&self) -> &SchemaRef {
&self.schema
}
pub fn batches(&self) -> &[RecordBatch] {
&self.batches
}
pub fn into_batches(self) -> Vec<RecordBatch> {
self.batches
}
pub fn num_rows(&self) -> usize {
self.batches.iter().map(RecordBatch::num_rows).sum()
}
pub fn concat_batches(&self) -> Result<RecordBatch> {
if self.batches.is_empty() {
return Ok(RecordBatch::new_empty(self.schema.clone()));
}
arrow_select::concat::concat_batches(&self.schema, &self.batches)
.map_err(|err| NanoError::Execution(err.to_string()))
}
pub fn to_sdk_json(&self) -> serde_json::Value {
serde_json::Value::Array(record_batches_to_json_rows(&self.batches))
}
pub fn to_rust_json(&self) -> serde_json::Value {
serde_json::Value::Array(record_batches_to_rust_json_rows(&self.batches))
}
pub fn deserialize<T: DeserializeOwned>(&self) -> Result<T> {
serde_json::from_value(self.to_rust_json()).map_err(|err| {
NanoError::Execution(format!("failed to deserialize query result: {}", err))
})
}
pub fn to_arrow_ipc(&self) -> Result<Vec<u8>> {
let mut buffer = Vec::new();
let mut writer = StreamWriter::try_new(&mut buffer, &self.schema)?;
for batch in &self.batches {
writer.write(batch)?;
}
writer.finish()?;
drop(writer);
Ok(buffer)
}
}
/// Counts of nodes and edges affected by a mutation, with JSON and Arrow
/// renderings provided by its inherent methods.
#[derive(Debug, Clone, Copy, Default)]
pub struct MutationResult {
/// Number of nodes affected.
pub affected_nodes: usize,
/// Number of edges affected.
pub affected_edges: usize,
}
impl MutationResult {
    /// Renders the counters as a JSON object with the camelCase keys
    /// `affectedNodes` and `affectedEdges`.
    pub fn to_sdk_json(&self) -> serde_json::Value {
        let Self {
            affected_nodes,
            affected_edges,
        } = *self;
        serde_json::json!({
            "affectedNodes": affected_nodes,
            "affectedEdges": affected_edges,
        })
    }

    /// Renders the counters as a one-row record batch with two non-nullable
    /// `UInt64` columns, `affected_nodes` and `affected_edges`.
    ///
    /// # Errors
    ///
    /// Propagates any error from `RecordBatch::try_new`.
    pub fn to_record_batch(&self) -> Result<RecordBatch> {
        let fields = vec![
            Field::new("affected_nodes", DataType::UInt64, false),
            Field::new("affected_edges", DataType::UInt64, false),
        ];
        let schema = Arc::new(Schema::new(fields));
        let batch = RecordBatch::try_new(
            schema,
            vec![
                Arc::new(UInt64Array::from(vec![self.affected_nodes as u64])),
                Arc::new(UInt64Array::from(vec![self.affected_edges as u64])),
            ],
        )?;
        Ok(batch)
    }
}
impl From<MutationExecResult> for MutationResult {
fn from(value: MutationExecResult) -> Self {
Self {
affected_nodes: value.affected_nodes,
affected_edges: value.affected_edges,
}
}
}
/// Outcome of a run: either query rows or mutation counters.
#[derive(Debug, Clone)]
pub enum RunResult {
/// Rows returned by a query.
Query(QueryResult),
/// Affected-node and affected-edge counters from a mutation.
Mutation(MutationResult),
}
impl RunResult {
pub fn to_sdk_json(&self) -> serde_json::Value {
match self {
Self::Query(result) => result.to_sdk_json(),
Self::Mutation(result) => result.to_sdk_json(),
}
}
pub fn into_record_batches(self) -> Result<Vec<RecordBatch>> {
match self {
Self::Query(result) => Ok(result.into_batches()),
Self::Mutation(result) => Ok(vec![result.to_record_batch()?]),
}
}
}
// Unit tests for `QueryResult`: Arrow IPC encoding, row counting and
// concatenation, JSON rendering, and typed deserialization.
#[cfg(test)]
mod tests {
use std::io::Cursor;
use arrow_array::Int64Array;
use arrow_ipc::reader::StreamReader;
use serde::Deserialize;
use super::*;
// With no batches, the IPC stream must still carry the schema.
#[test]
fn query_result_arrow_ipc_round_trips_empty_schema() {
let schema = Arc::new(Schema::new(vec![Field::new("name", DataType::Utf8, false)]));
let result = QueryResult::new(schema.clone(), vec![]);
let encoded = result.to_arrow_ipc().expect("encode empty result");
let reader = StreamReader::try_new(Cursor::new(encoded), None).expect("open stream");
assert_eq!(reader.schema().as_ref(), schema.as_ref());
assert_eq!(reader.count(), 0);
}
// One batch encoded to IPC decodes with the same schema and row count.
#[test]
fn query_result_arrow_ipc_round_trips_batches() {
let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::UInt64, false)]));
let batch = RecordBatch::try_new(
schema.clone(),
vec![Arc::new(UInt64Array::from(vec![1_u64, 2_u64]))],
)
.expect("batch");
let result = QueryResult::new(schema.clone(), vec![batch]);
let encoded = result.to_arrow_ipc().expect("encode result");
let mut reader = StreamReader::try_new(Cursor::new(encoded), None).expect("open stream");
let decoded = reader.next().expect("first batch").expect("decode batch");
assert_eq!(reader.schema().as_ref(), schema.as_ref());
assert_eq!(decoded.num_rows(), 2);
assert_eq!(decoded.schema().as_ref(), schema.as_ref());
}
// `num_rows` sums across batches; `concat_batches` preserves batch order.
#[test]
fn query_result_num_rows_and_concat_cover_multiple_batches() {
let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::UInt64, false)]));
let batch1 = RecordBatch::try_new(
schema.clone(),
vec![Arc::new(UInt64Array::from(vec![1_u64, 2_u64]))],
)
.expect("batch1");
let batch2 = RecordBatch::try_new(
schema.clone(),
vec![Arc::new(UInt64Array::from(vec![3_u64]))],
)
.expect("batch2");
let result = QueryResult::new(schema.clone(), vec![batch1, batch2]);
assert_eq!(result.num_rows(), 3);
let concatenated = result.concat_batches().expect("concat batches");
let ids = concatenated
.column(0)
.as_any()
.downcast_ref::<UInt64Array>()
.expect("u64 ids");
assert_eq!(concatenated.schema().as_ref(), schema.as_ref());
assert_eq!(ids.values(), &[1, 2, 3]);
}
// Covers the empty-input special case in `concat_batches`.
#[test]
fn query_result_concat_empty_batches_returns_empty_batch() {
let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::UInt64, false)]));
let result = QueryResult::new(schema.clone(), vec![]);
let concatenated = result.concat_batches().expect("concat empty");
assert_eq!(concatenated.schema().as_ref(), schema.as_ref());
assert_eq!(concatenated.num_rows(), 0);
}
// `to_rust_json` must emit `i64::MIN` and `u64::MAX` as exact JSON numbers.
#[test]
fn query_result_to_rust_json_preserves_wide_integers() {
let schema = Arc::new(Schema::new(vec![
Field::new("signed", DataType::Int64, false),
Field::new("unsigned", DataType::UInt64, false),
]));
let batch = RecordBatch::try_new(
schema.clone(),
vec![
Arc::new(Int64Array::from(vec![i64::MIN])),
Arc::new(UInt64Array::from(vec![u64::MAX])),
],
)
.expect("batch");
let result = QueryResult::new(schema, vec![batch]);
assert_eq!(
result.to_rust_json(),
serde_json::json!([{
"signed": i64::MIN,
"unsigned": u64::MAX,
}])
);
}
// Row shape used by the deserialization test below.
#[derive(Debug, Deserialize, PartialEq)]
struct PersonRow {
id: u64,
age: i64,
}
// Rows from several batches deserialize into one `Vec` in batch order.
#[test]
fn query_result_deserialize_decodes_rust_rows() {
let schema = Arc::new(Schema::new(vec![
Field::new("id", DataType::UInt64, false),
Field::new("age", DataType::Int64, false),
]));
let batch1 = RecordBatch::try_new(
schema.clone(),
vec![
Arc::new(UInt64Array::from(vec![1_u64])),
Arc::new(Int64Array::from(vec![40_i64])),
],
)
.expect("batch1");
let batch2 = RecordBatch::try_new(
schema,
vec![
Arc::new(UInt64Array::from(vec![u64::MAX])),
Arc::new(Int64Array::from(vec![-5_i64])),
],
)
.expect("batch2");
let result = QueryResult::new(batch1.schema(), vec![batch1, batch2]);
let rows: Vec<PersonRow> = result.deserialize().expect("deserialize rows");
assert_eq!(
rows,
vec![
PersonRow { id: 1, age: 40 },
PersonRow {
id: u64::MAX,
age: -5,
},
]
);
}
}

View file

@ -0,0 +1,111 @@
use crate::types::PropType;
use serde::{Deserialize, Serialize};
/// Root of the schema AST: the list of top-level declarations in one schema.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct SchemaFile {
/// Interface, node and edge declarations.
pub declarations: Vec<SchemaDecl>,
}
/// One top-level schema declaration.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum SchemaDecl {
/// An `interface` declaration.
Interface(InterfaceDecl),
/// A `node` declaration.
Node(NodeDecl),
/// An `edge` declaration.
Edge(EdgeDecl),
}
/// An interface declaration: a name and a set of property declarations.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct InterfaceDecl {
/// Interface name.
pub name: String,
/// Properties declared in the interface body.
pub properties: Vec<PropDecl>,
}
/// A node type declaration.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct NodeDecl {
/// Node type name.
pub name: String,
/// Annotations attached to the node declaration itself.
pub annotations: Vec<Annotation>,
/// Names of the interfaces listed in the `implements` clause.
pub implements: Vec<String>,
/// Properties declared in the node body.
pub properties: Vec<PropDecl>,
/// Typed constraints for this node (see [`Constraint`]).
pub constraints: Vec<Constraint>,
}
/// An edge type declaration connecting a source node type to a target node type.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct EdgeDecl {
/// Edge type name.
pub name: String,
/// Name of the source node type.
pub from_type: String,
/// Name of the target node type.
pub to_type: String,
/// Cardinality bounds from `@card(min..max)`.
pub cardinality: Cardinality,
/// Annotations attached to the edge declaration itself.
pub annotations: Vec<Annotation>,
/// Properties declared in the edge body.
pub properties: Vec<PropDecl>,
/// Typed constraints for this edge (see [`Constraint`]).
pub constraints: Vec<Constraint>,
}
/// A single property declaration inside an interface, node or edge.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct PropDecl {
/// Property name.
pub name: String,
/// Declared type of the property.
pub prop_type: PropType,
/// Annotations attached to this property.
pub annotations: Vec<Annotation>,
}
/// An annotation: a name and an optional single argument kept as text.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Annotation {
/// Annotation name; this is what `has_annotation` and `annotation_value`
/// compare against.
pub name: String,
/// The annotation's argument, or `None` when it has none.
pub value: Option<String>,
}
/// A typed constraint declared in a node or edge body.
///
/// Property-level annotations (`@key`, `@unique`, `@index`) are desugared
/// into these during parsing, so both syntactic positions produce the same
/// representation.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum Constraint {
/// `@key(...)`: the listed property names.
Key(Vec<String>),
/// `@unique(...)`: the listed property names.
Unique(Vec<String>),
/// `@index(...)`: the listed property names.
Index(Vec<String>),
/// `@range(property, min..max)`: numeric bounds on one property.
Range {
/// Name of the constrained property.
property: String,
/// Lower bound, or `None` when the range has no lower bound.
min: Option<ConstraintBound>,
/// Upper bound, or `None` when the range has no upper bound.
max: Option<ConstraintBound>,
},
/// `@check(property, "regex")`: a pattern for one property.
Check {
/// Name of the constrained property.
property: String,
/// The pattern text.
pattern: String,
},
}
/// A numeric bound used in `@range` constraints.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum ConstraintBound {
/// An integer bound.
Integer(i64),
/// A floating-point bound.
Float(f64),
}
/// Edge cardinality: `@card(min..max)`. Default is `0..*`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct Cardinality {
/// Lower bound.
pub min: u32,
/// Upper bound; `None` means no upper bound (the `*` in `0..*`).
pub max: Option<u32>,
}
impl Default for Cardinality {
fn default() -> Self {
Self { min: 0, max: None }
}
}
impl Cardinality {
pub fn is_default(&self) -> bool {
self.min == 0 && self.max.is_none()
}
}
/// Returns `true` when any annotation in `annotations` is named `name`.
pub fn has_annotation(annotations: &[Annotation], name: &str) -> bool {
    for annotation in annotations {
        if annotation.name == name {
            return true;
        }
    }
    false
}
/// Returns the argument of the first annotation named `name`.
///
/// Yields `None` when no annotation has that name, or when the first match
/// carries no argument; later annotations with the same name are not consulted.
pub fn annotation_value<'a>(annotations: &'a [Annotation], name: &str) -> Option<&'a str> {
    let first_match = annotations
        .iter()
        .find(|candidate| candidate.name == name)?;
    first_match.value.as_deref()
}

View file

@ -0,0 +1,2 @@
pub mod ast;
pub mod parser;

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,60 @@
// Omnigraph Schema Grammar (.pg files)
//
// pest notation used below: `_{ ... }` marks a silent rule (produces no pair),
// `@{ ... }` marks an atomic rule (no implicit whitespace/comments inside, and
// no inner pairs). `WHITESPACE` and `COMMENT` are the special rules pest
// inserts implicitly between tokens of non-atomic rules.
WHITESPACE = _{ " " | "\t" | "\r" | "\n" }
COMMENT = _{ LINE_COMMENT | BLOCK_COMMENT }
LINE_COMMENT = _{ "//" ~ (!"\n" ~ ANY)* }
BLOCK_COMMENT = _{ "/*" ~ (!"*/" ~ ANY)* ~ "*/" }
// A schema file is any number of declarations spanning the whole input.
schema_file = { SOI ~ schema_decl* ~ EOI }
schema_decl = { interface_decl | node_decl | edge_decl }
// interface Named { name: String @key }
interface_decl = { "interface" ~ type_name ~ "{" ~ prop_decl* ~ "}" }
// node Person implements Named, Described { ... }
node_decl = { "node" ~ type_name ~ annotation* ~ implements_clause? ~ "{" ~ (prop_decl | body_constraint)* ~ "}" }
implements_clause = { "implements" ~ type_name ~ ("," ~ type_name)* }
// edge Knows: Person -> Person @card(0..1) { ... }
// edge Knows: Person -> Person
edge_decl = { "edge" ~ type_name ~ ":" ~ type_name ~ "->" ~ type_name ~ cardinality? ~ annotation* ~ ("{" ~ (prop_decl | body_constraint)* ~ "}")? }
// @card(0..1), @card(1..), @card(0..)
// The upper bound is optional; omitting it leaves the range open-ended.
cardinality = { "@card" ~ "(" ~ integer ~ ".." ~ integer? ~ ")" }
prop_decl = { ident ~ ":" ~ type_ref ~ annotation* }
// Body-level constraints: @key(name), @unique(a, b), @index(a, b), @range(age, 0..200), @check(code, "regex")
body_constraint = { "@" ~ constraint_name ~ "(" ~ constraint_args ~ ")" }
constraint_name = { "key" | "unique" | "index" | "range" | "check" }
constraint_args = { constraint_arg ~ ("," ~ constraint_arg)* }
// NOTE(review): `literal` is tried before `ident`, and `bool_lit` has no
// word-boundary check, so an identifier starting with `true`/`false` (e.g.
// `true_value`) looks like it would match `bool_lit` first and then fail at the
// following token — confirm against the parser tests.
constraint_arg = { range_bound | literal | ident }
// Either `lo..`, `lo..hi`, or `..hi`; each bound may be a signed float or integer.
range_bound = { (signed_float | signed_integer) ~ ".." ~ (signed_float | signed_integer)? | ".." ~ (signed_float | signed_integer) }
// A trailing `?` marks the type as nullable.
type_ref = { core_type ~ "?"? }
core_type = { list_type | enum_type | vector_type | base_type }
// Lists may only contain base types (no nested lists, enums or vectors).
list_type = { "[" ~ base_type ~ "]" }
enum_type = { "enum" ~ "(" ~ enum_value ~ ("," ~ enum_value)* ~ ")" }
vector_type = { "Vector" ~ "(" ~ integer ~ ")" }
enum_value = @{ (ASCII_ALPHANUMERIC | "_" | "-")+ }
// "DateTime" must precede "Date": ordered choice would otherwise match the
// shorter prefix first.
base_type = { "String" | "Blob" | "Bool" | "I32" | "I64" | "U32" | "U64" | "F32" | "F64" | "DateTime" | "Date" }
// Annotation rule excludes constraint keywords followed by "(" — those are body_constraints
annotation = { "@" ~ !(constraint_name ~ "(") ~ ident ~ ("(" ~ annotation_arg ~ ")")? }
annotation_arg = { literal | ident }
// `float_lit` is tried before `integer` so that `1.5` is not split at the dot.
literal = { string_lit | float_lit | integer | bool_lit }
string_lit = @{ "\"" ~ string_char* ~ "\"" }
// Any character except an unescaped quote or backslash, or a backslash followed
// by any single character.
string_char = @{ !("\"" | "\\") ~ ANY | "\\" ~ ANY }
float_lit = @{ ASCII_DIGIT+ ~ "." ~ ASCII_DIGIT+ }
integer = @{ ASCII_DIGIT+ }
signed_float = @{ "-"? ~ ASCII_DIGIT+ ~ "." ~ ASCII_DIGIT+ }
signed_integer = @{ "-"? ~ ASCII_DIGIT+ }
bool_lit = { "true" | "false" }
// Type names start with an uppercase ASCII letter; identifiers start with a
// lowercase ASCII letter or an underscore.
type_name = @{ ASCII_ALPHA_UPPER ~ (ASCII_ALPHANUMERIC | "_")* }
ident = @{ (ASCII_ALPHA_LOWER | "_") ~ (ASCII_ALPHANUMERIC | "_")* }

View file

@ -0,0 +1,227 @@
use arrow_schema::DataType;
use serde::{Deserialize, Serialize};
/// Largest vector dimension accepted by `ScalarType::from_str_name`.
///
/// Equal to `i32::MAX` because `ScalarType::to_arrow` converts the dimension
/// to `i32` for the Arrow `FixedSizeList` length.
const MAX_VECTOR_DIM: u32 = i32::MAX as u32;
/// Scalar property types of the schema language.
///
/// Each variant's textual name (see the `Display` impl and `from_str_name`) is
/// the variant name itself, with `Vector` written as `Vector(N)`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum ScalarType {
/// Text; maps to Arrow `Utf8`.
String,
/// Boolean; maps to Arrow `Boolean`.
Bool,
/// Signed 32-bit integer.
I32,
/// Signed 64-bit integer.
I64,
/// Unsigned 32-bit integer.
U32,
/// Unsigned 64-bit integer.
U64,
/// 32-bit float.
F32,
/// 64-bit float.
F64,
/// Date; maps to Arrow `Date32`.
Date,
/// Date and time; maps to Arrow `Date64`.
DateTime,
/// Fixed-length vector of the given dimension; maps to an Arrow
/// `FixedSizeList` of `Float32`.
Vector(u32),
/// Binary data; maps to Arrow `LargeBinary`.
Blob,
}
impl ScalarType {
    /// Parses a scalar type from its textual name.
    ///
    /// Accepts the plain scalar names and `Vector(N)`, where `N` must be
    /// between 1 and `MAX_VECTOR_DIM` inclusive. Returns `None` for anything
    /// else.
    pub fn from_str_name(s: &str) -> Option<Self> {
        let vector_inner = s
            .strip_prefix("Vector(")
            .and_then(|rest| rest.strip_suffix(')'));
        if let Some(inner) = vector_inner {
            // Once the `Vector(...)` wrapper matches, a bad dimension is a hard
            // failure; the plain-name table below is not consulted.
            return match inner.parse::<u32>() {
                Ok(dim) if dim != 0 && dim <= MAX_VECTOR_DIM => Some(Self::Vector(dim)),
                _ => None,
            };
        }

        let scalar = match s {
            "String" => Self::String,
            "Bool" => Self::Bool,
            "I32" => Self::I32,
            "I64" => Self::I64,
            "U32" => Self::U32,
            "U64" => Self::U64,
            "F32" => Self::F32,
            "F64" => Self::F64,
            "Date" => Self::Date,
            "DateTime" => Self::DateTime,
            "Blob" => Self::Blob,
            _ => return None,
        };
        Some(scalar)
    }

    /// Returns the Arrow data type used to store this scalar.
    ///
    /// # Panics
    ///
    /// Panics for a `Vector` whose dimension does not fit in `i32`.
    pub fn to_arrow(&self) -> DataType {
        match *self {
            Self::Vector(dim) => {
                let length = i32::try_from(dim)
                    .expect("vector dimension exceeds Arrow FixedSizeList i32 bound");
                let item = arrow_schema::Field::new("item", DataType::Float32, true);
                DataType::FixedSizeList(std::sync::Arc::new(item), length)
            }
            Self::String => DataType::Utf8,
            Self::Bool => DataType::Boolean,
            Self::I32 => DataType::Int32,
            Self::I64 => DataType::Int64,
            Self::U32 => DataType::UInt32,
            Self::U64 => DataType::UInt64,
            Self::F32 => DataType::Float32,
            Self::F64 => DataType::Float64,
            Self::Date => DataType::Date32,
            Self::DateTime => DataType::Date64,
            Self::Blob => DataType::LargeBinary,
        }
    }

    /// Returns `true` for the integer and float types.
    pub fn is_numeric(&self) -> bool {
        match self {
            Self::I32 | Self::I64 | Self::U32 | Self::U64 | Self::F32 | Self::F64 => true,
            Self::String
            | Self::Bool
            | Self::Date
            | Self::DateTime
            | Self::Vector(_)
            | Self::Blob => false,
        }
    }
}
/// Writes the textual type name: the variant name, with vectors rendered as
/// `Vector(N)`.
impl std::fmt::Display for ScalarType {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let name = match self {
            Self::Vector(dim) => return write!(f, "Vector({})", dim),
            Self::Blob => "Blob",
            Self::DateTime => "DateTime",
            Self::Date => "Date",
            Self::F64 => "F64",
            Self::F32 => "F32",
            Self::U64 => "U64",
            Self::U32 => "U32",
            Self::I64 => "I64",
            Self::I32 => "I32",
            Self::Bool => "Bool",
            Self::String => "String",
        };
        f.write_str(name)
    }
}
/// Full type of a property or parameter: a scalar plus nullability, list-ness,
/// and an optional set of allowed enum values.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct PropType {
/// Underlying scalar type; for lists this is the element type.
pub scalar: ScalarType,
/// Whether the value may be null (rendered as a trailing `?`).
pub nullable: bool,
/// Whether this is a list of `scalar` (rendered as `[T]`).
pub list: bool,
/// Allowed values when this is an enum type; `None` otherwise.
pub enum_values: Option<Vec<String>>,
}
impl PropType {
    /// Parses a parameter type name into a `PropType`.
    ///
    /// A name wrapped in square brackets (e.g. `[I64]`) produces a list of
    /// the bracketed scalar type; any other name is parsed as a plain
    /// scalar. Returns `None` when `ScalarType::from_str_name` does not
    /// recognise the scalar name.
    pub fn from_param_type_name(s: &str, nullable: bool) -> Option<Self> {
        let bracketed = s.strip_prefix('[').and_then(|rest| rest.strip_suffix(']'));
        match bracketed {
            Some(element) => {
                ScalarType::from_str_name(element).map(|scalar| Self::list_of(scalar, nullable))
            }
            None => ScalarType::from_str_name(s).map(|scalar| Self::scalar(scalar, nullable)),
        }
    }

    /// Builds a non-list, non-enum type for `scalar`.
    pub fn scalar(scalar: ScalarType, nullable: bool) -> Self {
        Self {
            enum_values: None,
            list: false,
            nullable,
            scalar,
        }
    }

    /// Builds a list type whose elements are `scalar`.
    pub fn list_of(scalar: ScalarType, nullable: bool) -> Self {
        Self {
            list: true,
            ..Self::scalar(scalar, nullable)
        }
    }

    /// Builds an enum type backed by `ScalarType::String`.
    ///
    /// The allowed values are stored sorted with duplicates removed.
    pub fn enum_type(mut values: Vec<String>, nullable: bool) -> Self {
        values.sort();
        values.dedup();
        Self {
            enum_values: Some(values),
            ..Self::scalar(ScalarType::String, nullable)
        }
    }

    /// Reports whether this type carries a set of enum values.
    pub fn is_enum(&self) -> bool {
        self.enum_values.is_some()
    }

    /// Returns the Arrow `DataType` for this property type.
    ///
    /// List types are an Arrow `List` with a nullable `"item"` child of the
    /// scalar's Arrow type; all other types use the scalar's Arrow type
    /// directly.
    pub fn to_arrow(&self) -> DataType {
        let element = self.scalar.to_arrow();
        if !self.list {
            return element;
        }
        let item = arrow_schema::Field::new("item", element, true);
        DataType::List(std::sync::Arc::new(item))
    }

    /// Renders the type for display: `enum(a, b)` or the scalar name,
    /// wrapped in `[...]` for lists and suffixed with `?` when nullable.
    pub fn display_name(&self) -> String {
        let mut name = match &self.enum_values {
            Some(values) => format!("enum({})", values.join(", ")),
            None => self.scalar.to_string(),
        };
        if self.list {
            name = format!("[{}]", name);
        }
        if self.nullable {
            name.push('?');
        }
        name
    }
}
/// A two-valued direction: `Out` or `In`.
///
/// NOTE(review): presumably the direction in which an edge is followed
/// relative to a node; confirm against callers.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Direction {
/// The outward direction.
Out,
/// The inward direction.
In,
}
#[cfg(test)]
mod tests {
    use super::*;
    use arrow_schema::{DataType, Field};
    use std::sync::Arc;

    /// A vector type maps to a fixed-size list whose child is a nullable `Float32`.
    #[test]
    fn vector_to_arrow_uses_nullable_float32_child() {
        let child = Field::new("item", DataType::Float32, true);
        let expected = DataType::FixedSizeList(Arc::new(child), 4);

        let actual = ScalarType::Vector(4).to_arrow();

        assert_eq!(actual, expected);
    }

    /// Dimensions above `i32::MAX` are rejected; `i32::MAX` itself is accepted.
    #[test]
    fn scalar_type_from_str_name_rejects_vector_dimensions_outside_arrow_bounds() {
        let first_invalid_dim = (i32::MAX as u64) + 1;
        let rejected = ScalarType::from_str_name(&format!("Vector({})", first_invalid_dim));
        assert!(rejected.is_none());

        let accepted = ScalarType::from_str_name("Vector(2147483647)");
        assert_eq!(accepted, Some(ScalarType::Vector(2147483647)));
    }

    /// Bracketed names parse as lists; the `nullable` flag is carried through.
    #[test]
    fn prop_type_from_param_type_name_supports_lists_and_nullable_scalars() {
        let list = PropType::from_param_type_name("[DateTime]", false);
        assert_eq!(list, Some(PropType::list_of(ScalarType::DateTime, false)));

        let nullable_scalar = PropType::from_param_type_name("DateTime", true);
        assert_eq!(
            nullable_scalar,
            Some(PropType::scalar(ScalarType::DateTime, true))
        );
    }
}