mirror of
https://github.com/ModernRelay/omnigraph.git
synced 2026-06-30 02:49:39 +02:00
Initial public Omnigraph repository
This commit is contained in:
commit
338289656a
110 changed files with 60747 additions and 0 deletions
594
crates/omnigraph-compiler/src/catalog/mod.rs
Normal file
594
crates/omnigraph-compiler/src/catalog/mod.rs
Normal file
|
|
@ -0,0 +1,594 @@
|
|||
pub mod schema_ir;
|
||||
pub mod schema_plan;
|
||||
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow_schema::{DataType, Field, Schema, SchemaRef};
|
||||
|
||||
use crate::error::{NanoError, Result};
|
||||
use crate::schema::ast::{Cardinality, Constraint, ConstraintBound, SchemaDecl, SchemaFile};
|
||||
use crate::types::{PropType, ScalarType};
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Catalog {
|
||||
pub node_types: HashMap<String, NodeType>,
|
||||
pub edge_types: HashMap<String, EdgeType>,
|
||||
/// Maps normalized lowercase edge name -> EdgeType key (e.g. "knows" -> "Knows")
|
||||
pub edge_name_index: HashMap<String, String>,
|
||||
/// Interface declarations (for Phase 2 polymorphic queries)
|
||||
pub interfaces: HashMap<String, InterfaceType>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct InterfaceType {
|
||||
pub name: String,
|
||||
pub properties: HashMap<String, PropType>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct NodeType {
|
||||
pub name: String,
|
||||
/// Interface names this type implements
|
||||
pub implements: Vec<String>,
|
||||
pub properties: HashMap<String, PropType>,
|
||||
/// Key property names (from `@key` or `@key(name)`). Usually 0 or 1 element.
|
||||
pub key: Option<Vec<String>>,
|
||||
/// Uniqueness constraints (each entry is a list of column names)
|
||||
pub unique_constraints: Vec<Vec<String>>,
|
||||
/// Index declarations (each entry is a list of column names)
|
||||
pub indices: Vec<Vec<String>>,
|
||||
/// Value range constraints
|
||||
pub range_constraints: Vec<RangeConstraint>,
|
||||
/// Regex check constraints
|
||||
pub check_constraints: Vec<CheckConstraint>,
|
||||
/// Maps @embed target property -> source text property
|
||||
pub embed_sources: HashMap<String, String>,
|
||||
pub blob_properties: HashSet<String>,
|
||||
pub arrow_schema: SchemaRef,
|
||||
}
|
||||
|
||||
impl NodeType {
|
||||
/// Backward-compatible accessor: returns the first (and typically only) key property name.
|
||||
pub fn key_property(&self) -> Option<&str> {
|
||||
self.key
|
||||
.as_ref()
|
||||
.and_then(|v| v.first())
|
||||
.map(|s| s.as_str())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct RangeConstraint {
|
||||
pub property: String,
|
||||
pub min: Option<LiteralValue>,
|
||||
pub max: Option<LiteralValue>,
|
||||
}
|
||||
|
||||
/// A numeric literal used as a range-constraint bound.
///
/// Derives `PartialEq` so bounds can be compared directly (for example in
/// assertions); `Eq` is not derivable because of the `f64` payload.
#[derive(Debug, Clone, PartialEq)]
pub enum LiteralValue {
    /// A signed 64-bit integer bound.
    Integer(i64),
    /// A 64-bit floating-point bound.
    Float(f64),
}
|
||||
|
||||
/// A regex check constraint on a single property.
///
/// Derives `PartialEq`/`Eq` so constraints can be compared directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CheckConstraint {
    /// Name of the constrained property.
    pub property: String,
    /// Pattern text exactly as declared in the schema.
    pub pattern: String,
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct EdgeType {
|
||||
pub name: String,
|
||||
pub from_type: String,
|
||||
pub to_type: String,
|
||||
pub cardinality: Cardinality,
|
||||
pub properties: HashMap<String, PropType>,
|
||||
/// Uniqueness constraints on edge columns (e.g. `@unique(src, dst)`)
|
||||
pub unique_constraints: Vec<Vec<String>>,
|
||||
/// Index declarations on edge properties
|
||||
pub indices: Vec<Vec<String>>,
|
||||
pub blob_properties: HashSet<String>,
|
||||
pub arrow_schema: SchemaRef,
|
||||
}
|
||||
|
||||
impl Catalog {
|
||||
pub fn lookup_edge_by_name(&self, name: &str) -> Option<&EdgeType> {
|
||||
if let Some(et) = self.edge_types.get(name) {
|
||||
return Some(et);
|
||||
}
|
||||
if let Some(key) = self.edge_name_index.get(&normalize_edge_name(name)) {
|
||||
return self.edge_types.get(key);
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Produces the lookup form of an edge name: its Unicode-aware lowercase.
fn normalize_edge_name(name: &str) -> String {
    str::to_lowercase(name)
}
|
||||
|
||||
fn bound_to_literal(b: &ConstraintBound) -> LiteralValue {
|
||||
match b {
|
||||
ConstraintBound::Integer(n) => LiteralValue::Integer(*n),
|
||||
ConstraintBound::Float(f) => LiteralValue::Float(*f),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn build_catalog(schema: &SchemaFile) -> Result<Catalog> {
|
||||
let mut node_types = HashMap::new();
|
||||
let mut edge_types = HashMap::new();
|
||||
let mut edge_name_index = HashMap::new();
|
||||
let mut interfaces = HashMap::new();
|
||||
|
||||
// Pass 0: collect interfaces
|
||||
for decl in &schema.declarations {
|
||||
if let SchemaDecl::Interface(iface) = decl {
|
||||
let mut properties = HashMap::new();
|
||||
for prop in &iface.properties {
|
||||
properties.insert(prop.name.clone(), prop.prop_type.clone());
|
||||
}
|
||||
interfaces.insert(
|
||||
iface.name.clone(),
|
||||
InterfaceType {
|
||||
name: iface.name.clone(),
|
||||
properties,
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Pass 1: collect node types
|
||||
for decl in &schema.declarations {
|
||||
if let SchemaDecl::Node(node) = decl {
|
||||
if node_types.contains_key(&node.name) {
|
||||
return Err(NanoError::Catalog(format!(
|
||||
"duplicate node type: {}",
|
||||
node.name
|
||||
)));
|
||||
}
|
||||
|
||||
let mut properties = HashMap::new();
|
||||
let mut embed_sources = HashMap::new();
|
||||
let mut blob_properties = HashSet::new();
|
||||
for prop in &node.properties {
|
||||
properties.insert(prop.name.clone(), prop.prop_type.clone());
|
||||
if matches!(prop.prop_type.scalar, ScalarType::Blob) {
|
||||
blob_properties.insert(prop.name.clone());
|
||||
}
|
||||
// Extract @embed from property annotations (stays as annotation)
|
||||
if let Some(source_prop) = prop
|
||||
.annotations
|
||||
.iter()
|
||||
.find(|ann| ann.name == "embed")
|
||||
.and_then(|ann| ann.value.clone())
|
||||
{
|
||||
embed_sources.insert(prop.name.clone(), source_prop);
|
||||
}
|
||||
}
|
||||
|
||||
// Extract constraints from the typed Constraint enum
|
||||
let mut key: Option<Vec<String>> = None;
|
||||
let mut unique_constraints = Vec::new();
|
||||
let mut indices = Vec::new();
|
||||
let mut range_constraints = Vec::new();
|
||||
let mut check_constraints = Vec::new();
|
||||
|
||||
for constraint in &node.constraints {
|
||||
match constraint {
|
||||
Constraint::Key(cols) => {
|
||||
key = Some(cols.clone());
|
||||
// @key implies index on key columns
|
||||
indices.push(cols.clone());
|
||||
}
|
||||
Constraint::Unique(cols) => {
|
||||
unique_constraints.push(cols.clone());
|
||||
}
|
||||
Constraint::Index(cols) => {
|
||||
indices.push(cols.clone());
|
||||
}
|
||||
Constraint::Range { property, min, max } => {
|
||||
range_constraints.push(RangeConstraint {
|
||||
property: property.clone(),
|
||||
min: min.as_ref().map(bound_to_literal),
|
||||
max: max.as_ref().map(bound_to_literal),
|
||||
});
|
||||
}
|
||||
Constraint::Check { property, pattern } => {
|
||||
check_constraints.push(CheckConstraint {
|
||||
property: property.clone(),
|
||||
pattern: pattern.clone(),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Build Arrow schema: id: Utf8 + all properties
|
||||
let mut fields = vec![Field::new("id", DataType::Utf8, false)];
|
||||
for prop in &node.properties {
|
||||
fields.push(Field::new(
|
||||
&prop.name,
|
||||
prop.prop_type.to_arrow(),
|
||||
prop.prop_type.nullable,
|
||||
));
|
||||
}
|
||||
let arrow_schema = Arc::new(Schema::new(fields));
|
||||
|
||||
node_types.insert(
|
||||
node.name.clone(),
|
||||
NodeType {
|
||||
name: node.name.clone(),
|
||||
implements: node.implements.clone(),
|
||||
properties,
|
||||
key,
|
||||
unique_constraints,
|
||||
indices,
|
||||
range_constraints,
|
||||
check_constraints,
|
||||
embed_sources,
|
||||
blob_properties,
|
||||
arrow_schema,
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Pass 2: collect edge types, validate endpoints
|
||||
for decl in &schema.declarations {
|
||||
if let SchemaDecl::Edge(edge) = decl {
|
||||
if edge_types.contains_key(&edge.name) {
|
||||
return Err(NanoError::Catalog(format!(
|
||||
"duplicate edge type: {}",
|
||||
edge.name
|
||||
)));
|
||||
}
|
||||
if !node_types.contains_key(&edge.from_type) {
|
||||
return Err(NanoError::Catalog(format!(
|
||||
"edge {} references unknown source type: {}",
|
||||
edge.name, edge.from_type
|
||||
)));
|
||||
}
|
||||
if !node_types.contains_key(&edge.to_type) {
|
||||
return Err(NanoError::Catalog(format!(
|
||||
"edge {} references unknown target type: {}",
|
||||
edge.name, edge.to_type
|
||||
)));
|
||||
}
|
||||
|
||||
let mut properties = HashMap::new();
|
||||
let mut blob_properties = HashSet::new();
|
||||
let mut fields = vec![
|
||||
Field::new("id", DataType::Utf8, false),
|
||||
Field::new("src", DataType::Utf8, false),
|
||||
Field::new("dst", DataType::Utf8, false),
|
||||
];
|
||||
for prop in &edge.properties {
|
||||
properties.insert(prop.name.clone(), prop.prop_type.clone());
|
||||
if matches!(prop.prop_type.scalar, ScalarType::Blob) {
|
||||
blob_properties.insert(prop.name.clone());
|
||||
}
|
||||
fields.push(Field::new(
|
||||
&prop.name,
|
||||
prop.prop_type.to_arrow(),
|
||||
prop.prop_type.nullable,
|
||||
));
|
||||
}
|
||||
|
||||
// Extract edge constraints
|
||||
let mut unique_constraints = Vec::new();
|
||||
let mut edge_indices = Vec::new();
|
||||
for constraint in &edge.constraints {
|
||||
match constraint {
|
||||
Constraint::Unique(cols) => unique_constraints.push(cols.clone()),
|
||||
Constraint::Index(cols) => edge_indices.push(cols.clone()),
|
||||
_ => {} // Key/Range/Check validated at parse time to not appear on edges
|
||||
}
|
||||
}
|
||||
|
||||
let normalized_name = normalize_edge_name(&edge.name);
|
||||
if let Some(existing) = edge_name_index.get(&normalized_name)
|
||||
&& existing != &edge.name
|
||||
{
|
||||
return Err(NanoError::Catalog(format!(
|
||||
"edge name collision after case folding: '{}' conflicts with '{}'",
|
||||
edge.name, existing
|
||||
)));
|
||||
}
|
||||
edge_name_index.insert(normalized_name, edge.name.clone());
|
||||
|
||||
edge_types.insert(
|
||||
edge.name.clone(),
|
||||
EdgeType {
|
||||
name: edge.name.clone(),
|
||||
from_type: edge.from_type.clone(),
|
||||
to_type: edge.to_type.clone(),
|
||||
cardinality: edge.cardinality.clone(),
|
||||
properties,
|
||||
unique_constraints,
|
||||
indices: edge_indices,
|
||||
blob_properties,
|
||||
arrow_schema: Arc::new(Schema::new(fields)),
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Catalog {
|
||||
node_types,
|
||||
edge_types,
|
||||
edge_name_index,
|
||||
interfaces,
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::schema::ast::{EdgeDecl, NodeDecl};
    use crate::schema::parser::parse_schema;
    use crate::types::PropType;

    /// Parses `source` and builds its catalog, panicking if either step fails.
    fn catalog_for(source: &str) -> Catalog {
        let schema = parse_schema(source).unwrap();
        build_catalog(&schema).unwrap()
    }

    /// Two node types and two edge types shared by several tests.
    fn test_schema() -> &'static str {
        r#"
node Person {
    name: String
    age: I32?
}
node Company {
    name: String
}
edge Knows: Person -> Person {
    since: Date?
}
edge WorksAt: Person -> Company {
    title: String?
}
"#
    }

    #[test]
    fn test_build_catalog() {
        let catalog = catalog_for(test_schema());
        assert_eq!(catalog.node_types.len(), 2);
        assert_eq!(catalog.edge_types.len(), 2);
        assert!(catalog.node_types.contains_key("Person"));
        assert!(catalog.node_types.contains_key("Company"));
    }

    #[test]
    fn test_edge_lookup() {
        let catalog = catalog_for(test_schema());
        let edge = catalog.lookup_edge_by_name("knows").unwrap();
        assert_eq!(edge.from_type, "Person");
        assert_eq!(edge.to_type, "Person");
        let upper = catalog.lookup_edge_by_name("KNOWS").unwrap();
        assert_eq!(upper.name, "Knows");
    }

    #[test]
    fn test_node_arrow_schema() {
        let catalog = catalog_for(test_schema());
        let person = &catalog.node_types["Person"];
        assert_eq!(person.arrow_schema.fields().len(), 3); // id, name, age
    }

    #[test]
    fn test_duplicate_node_error() {
        let input = r#"
node Person { name: String }
node Person { age: I32 }
"#;
        let schema = parse_schema(input).unwrap();
        assert!(build_catalog(&schema).is_err());
    }

    #[test]
    fn test_bad_edge_endpoint() {
        let input = r#"
node Person { name: String }
edge Knows: Person -> Alien
"#;
        let schema = parse_schema(input).unwrap();
        assert!(build_catalog(&schema).is_err());
    }

    #[test]
    fn test_id_fields_are_utf8() {
        let catalog = catalog_for(test_schema());

        let person = &catalog.node_types["Person"];
        assert_eq!(
            person
                .arrow_schema
                .field_with_name("id")
                .unwrap()
                .data_type(),
            &DataType::Utf8
        );

        // Every identifier column on an edge uses the same string type.
        let knows = &catalog.edge_types["Knows"];
        for column in ["id", "src", "dst"] {
            assert_eq!(
                knows
                    .arrow_schema
                    .field_with_name(column)
                    .unwrap()
                    .data_type(),
                &DataType::Utf8
            );
        }
    }

    #[test]
    fn test_key_property_tracking() {
        let catalog = catalog_for(
            r#"
node Signal {
    slug: String @key
    title: String
}
node Person {
    name: String
}
edge Emits: Person -> Signal
"#,
        );
        assert_eq!(catalog.node_types["Signal"].key_property(), Some("slug"));
        assert_eq!(catalog.node_types["Person"].key_property(), None);
    }

    #[test]
    fn test_edge_lookup_handles_non_ascii_leading_character() {
        // Built by hand rather than parsed, so the edge name can start with
        // a non-ASCII character.
        let person = SchemaDecl::Node(NodeDecl {
            name: "Person".to_string(),
            annotations: vec![],
            implements: vec![],
            properties: vec![crate::schema::ast::PropDecl {
                name: "name".to_string(),
                prop_type: PropType::scalar(ScalarType::String, false),
                annotations: vec![],
            }],
            constraints: vec![],
        });
        let edges = SchemaDecl::Edge(EdgeDecl {
            name: "Édges".to_string(),
            from_type: "Person".to_string(),
            to_type: "Person".to_string(),
            cardinality: Default::default(),
            annotations: vec![],
            properties: vec![],
            constraints: vec![],
        });
        let schema = SchemaFile {
            declarations: vec![person, edges],
        };
        let catalog = build_catalog(&schema).unwrap();
        assert!(catalog.lookup_edge_by_name("édges").is_some());
    }

    #[test]
    fn test_edge_lookup_rejects_case_fold_collisions() {
        let input = r#"
node Person { name: String }
edge Knows: Person -> Person
edge KNOWS: Person -> Person
"#;
        let schema = parse_schema(input).unwrap();
        let err = build_catalog(&schema).unwrap_err();
        assert!(err.to_string().contains("case folding"));
    }

    #[test]
    fn test_catalog_composite_unique() {
        let catalog = catalog_for(
            r#"
node Person {
    first: String
    last: String
    @unique(first, last)
}
"#,
        );
        let expected = vec!["first".to_string(), "last".to_string()];
        let person = &catalog.node_types["Person"];
        assert!(person.unique_constraints.contains(&expected));
    }

    #[test]
    fn test_catalog_composite_index() {
        let catalog = catalog_for(
            r#"
node Event {
    category: String
    date: Date
    @index(category, date)
}
"#,
        );
        let expected = vec!["category".to_string(), "date".to_string()];
        let event = &catalog.node_types["Event"];
        assert!(event.indices.contains(&expected));
    }

    #[test]
    fn test_catalog_edge_cardinality() {
        let catalog = catalog_for(
            r#"
node Person { name: String }
node Company { name: String }
edge WorksAt: Person -> Company @card(0..1)
"#,
        );
        let edge = &catalog.edge_types["WorksAt"];
        assert_eq!(edge.cardinality.min, 0);
        assert_eq!(edge.cardinality.max, Some(1));
    }

    #[test]
    fn test_catalog_interfaces_stored() {
        let catalog = catalog_for(
            r#"
interface Named {
    name: String
}
node Person implements Named {
    age: I32?
}
"#,
        );
        assert!(catalog.interfaces.contains_key("Named"));
        assert!(catalog.interfaces["Named"].properties.contains_key("name"));
    }

    #[test]
    fn test_catalog_node_implements() {
        let catalog = catalog_for(
            r#"
interface Named {
    name: String
}
node Person implements Named {
    age: I32?
}
"#,
        );
        assert_eq!(catalog.node_types["Person"].implements, vec!["Named"]);
    }

    #[test]
    fn test_key_implies_index() {
        let catalog = catalog_for(
            r#"
node Signal {
    slug: String @key
    title: String
}
"#,
        );
        let signal = &catalog.node_types["Signal"];
        assert!(signal.indices.contains(&vec!["slug".to_string()]));
    }
}
|
||||
393
crates/omnigraph-compiler/src/catalog/schema_ir.rs
Normal file
393
crates/omnigraph-compiler/src/catalog/schema_ir.rs
Normal file
|
|
@ -0,0 +1,393 @@
|
|||
use std::collections::HashMap;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use sha2::{Digest, Sha256};
|
||||
|
||||
use crate::catalog::{Catalog, build_catalog};
|
||||
use crate::error::{NanoError, Result};
|
||||
use crate::schema::ast::{Annotation, Cardinality, Constraint, PropDecl, SchemaDecl, SchemaFile};
|
||||
use crate::types::PropType;
|
||||
|
||||
/// Version stamped into every `SchemaIR` built here; `build_catalog_from_ir`
/// rejects an IR carrying any other value.
const SCHEMA_IR_VERSION: u32 = 1;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct SchemaIR {
|
||||
pub ir_version: u32,
|
||||
pub interfaces: Vec<InterfaceIR>,
|
||||
pub nodes: Vec<NodeIR>,
|
||||
pub edges: Vec<EdgeIR>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct InterfaceIR {
|
||||
pub name: String,
|
||||
pub type_id: u32,
|
||||
pub properties: Vec<PropertyIR>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct NodeIR {
|
||||
pub name: String,
|
||||
pub type_id: u32,
|
||||
pub annotations: Vec<Annotation>,
|
||||
pub implements: Vec<String>,
|
||||
pub properties: Vec<PropertyIR>,
|
||||
pub constraints: Vec<Constraint>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct EdgeIR {
|
||||
pub name: String,
|
||||
pub type_id: u32,
|
||||
pub from_type: String,
|
||||
pub to_type: String,
|
||||
pub cardinality: Cardinality,
|
||||
pub annotations: Vec<Annotation>,
|
||||
pub properties: Vec<PropertyIR>,
|
||||
pub constraints: Vec<Constraint>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct PropertyIR {
|
||||
pub name: String,
|
||||
pub prop_id: u32,
|
||||
pub prop_type: PropType,
|
||||
pub annotations: Vec<Annotation>,
|
||||
}
|
||||
|
||||
pub fn build_schema_ir(schema: &SchemaFile) -> Result<SchemaIR> {
|
||||
let mut seen_type_ids = HashMap::<u32, String>::new();
|
||||
let mut interfaces = Vec::new();
|
||||
let mut nodes = Vec::new();
|
||||
let mut edges = Vec::new();
|
||||
|
||||
for decl in &schema.declarations {
|
||||
match decl {
|
||||
SchemaDecl::Interface(interface) => {
|
||||
let type_id = stable_type_id("interface", &interface.name);
|
||||
check_type_id_collision(&mut seen_type_ids, type_id, &interface.name)?;
|
||||
interfaces.push(InterfaceIR {
|
||||
name: interface.name.clone(),
|
||||
type_id,
|
||||
properties: canonical_properties(
|
||||
"interface",
|
||||
&interface.name,
|
||||
&interface.properties,
|
||||
)?,
|
||||
});
|
||||
}
|
||||
SchemaDecl::Node(node) => {
|
||||
let type_id = stable_type_id("node", &node.name);
|
||||
check_type_id_collision(&mut seen_type_ids, type_id, &node.name)?;
|
||||
nodes.push(NodeIR {
|
||||
name: node.name.clone(),
|
||||
type_id,
|
||||
annotations: canonical_annotations(&node.annotations),
|
||||
implements: canonical_strings(&node.implements),
|
||||
properties: canonical_properties("node", &node.name, &node.properties)?,
|
||||
constraints: canonical_constraints(&node.constraints),
|
||||
});
|
||||
}
|
||||
SchemaDecl::Edge(edge) => {
|
||||
let type_id = stable_type_id("edge", &edge.name);
|
||||
check_type_id_collision(&mut seen_type_ids, type_id, &edge.name)?;
|
||||
edges.push(EdgeIR {
|
||||
name: edge.name.clone(),
|
||||
type_id,
|
||||
from_type: edge.from_type.clone(),
|
||||
to_type: edge.to_type.clone(),
|
||||
cardinality: edge.cardinality.clone(),
|
||||
annotations: canonical_annotations(&edge.annotations),
|
||||
properties: canonical_properties("edge", &edge.name, &edge.properties)?,
|
||||
constraints: canonical_constraints(&edge.constraints),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
interfaces.sort_by(|a, b| a.name.cmp(&b.name));
|
||||
nodes.sort_by(|a, b| a.name.cmp(&b.name));
|
||||
edges.sort_by(|a, b| a.name.cmp(&b.name));
|
||||
|
||||
Ok(SchemaIR {
|
||||
ir_version: SCHEMA_IR_VERSION,
|
||||
interfaces,
|
||||
nodes,
|
||||
edges,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn build_catalog_from_ir(ir: &SchemaIR) -> Result<Catalog> {
|
||||
if ir.ir_version != SCHEMA_IR_VERSION {
|
||||
return Err(NanoError::Catalog(format!(
|
||||
"unsupported schema ir_version {} (expected {})",
|
||||
ir.ir_version, SCHEMA_IR_VERSION
|
||||
)));
|
||||
}
|
||||
|
||||
let schema = SchemaFile {
|
||||
declarations: ir
|
||||
.interfaces
|
||||
.iter()
|
||||
.map(|interface| {
|
||||
SchemaDecl::Interface(crate::schema::ast::InterfaceDecl {
|
||||
name: interface.name.clone(),
|
||||
properties: interface
|
||||
.properties
|
||||
.iter()
|
||||
.map(property_decl_from_ir)
|
||||
.collect(),
|
||||
})
|
||||
})
|
||||
.chain(ir.nodes.iter().map(|node| {
|
||||
SchemaDecl::Node(crate::schema::ast::NodeDecl {
|
||||
name: node.name.clone(),
|
||||
annotations: node.annotations.clone(),
|
||||
implements: node.implements.clone(),
|
||||
properties: node.properties.iter().map(property_decl_from_ir).collect(),
|
||||
constraints: node.constraints.clone(),
|
||||
})
|
||||
}))
|
||||
.chain(ir.edges.iter().map(|edge| {
|
||||
SchemaDecl::Edge(crate::schema::ast::EdgeDecl {
|
||||
name: edge.name.clone(),
|
||||
from_type: edge.from_type.clone(),
|
||||
to_type: edge.to_type.clone(),
|
||||
cardinality: edge.cardinality.clone(),
|
||||
annotations: edge.annotations.clone(),
|
||||
properties: edge.properties.iter().map(property_decl_from_ir).collect(),
|
||||
constraints: edge.constraints.clone(),
|
||||
})
|
||||
}))
|
||||
.collect(),
|
||||
};
|
||||
|
||||
build_catalog(&schema)
|
||||
}
|
||||
|
||||
pub fn schema_ir_json(ir: &SchemaIR) -> Result<String> {
|
||||
serde_json::to_string(ir)
|
||||
.map_err(|err| NanoError::Catalog(format!("serialize schema ir error: {}", err)))
|
||||
}
|
||||
|
||||
pub fn schema_ir_pretty_json(ir: &SchemaIR) -> Result<String> {
|
||||
serde_json::to_string_pretty(ir)
|
||||
.map_err(|err| NanoError::Catalog(format!("serialize schema ir error: {}", err)))
|
||||
}
|
||||
|
||||
pub fn schema_ir_hash(ir: &SchemaIR) -> Result<String> {
|
||||
let json = schema_ir_json(ir)?;
|
||||
let mut hasher = Sha256::new();
|
||||
hasher.update(json.as_bytes());
|
||||
Ok(format!("sha256:{:x}", hasher.finalize()))
|
||||
}
|
||||
|
||||
fn property_decl_from_ir(property: &PropertyIR) -> PropDecl {
|
||||
PropDecl {
|
||||
name: property.name.clone(),
|
||||
prop_type: property.prop_type.clone(),
|
||||
annotations: property.annotations.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the input strings sorted ascending with duplicates removed.
fn canonical_strings(values: &[String]) -> Vec<String> {
    let mut unique = Vec::from(values);
    // Equal strings are indistinguishable, so an unstable sort gives the same result.
    unique.sort_unstable();
    unique.dedup();
    unique
}
|
||||
|
||||
/// Returns a copy of the annotations ordered by name, then by value.
/// Duplicates are kept.
fn canonical_annotations(annotations: &[Annotation]) -> Vec<Annotation> {
    let mut sorted = Vec::from(annotations);
    // Tuple comparison is lexicographic: name first, value as tie-breaker.
    sorted.sort_by(|left, right| (&left.name, &left.value).cmp(&(&right.name, &right.value)));
    sorted
}
|
||||
|
||||
/// Returns a copy of the property type whose enum values, if present, are
/// sorted and de-duplicated.
fn canonical_prop_type(prop_type: &PropType) -> PropType {
    let mut canonical = prop_type.clone();
    if let Some(variants) = canonical.enum_values.as_mut() {
        variants.sort();
        variants.dedup();
    }
    canonical
}
|
||||
|
||||
fn canonical_properties(
|
||||
kind: &str,
|
||||
owner_name: &str,
|
||||
properties: &[PropDecl],
|
||||
) -> Result<Vec<PropertyIR>> {
|
||||
let mut seen_prop_ids = HashMap::<u32, String>::new();
|
||||
let owner_key = format!("{}:{}", kind, owner_name);
|
||||
let mut canonical = properties
|
||||
.iter()
|
||||
.map(|property| {
|
||||
let prop_id = stable_prop_id(&owner_key, &property.name);
|
||||
if let Some(previous) = seen_prop_ids.insert(prop_id, property.name.clone()) {
|
||||
return Err(NanoError::Catalog(format!(
|
||||
"property id collision on {}: '{}' and '{}' both hash to {}",
|
||||
owner_name, previous, property.name, prop_id
|
||||
)));
|
||||
}
|
||||
Ok(PropertyIR {
|
||||
name: property.name.clone(),
|
||||
prop_id,
|
||||
prop_type: canonical_prop_type(&property.prop_type),
|
||||
annotations: canonical_annotations(&property.annotations),
|
||||
})
|
||||
})
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
canonical.sort_by(|a, b| a.name.cmp(&b.name));
|
||||
Ok(canonical)
|
||||
}
|
||||
|
||||
fn canonical_constraints(constraints: &[Constraint]) -> Vec<Constraint> {
|
||||
let mut constraints = constraints
|
||||
.iter()
|
||||
.cloned()
|
||||
.map(normalize_constraint)
|
||||
.collect::<Vec<_>>();
|
||||
constraints.sort_by_key(constraint_sort_key);
|
||||
constraints
|
||||
}
|
||||
|
||||
fn normalize_constraint(constraint: Constraint) -> Constraint {
|
||||
match constraint {
|
||||
Constraint::Key(mut columns) => {
|
||||
columns.sort();
|
||||
Constraint::Key(columns)
|
||||
}
|
||||
Constraint::Unique(mut columns) => {
|
||||
columns.sort();
|
||||
Constraint::Unique(columns)
|
||||
}
|
||||
Constraint::Index(mut columns) => {
|
||||
columns.sort();
|
||||
Constraint::Index(columns)
|
||||
}
|
||||
other => other,
|
||||
}
|
||||
}
|
||||
|
||||
fn constraint_sort_key(constraint: &Constraint) -> String {
|
||||
match constraint {
|
||||
Constraint::Key(columns) => format!("key:{}", columns.join(",")),
|
||||
Constraint::Unique(columns) => format!("unique:{}", columns.join(",")),
|
||||
Constraint::Index(columns) => format!("index:{}", columns.join(",")),
|
||||
Constraint::Range { property, min, max } => {
|
||||
format!("range:{}:{:?}:{:?}", property, min, max)
|
||||
}
|
||||
Constraint::Check { property, pattern } => format!("check:{}:{}", property, pattern),
|
||||
}
|
||||
}
|
||||
|
||||
fn stable_type_id(kind: &str, name: &str) -> u32 {
|
||||
fnv1a_u32(&format!("{}:{}", kind, name))
|
||||
}
|
||||
|
||||
fn stable_prop_id(owner: &str, name: &str) -> u32 {
|
||||
fnv1a_u32(&format!("{}:{}", owner, name))
|
||||
}
|
||||
|
||||
/// 32-bit FNV-1a over the UTF-8 bytes of `value`.
///
/// A digest of 0 is remapped to 1, so the function never returns 0.
fn fnv1a_u32(value: &str) -> u32 {
    const OFFSET_BASIS: u32 = 0x811c_9dc5;
    const PRIME: u32 = 0x0100_0193;

    let digest = value.bytes().fold(OFFSET_BASIS, |acc, byte| {
        (acc ^ u32::from(byte)).wrapping_mul(PRIME)
    });
    digest.max(1)
}
|
||||
|
||||
fn check_type_id_collision(
|
||||
seen_type_ids: &mut HashMap<u32, String>,
|
||||
type_id: u32,
|
||||
name: &str,
|
||||
) -> Result<()> {
|
||||
if let Some(previous) = seen_type_ids.insert(type_id, name.to_string()) {
|
||||
return Err(NanoError::Catalog(format!(
|
||||
"type id collision: '{}' and '{}' both hash to {}",
|
||||
previous, name, type_id
|
||||
)));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::catalog::build_catalog;
    use crate::schema::parser::parse_schema;

    #[test]
    fn schema_ir_hash_is_stable_across_source_ordering_noise() {
        // Same declarations, with types and properties listed in different orders.
        let node_first = r#"
node Person {
    age: I32?
    name: String @key
}

edge Knows: Person -> Person {
    since: Date?
}
"#;
        let edge_first = r#"
edge Knows: Person -> Person {
    since: Date?
}

node Person {
    name: String @key
    age: I32?
}
"#;

        let ir_a = build_schema_ir(&parse_schema(node_first).unwrap()).unwrap();
        let ir_b = build_schema_ir(&parse_schema(edge_first).unwrap()).unwrap();

        assert_eq!(ir_a, ir_b);
        assert_eq!(
            schema_ir_hash(&ir_a).unwrap(),
            schema_ir_hash(&ir_b).unwrap()
        );
    }

    #[test]
    fn build_catalog_from_ir_round_trips_core_catalog_fields() {
        let source = r#"
node Person @description("person") {
    name: String @key
    age: I32? @description("age")
}

edge Knows: Person -> Person @instruction("friendship") {
    since: Date?
}
"#;
        let schema = parse_schema(source).unwrap();

        let direct = build_catalog(&schema).unwrap();
        let ir = build_schema_ir(&schema).unwrap();
        let rebuilt = build_catalog_from_ir(&ir).unwrap();

        assert_eq!(direct.node_types.len(), rebuilt.node_types.len());
        assert_eq!(direct.edge_types.len(), rebuilt.edge_types.len());
        assert_eq!(
            direct.node_types["Person"].key_property(),
            rebuilt.node_types["Person"].key_property()
        );
        assert_eq!(
            direct.edge_types["Knows"].cardinality,
            rebuilt.edge_types["Knows"].cardinality
        );
    }
}
|
||||
895
crates/omnigraph-compiler/src/catalog/schema_plan.rs
Normal file
895
crates/omnigraph-compiler/src/catalog/schema_plan.rs
Normal file
|
|
@ -0,0 +1,895 @@
|
|||
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::error::Result;
|
||||
use crate::schema::ast::{Annotation, Constraint};
|
||||
use crate::types::PropType;
|
||||
|
||||
use super::schema_ir::{EdgeIR, InterfaceIR, NodeIR, PropertyIR, SchemaIR};
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum SchemaTypeKind {
|
||||
Interface,
|
||||
Node,
|
||||
Edge,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct SchemaMigrationPlan {
|
||||
pub supported: bool,
|
||||
pub steps: Vec<SchemaMigrationStep>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
#[serde(tag = "kind", rename_all = "snake_case")]
|
||||
pub enum SchemaMigrationStep {
|
||||
AddType {
|
||||
type_kind: SchemaTypeKind,
|
||||
name: String,
|
||||
},
|
||||
RenameType {
|
||||
type_kind: SchemaTypeKind,
|
||||
from: String,
|
||||
to: String,
|
||||
},
|
||||
AddProperty {
|
||||
type_kind: SchemaTypeKind,
|
||||
type_name: String,
|
||||
property_name: String,
|
||||
property_type: PropType,
|
||||
},
|
||||
RenameProperty {
|
||||
type_kind: SchemaTypeKind,
|
||||
type_name: String,
|
||||
from: String,
|
||||
to: String,
|
||||
},
|
||||
AddConstraint {
|
||||
type_kind: SchemaTypeKind,
|
||||
type_name: String,
|
||||
constraint: Constraint,
|
||||
},
|
||||
UpdateTypeMetadata {
|
||||
type_kind: SchemaTypeKind,
|
||||
name: String,
|
||||
annotations: Vec<Annotation>,
|
||||
},
|
||||
UpdatePropertyMetadata {
|
||||
type_kind: SchemaTypeKind,
|
||||
type_name: String,
|
||||
property_name: String,
|
||||
annotations: Vec<Annotation>,
|
||||
},
|
||||
UnsupportedChange {
|
||||
entity: String,
|
||||
reason: String,
|
||||
},
|
||||
}
|
||||
|
||||
pub fn plan_schema_migration(
|
||||
accepted: &SchemaIR,
|
||||
desired: &SchemaIR,
|
||||
) -> Result<SchemaMigrationPlan> {
|
||||
let mut steps = Vec::new();
|
||||
let interface_renames = plan_interfaces(&accepted.interfaces, &desired.interfaces, &mut steps);
|
||||
let node_renames = plan_nodes(
|
||||
&accepted.nodes,
|
||||
&desired.nodes,
|
||||
&interface_renames,
|
||||
&mut steps,
|
||||
);
|
||||
plan_edges(&accepted.edges, &desired.edges, &node_renames, &mut steps);
|
||||
|
||||
Ok(SchemaMigrationPlan {
|
||||
supported: !steps
|
||||
.iter()
|
||||
.any(|step| matches!(step, SchemaMigrationStep::UnsupportedChange { .. })),
|
||||
steps,
|
||||
})
|
||||
}
|
||||
|
||||
fn plan_interfaces(
|
||||
accepted: &[InterfaceIR],
|
||||
desired: &[InterfaceIR],
|
||||
steps: &mut Vec<SchemaMigrationStep>,
|
||||
) -> HashMap<String, String> {
|
||||
let accepted_by_name = accepted
|
||||
.iter()
|
||||
.map(|interface| (interface.name.as_str(), interface))
|
||||
.collect::<HashMap<_, _>>();
|
||||
let mut consumed = HashSet::new();
|
||||
|
||||
for interface in desired {
|
||||
if let Some(existing) = accepted_by_name.get(interface.name.as_str()) {
|
||||
consumed.insert(existing.name.clone());
|
||||
let _property_renames = plan_properties(
|
||||
SchemaTypeKind::Interface,
|
||||
&interface.name,
|
||||
&existing.properties,
|
||||
&interface.properties,
|
||||
steps,
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
||||
steps.push(SchemaMigrationStep::AddType {
|
||||
type_kind: SchemaTypeKind::Interface,
|
||||
name: interface.name.clone(),
|
||||
});
|
||||
}
|
||||
|
||||
for leftover in accepted
|
||||
.iter()
|
||||
.filter(|interface| !consumed.contains(&interface.name))
|
||||
{
|
||||
steps.push(SchemaMigrationStep::UnsupportedChange {
|
||||
entity: format!("interface:{}", leftover.name),
|
||||
reason: format!(
|
||||
"removing interface '{}' is not supported in schema migration v1",
|
||||
leftover.name
|
||||
),
|
||||
});
|
||||
}
|
||||
|
||||
HashMap::new()
|
||||
}
|
||||
|
||||
fn plan_nodes(
|
||||
accepted: &[NodeIR],
|
||||
desired: &[NodeIR],
|
||||
interface_renames: &HashMap<String, String>,
|
||||
steps: &mut Vec<SchemaMigrationStep>,
|
||||
) -> HashMap<String, String> {
|
||||
let accepted_by_name = accepted
|
||||
.iter()
|
||||
.map(|node| (node.name.as_str(), node))
|
||||
.collect::<HashMap<_, _>>();
|
||||
let mut consumed = HashSet::new();
|
||||
let mut renames = HashMap::new();
|
||||
|
||||
for node in desired {
|
||||
let rename_from = rename_from_value(&node.annotations);
|
||||
let matched = accepted_by_name
|
||||
.get(node.name.as_str())
|
||||
.copied()
|
||||
.or_else(|| {
|
||||
rename_from.and_then(|from| {
|
||||
accepted_by_name
|
||||
.get(from)
|
||||
.copied()
|
||||
.filter(|candidate| candidate.name != node.name)
|
||||
})
|
||||
});
|
||||
|
||||
let Some(existing) = matched else {
|
||||
if let Some(from) = rename_from {
|
||||
steps.push(SchemaMigrationStep::UnsupportedChange {
|
||||
entity: format!("node:{}", node.name),
|
||||
reason: format!(
|
||||
"node '{}' declares @rename_from(\"{}\") but no accepted node with that name exists",
|
||||
node.name, from
|
||||
),
|
||||
});
|
||||
} else {
|
||||
steps.push(SchemaMigrationStep::AddType {
|
||||
type_kind: SchemaTypeKind::Node,
|
||||
name: node.name.clone(),
|
||||
});
|
||||
}
|
||||
continue;
|
||||
};
|
||||
|
||||
consumed.insert(existing.name.clone());
|
||||
if existing.name != node.name {
|
||||
renames.insert(existing.name.clone(), node.name.clone());
|
||||
steps.push(SchemaMigrationStep::RenameType {
|
||||
type_kind: SchemaTypeKind::Node,
|
||||
from: existing.name.clone(),
|
||||
to: node.name.clone(),
|
||||
});
|
||||
}
|
||||
|
||||
if normalize_strings(&existing.implements, interface_renames)
|
||||
!= normalize_strings(&node.implements, &HashMap::new())
|
||||
{
|
||||
steps.push(SchemaMigrationStep::UnsupportedChange {
|
||||
entity: format!("node:{}", node.name),
|
||||
reason: format!(
|
||||
"changing implemented interfaces on node '{}' is not supported in schema migration v1",
|
||||
node.name
|
||||
),
|
||||
});
|
||||
}
|
||||
|
||||
plan_type_metadata(
|
||||
SchemaTypeKind::Node,
|
||||
&node.name,
|
||||
&existing.annotations,
|
||||
&node.annotations,
|
||||
steps,
|
||||
);
|
||||
let property_renames = plan_properties(
|
||||
SchemaTypeKind::Node,
|
||||
&node.name,
|
||||
&existing.properties,
|
||||
&node.properties,
|
||||
steps,
|
||||
);
|
||||
plan_constraints(
|
||||
SchemaTypeKind::Node,
|
||||
&node.name,
|
||||
&existing.constraints,
|
||||
&node.constraints,
|
||||
&property_renames,
|
||||
steps,
|
||||
);
|
||||
}
|
||||
|
||||
for leftover in accepted
|
||||
.iter()
|
||||
.filter(|node| !consumed.contains(&node.name))
|
||||
{
|
||||
steps.push(SchemaMigrationStep::UnsupportedChange {
|
||||
entity: format!("node:{}", leftover.name),
|
||||
reason: format!(
|
||||
"removing node type '{}' is not supported in schema migration v1",
|
||||
leftover.name
|
||||
),
|
||||
});
|
||||
}
|
||||
|
||||
renames
|
||||
}
|
||||
|
||||
fn plan_edges(
|
||||
accepted: &[EdgeIR],
|
||||
desired: &[EdgeIR],
|
||||
node_renames: &HashMap<String, String>,
|
||||
steps: &mut Vec<SchemaMigrationStep>,
|
||||
) {
|
||||
let accepted_by_name = accepted
|
||||
.iter()
|
||||
.map(|edge| (edge.name.as_str(), edge))
|
||||
.collect::<HashMap<_, _>>();
|
||||
let mut consumed = HashSet::new();
|
||||
|
||||
for edge in desired {
|
||||
let rename_from = rename_from_value(&edge.annotations);
|
||||
let matched = accepted_by_name
|
||||
.get(edge.name.as_str())
|
||||
.copied()
|
||||
.or_else(|| {
|
||||
rename_from.and_then(|from| {
|
||||
accepted_by_name
|
||||
.get(from)
|
||||
.copied()
|
||||
.filter(|candidate| candidate.name != edge.name)
|
||||
})
|
||||
});
|
||||
|
||||
let Some(existing) = matched else {
|
||||
if let Some(from) = rename_from {
|
||||
steps.push(SchemaMigrationStep::UnsupportedChange {
|
||||
entity: format!("edge:{}", edge.name),
|
||||
reason: format!(
|
||||
"edge '{}' declares @rename_from(\"{}\") but no accepted edge with that name exists",
|
||||
edge.name, from
|
||||
),
|
||||
});
|
||||
} else {
|
||||
steps.push(SchemaMigrationStep::AddType {
|
||||
type_kind: SchemaTypeKind::Edge,
|
||||
name: edge.name.clone(),
|
||||
});
|
||||
}
|
||||
continue;
|
||||
};
|
||||
|
||||
consumed.insert(existing.name.clone());
|
||||
if existing.name != edge.name {
|
||||
steps.push(SchemaMigrationStep::RenameType {
|
||||
type_kind: SchemaTypeKind::Edge,
|
||||
from: existing.name.clone(),
|
||||
to: edge.name.clone(),
|
||||
});
|
||||
}
|
||||
|
||||
let normalized_from = normalize_type_ref(&existing.from_type, node_renames);
|
||||
let normalized_to = normalize_type_ref(&existing.to_type, node_renames);
|
||||
if normalized_from != edge.from_type || normalized_to != edge.to_type {
|
||||
steps.push(SchemaMigrationStep::UnsupportedChange {
|
||||
entity: format!("edge:{}", edge.name),
|
||||
reason: format!(
|
||||
"changing edge endpoints on '{}' is not supported in schema migration v1",
|
||||
edge.name
|
||||
),
|
||||
});
|
||||
}
|
||||
if existing.cardinality != edge.cardinality {
|
||||
steps.push(SchemaMigrationStep::UnsupportedChange {
|
||||
entity: format!("edge:{}", edge.name),
|
||||
reason: format!(
|
||||
"changing cardinality on edge '{}' is not supported in schema migration v1",
|
||||
edge.name
|
||||
),
|
||||
});
|
||||
}
|
||||
|
||||
plan_type_metadata(
|
||||
SchemaTypeKind::Edge,
|
||||
&edge.name,
|
||||
&existing.annotations,
|
||||
&edge.annotations,
|
||||
steps,
|
||||
);
|
||||
let property_renames = plan_properties(
|
||||
SchemaTypeKind::Edge,
|
||||
&edge.name,
|
||||
&existing.properties,
|
||||
&edge.properties,
|
||||
steps,
|
||||
);
|
||||
plan_constraints(
|
||||
SchemaTypeKind::Edge,
|
||||
&edge.name,
|
||||
&existing.constraints,
|
||||
&edge.constraints,
|
||||
&property_renames,
|
||||
steps,
|
||||
);
|
||||
}
|
||||
|
||||
for leftover in accepted
|
||||
.iter()
|
||||
.filter(|edge| !consumed.contains(&edge.name))
|
||||
{
|
||||
steps.push(SchemaMigrationStep::UnsupportedChange {
|
||||
entity: format!("edge:{}", leftover.name),
|
||||
reason: format!(
|
||||
"removing edge type '{}' is not supported in schema migration v1",
|
||||
leftover.name
|
||||
),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
fn plan_properties(
|
||||
type_kind: SchemaTypeKind,
|
||||
type_name: &str,
|
||||
accepted: &[PropertyIR],
|
||||
desired: &[PropertyIR],
|
||||
steps: &mut Vec<SchemaMigrationStep>,
|
||||
) -> HashMap<String, String> {
|
||||
let accepted_by_name = accepted
|
||||
.iter()
|
||||
.map(|property| (property.name.as_str(), property))
|
||||
.collect::<HashMap<_, _>>();
|
||||
let mut consumed = HashSet::new();
|
||||
let mut renames = HashMap::new();
|
||||
|
||||
for property in desired {
|
||||
let rename_from = rename_from_value(&property.annotations);
|
||||
let matched = accepted_by_name
|
||||
.get(property.name.as_str())
|
||||
.copied()
|
||||
.or_else(|| {
|
||||
rename_from.and_then(|from| {
|
||||
accepted_by_name
|
||||
.get(from)
|
||||
.copied()
|
||||
.filter(|candidate| candidate.name != property.name)
|
||||
})
|
||||
});
|
||||
|
||||
let Some(existing) = matched else {
|
||||
if let Some(from) = rename_from {
|
||||
steps.push(SchemaMigrationStep::UnsupportedChange {
|
||||
entity: format!(
|
||||
"{}:{}.{}",
|
||||
schema_type_kind_key(type_kind),
|
||||
type_name,
|
||||
property.name
|
||||
),
|
||||
reason: format!(
|
||||
"property '{}.{}' declares @rename_from(\"{}\") but no accepted property with that name exists",
|
||||
type_name, property.name, from
|
||||
),
|
||||
});
|
||||
} else if property.prop_type.nullable {
|
||||
steps.push(SchemaMigrationStep::AddProperty {
|
||||
type_kind,
|
||||
type_name: type_name.to_string(),
|
||||
property_name: property.name.clone(),
|
||||
property_type: property.prop_type.clone(),
|
||||
});
|
||||
} else {
|
||||
steps.push(SchemaMigrationStep::UnsupportedChange {
|
||||
entity: format!(
|
||||
"{}:{}.{}",
|
||||
schema_type_kind_key(type_kind),
|
||||
type_name,
|
||||
property.name
|
||||
),
|
||||
reason: format!(
|
||||
"adding required property '{}.{}' requires a backfill and is not supported in schema migration v1",
|
||||
type_name, property.name
|
||||
),
|
||||
});
|
||||
}
|
||||
continue;
|
||||
};
|
||||
|
||||
consumed.insert(existing.name.clone());
|
||||
if existing.name != property.name {
|
||||
renames.insert(existing.name.clone(), property.name.clone());
|
||||
steps.push(SchemaMigrationStep::RenameProperty {
|
||||
type_kind,
|
||||
type_name: type_name.to_string(),
|
||||
from: existing.name.clone(),
|
||||
to: property.name.clone(),
|
||||
});
|
||||
}
|
||||
|
||||
if existing.prop_type != property.prop_type {
|
||||
steps.push(SchemaMigrationStep::UnsupportedChange {
|
||||
entity: format!(
|
||||
"{}:{}.{}",
|
||||
schema_type_kind_key(type_kind),
|
||||
type_name,
|
||||
property.name
|
||||
),
|
||||
reason: format!(
|
||||
"changing property type for '{}.{}' is not supported in schema migration v1",
|
||||
type_name, property.name
|
||||
),
|
||||
});
|
||||
}
|
||||
|
||||
plan_property_metadata(
|
||||
type_kind,
|
||||
type_name,
|
||||
&property.name,
|
||||
&existing.annotations,
|
||||
&property.annotations,
|
||||
steps,
|
||||
);
|
||||
}
|
||||
|
||||
for leftover in accepted
|
||||
.iter()
|
||||
.filter(|property| !consumed.contains(&property.name))
|
||||
{
|
||||
steps.push(SchemaMigrationStep::UnsupportedChange {
|
||||
entity: format!(
|
||||
"{}:{}.{}",
|
||||
schema_type_kind_key(type_kind),
|
||||
type_name,
|
||||
leftover.name
|
||||
),
|
||||
reason: format!(
|
||||
"removing property '{}.{}' is not supported in schema migration v1",
|
||||
type_name, leftover.name
|
||||
),
|
||||
});
|
||||
}
|
||||
|
||||
renames
|
||||
}
|
||||
|
||||
fn plan_constraints(
|
||||
type_kind: SchemaTypeKind,
|
||||
type_name: &str,
|
||||
accepted: &[Constraint],
|
||||
desired: &[Constraint],
|
||||
property_renames: &HashMap<String, String>,
|
||||
steps: &mut Vec<SchemaMigrationStep>,
|
||||
) {
|
||||
let accepted = accepted
|
||||
.iter()
|
||||
.cloned()
|
||||
.map(|constraint| rename_constraint_properties(constraint, property_renames))
|
||||
.collect::<Vec<_>>();
|
||||
let desired_map = desired
|
||||
.iter()
|
||||
.cloned()
|
||||
.map(|constraint| (constraint_key(&constraint), constraint))
|
||||
.collect::<BTreeMap<_, _>>();
|
||||
let accepted_map = accepted
|
||||
.into_iter()
|
||||
.map(|constraint| (constraint_key(&constraint), constraint))
|
||||
.collect::<BTreeMap<_, _>>();
|
||||
|
||||
let removed = accepted_map
|
||||
.keys()
|
||||
.filter(|key| !desired_map.contains_key(*key))
|
||||
.cloned()
|
||||
.collect::<Vec<_>>();
|
||||
if !removed.is_empty() {
|
||||
steps.push(SchemaMigrationStep::UnsupportedChange {
|
||||
entity: format!("{}:{}", schema_type_kind_key(type_kind), type_name),
|
||||
reason: format!(
|
||||
"removing constraints from '{}' is not supported in schema migration v1",
|
||||
type_name
|
||||
),
|
||||
});
|
||||
}
|
||||
|
||||
for (key, constraint) in desired_map {
|
||||
if accepted_map.contains_key(&key) {
|
||||
continue;
|
||||
}
|
||||
match constraint {
|
||||
Constraint::Index(_) => steps.push(SchemaMigrationStep::AddConstraint {
|
||||
type_kind,
|
||||
type_name: type_name.to_string(),
|
||||
constraint,
|
||||
}),
|
||||
_ => steps.push(SchemaMigrationStep::UnsupportedChange {
|
||||
entity: format!("{}:{}", schema_type_kind_key(type_kind), type_name),
|
||||
reason: format!(
|
||||
"adding constraint '{}' to '{}' is not supported in schema migration v1",
|
||||
key, type_name
|
||||
),
|
||||
}),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn plan_type_metadata(
|
||||
type_kind: SchemaTypeKind,
|
||||
name: &str,
|
||||
accepted: &[Annotation],
|
||||
desired: &[Annotation],
|
||||
steps: &mut Vec<SchemaMigrationStep>,
|
||||
) {
|
||||
match annotation_change_kind(accepted, desired) {
|
||||
AnnotationChangeKind::None => {}
|
||||
AnnotationChangeKind::MetadataOnly(metadata) => {
|
||||
steps.push(SchemaMigrationStep::UpdateTypeMetadata {
|
||||
type_kind,
|
||||
name: name.to_string(),
|
||||
annotations: metadata,
|
||||
});
|
||||
}
|
||||
AnnotationChangeKind::Unsupported(reason) => {
|
||||
steps.push(SchemaMigrationStep::UnsupportedChange {
|
||||
entity: format!("{}:{}", schema_type_kind_key(type_kind), name),
|
||||
reason,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn plan_property_metadata(
|
||||
type_kind: SchemaTypeKind,
|
||||
type_name: &str,
|
||||
property_name: &str,
|
||||
accepted: &[Annotation],
|
||||
desired: &[Annotation],
|
||||
steps: &mut Vec<SchemaMigrationStep>,
|
||||
) {
|
||||
match annotation_change_kind(accepted, desired) {
|
||||
AnnotationChangeKind::None => {}
|
||||
AnnotationChangeKind::MetadataOnly(metadata) => {
|
||||
steps.push(SchemaMigrationStep::UpdatePropertyMetadata {
|
||||
type_kind,
|
||||
type_name: type_name.to_string(),
|
||||
property_name: property_name.to_string(),
|
||||
annotations: metadata,
|
||||
});
|
||||
}
|
||||
AnnotationChangeKind::Unsupported(reason) => {
|
||||
steps.push(SchemaMigrationStep::UnsupportedChange {
|
||||
entity: format!(
|
||||
"{}:{}.{}",
|
||||
schema_type_kind_key(type_kind),
|
||||
type_name,
|
||||
property_name
|
||||
),
|
||||
reason,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
enum AnnotationChangeKind {
|
||||
None,
|
||||
MetadataOnly(Vec<Annotation>),
|
||||
Unsupported(String),
|
||||
}
|
||||
|
||||
fn annotation_change_kind(accepted: &[Annotation], desired: &[Annotation]) -> AnnotationChangeKind {
|
||||
let accepted_non_metadata = strip_metadata_annotations(accepted);
|
||||
let desired_non_metadata = strip_metadata_annotations(desired);
|
||||
if accepted_non_metadata != desired_non_metadata {
|
||||
return AnnotationChangeKind::Unsupported(
|
||||
"changing annotations beyond @description/@instruction is not supported in schema migration v1"
|
||||
.to_string(),
|
||||
);
|
||||
}
|
||||
|
||||
let accepted_metadata = metadata_annotations(accepted);
|
||||
let desired_metadata = metadata_annotations(desired);
|
||||
if accepted_metadata == desired_metadata {
|
||||
AnnotationChangeKind::None
|
||||
} else {
|
||||
AnnotationChangeKind::MetadataOnly(desired_metadata)
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns clones of the annotations that the metadata diff does not account
/// for elsewhere.
///
/// Despite the name, this drops more than the two metadata annotations: it
/// removes `description`, `instruction`, `rename_from`, `key`, `unique`, and
/// `index`, and keeps everything else in its original order.
fn strip_metadata_annotations(annotations: &[Annotation]) -> Vec<Annotation> {
    const IGNORED_NAMES: [&str; 6] = [
        "description",
        "instruction",
        "rename_from",
        "key",
        "unique",
        "index",
    ];

    annotations
        .iter()
        .filter(|annotation| !IGNORED_NAMES.contains(&annotation.name.as_str()))
        .cloned()
        .collect()
}
|
||||
|
||||
/// Returns clones of the `description` and `instruction` annotations, in
/// their original order.
fn metadata_annotations(annotations: &[Annotation]) -> Vec<Annotation> {
    let mut kept = Vec::new();
    for annotation in annotations {
        let name = annotation.name.as_str();
        if name == "description" || name == "instruction" {
            kept.push(annotation.clone());
        }
    }
    kept
}
|
||||
|
||||
fn normalize_strings(values: &[String], renames: &HashMap<String, String>) -> BTreeSet<String> {
|
||||
values
|
||||
.iter()
|
||||
.map(|value| normalize_type_ref(value, renames))
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Returns the renamed form of `value` if `renames` has an entry for it,
/// otherwise an owned copy of `value` itself.
fn normalize_type_ref(value: &str, renames: &HashMap<String, String>) -> String {
    match renames.get(value) {
        Some(renamed) => renamed.clone(),
        None => value.to_owned(),
    }
}
|
||||
|
||||
fn rename_constraint_properties(
|
||||
constraint: Constraint,
|
||||
property_renames: &HashMap<String, String>,
|
||||
) -> Constraint {
|
||||
match constraint {
|
||||
Constraint::Key(columns) => {
|
||||
Constraint::Key(rename_constraint_columns(columns, property_renames))
|
||||
}
|
||||
Constraint::Unique(columns) => {
|
||||
Constraint::Unique(rename_constraint_columns(columns, property_renames))
|
||||
}
|
||||
Constraint::Index(columns) => {
|
||||
Constraint::Index(rename_constraint_columns(columns, property_renames))
|
||||
}
|
||||
Constraint::Range { property, min, max } => Constraint::Range {
|
||||
property: normalize_property_ref(&property, property_renames),
|
||||
min,
|
||||
max,
|
||||
},
|
||||
Constraint::Check { property, pattern } => Constraint::Check {
|
||||
property: normalize_property_ref(&property, property_renames),
|
||||
pattern,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
fn rename_constraint_columns(
|
||||
columns: Vec<String>,
|
||||
property_renames: &HashMap<String, String>,
|
||||
) -> Vec<String> {
|
||||
let mut columns = columns
|
||||
.into_iter()
|
||||
.map(|column| normalize_property_ref(&column, property_renames))
|
||||
.collect::<Vec<_>>();
|
||||
columns.sort();
|
||||
columns
|
||||
}
|
||||
|
||||
/// Returns the renamed form of the property `value` if `renames` has an entry
/// for it, otherwise an owned copy of `value` itself.
fn normalize_property_ref(value: &str, renames: &HashMap<String, String>) -> String {
    renames
        .get(value)
        .map_or_else(|| value.to_owned(), String::clone)
}
|
||||
|
||||
fn constraint_key(constraint: &Constraint) -> String {
|
||||
match constraint {
|
||||
Constraint::Key(columns) => format!("key:{}", columns.join(",")),
|
||||
Constraint::Unique(columns) => format!("unique:{}", columns.join(",")),
|
||||
Constraint::Index(columns) => format!("index:{}", columns.join(",")),
|
||||
Constraint::Range { property, min, max } => {
|
||||
format!("range:{}:{:?}:{:?}", property, min, max)
|
||||
}
|
||||
Constraint::Check { property, pattern } => format!("check:{}:{}", property, pattern),
|
||||
}
|
||||
}
|
||||
|
||||
fn rename_from_value(annotations: &[Annotation]) -> Option<&str> {
|
||||
annotations
|
||||
.iter()
|
||||
.find(|annotation| annotation.name == "rename_from")
|
||||
.and_then(|annotation| annotation.value.as_deref())
|
||||
}
|
||||
|
||||
fn schema_type_kind_key(kind: SchemaTypeKind) -> &'static str {
|
||||
match kind {
|
||||
SchemaTypeKind::Interface => "interface",
|
||||
SchemaTypeKind::Node => "node",
|
||||
SchemaTypeKind::Edge => "edge",
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use crate::catalog::schema_ir::build_schema_ir;
    use crate::schema::parser::parse_schema;

    use super::SchemaMigrationStep::{
        AddConstraint, AddProperty, RenameProperty, RenameType, UnsupportedChange,
        UpdateTypeMetadata,
    };
    use super::*;

    /// Parses `source` and lowers it to schema IR, panicking on any failure.
    fn ir_of(source: &str) -> SchemaIR {
        let parsed = parse_schema(source).unwrap();
        build_schema_ir(&parsed).unwrap()
    }

    /// Plans the migration between two schema sources, panicking on any failure.
    fn plan_between(accepted_source: &str, desired_source: &str) -> SchemaMigrationPlan {
        let accepted = ir_of(accepted_source);
        let desired = ir_of(desired_source);
        plan_schema_migration(&accepted, &desired).unwrap()
    }

    #[test]
    fn plan_supports_additive_nullable_property_and_index() {
        let plan = plan_between(
            r#"
node Person {
    name: String @key
    age: I32?
}
"#,
            r#"
node Person {
    name: String @key
    age: I32? @index
    nickname: String?
}
"#,
        );

        assert!(plan.supported);

        let expected_property = AddProperty {
            type_kind: SchemaTypeKind::Node,
            type_name: "Person".to_string(),
            property_name: "nickname".to_string(),
            property_type: PropType::scalar(crate::types::ScalarType::String, true),
        };
        assert!(plan.steps.contains(&expected_property));

        let expected_index = AddConstraint {
            type_kind: SchemaTypeKind::Node,
            type_name: "Person".to_string(),
            constraint: Constraint::Index(vec!["age".to_string()]),
        };
        assert!(plan.steps.contains(&expected_index));
    }

    #[test]
    fn plan_supports_explicit_type_and_property_rename() {
        let plan = plan_between(
            r#"
node User {
    name: String @key
}
"#,
            r#"
node Account @rename_from("User") {
    full_name: String @key @rename_from("name")
}
"#,
        );

        assert!(plan.supported);

        let expected_type_rename = RenameType {
            type_kind: SchemaTypeKind::Node,
            from: "User".to_string(),
            to: "Account".to_string(),
        };
        assert!(plan.steps.contains(&expected_type_rename));

        let expected_property_rename = RenameProperty {
            type_kind: SchemaTypeKind::Node,
            type_name: "Account".to_string(),
            from: "name".to_string(),
            to: "full_name".to_string(),
        };
        assert!(plan.steps.contains(&expected_property_rename));
    }

    #[test]
    fn plan_rejects_required_property_addition() {
        let plan = plan_between(
            r#"
node Person {
    name: String @key
}
"#,
            r#"
node Person {
    name: String @key
    age: I32
}
"#,
        );

        assert!(!plan.supported);

        let reported = plan.steps.iter().any(|step| {
            matches!(
                step,
                UnsupportedChange { entity, reason }
                    if entity.contains("Person.age")
                        && reason.contains("adding required property")
            )
        });
        assert!(reported);
    }

    #[test]
    fn plan_supports_metadata_only_annotation_changes() {
        let plan = plan_between(
            r#"
node Person @description("old") {
    name: String @key
}
"#,
            r#"
node Person @description("new") {
    name: String @key
}
"#,
        );

        assert!(plan.supported);

        let expected_update = UpdateTypeMetadata {
            type_kind: SchemaTypeKind::Node,
            name: "Person".to_string(),
            annotations: vec![Annotation {
                name: "description".to_string(),
                value: Some("new".to_string()),
            }],
        };
        assert!(plan.steps.contains(&expected_update));
    }
}
|
||||
379
crates/omnigraph-compiler/src/embedding.rs
Normal file
379
crates/omnigraph-compiler/src/embedding.rs
Normal file
|
|
@ -0,0 +1,379 @@
|
|||
#![allow(dead_code)]
|
||||
|
||||
use std::time::Duration;
|
||||
|
||||
use reqwest::Client;
|
||||
use serde::Deserialize;
|
||||
use tokio::time::sleep;
|
||||
|
||||
use crate::error::{NanoError, Result};
|
||||
|
||||
/// Embedding model used when `NANOGRAPH_EMBED_MODEL` is unset or blank.
const DEFAULT_EMBED_MODEL: &str = "text-embedding-3-small";

/// API base URL used when `OPENAI_BASE_URL` is unset or empty.
const DEFAULT_OPENAI_BASE_URL: &str = "https://api.openai.com/v1";

/// Fallback HTTP timeout (milliseconds) for `NANOGRAPH_EMBED_TIMEOUT_MS`.
const DEFAULT_TIMEOUT_MS: u64 = 30_000;

/// Fallback attempt count for `NANOGRAPH_EMBED_RETRY_ATTEMPTS`.
const DEFAULT_RETRY_ATTEMPTS: usize = 4;

/// Fallback base backoff (milliseconds) for `NANOGRAPH_EMBED_RETRY_BACKOFF_MS`.
const DEFAULT_RETRY_BACKOFF_MS: u64 = 200;
|
||||
|
||||
#[derive(Clone)]
|
||||
enum EmbeddingTransport {
|
||||
Mock,
|
||||
OpenAi {
|
||||
api_key: String,
|
||||
base_url: String,
|
||||
http: Client,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub(crate) struct EmbeddingClient {
|
||||
model: String,
|
||||
retry_attempts: usize,
|
||||
retry_backoff_ms: u64,
|
||||
transport: EmbeddingTransport,
|
||||
}
|
||||
|
||||
/// Failure of a single embedding request attempt.
struct EmbedCallError {
    /// Human-readable description, surfaced to the caller once retries stop.
    message: String,
    /// Whether the retry loop may try the request again.
    retryable: bool,
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct OpenAiEmbeddingResponse {
|
||||
data: Vec<OpenAiEmbeddingDatum>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct OpenAiEmbeddingDatum {
|
||||
index: usize,
|
||||
embedding: Vec<f32>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct OpenAiErrorEnvelope {
|
||||
error: OpenAiErrorBody,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct OpenAiErrorBody {
|
||||
message: String,
|
||||
}
|
||||
|
||||
impl EmbeddingClient {
|
||||
pub(crate) fn from_env() -> Result<Self> {
|
||||
let model = std::env::var("NANOGRAPH_EMBED_MODEL")
|
||||
.ok()
|
||||
.map(|v| v.trim().to_string())
|
||||
.filter(|v| !v.is_empty())
|
||||
.unwrap_or_else(|| DEFAULT_EMBED_MODEL.to_string());
|
||||
let retry_attempts =
|
||||
parse_env_usize("NANOGRAPH_EMBED_RETRY_ATTEMPTS", DEFAULT_RETRY_ATTEMPTS);
|
||||
let retry_backoff_ms =
|
||||
parse_env_u64("NANOGRAPH_EMBED_RETRY_BACKOFF_MS", DEFAULT_RETRY_BACKOFF_MS);
|
||||
|
||||
if env_flag("NANOGRAPH_EMBEDDINGS_MOCK") {
|
||||
return Ok(Self {
|
||||
model,
|
||||
retry_attempts,
|
||||
retry_backoff_ms,
|
||||
transport: EmbeddingTransport::Mock,
|
||||
});
|
||||
}
|
||||
|
||||
let api_key = std::env::var("OPENAI_API_KEY")
|
||||
.ok()
|
||||
.map(|v| v.trim().to_string())
|
||||
.filter(|v| !v.is_empty())
|
||||
.ok_or_else(|| {
|
||||
NanoError::Execution(
|
||||
"OPENAI_API_KEY is required when an embedding call is needed".to_string(),
|
||||
)
|
||||
})?;
|
||||
let base_url = std::env::var("OPENAI_BASE_URL")
|
||||
.ok()
|
||||
.map(|v| v.trim_end_matches('/').to_string())
|
||||
.filter(|v| !v.is_empty())
|
||||
.unwrap_or_else(|| DEFAULT_OPENAI_BASE_URL.to_string());
|
||||
let timeout_ms = parse_env_u64("NANOGRAPH_EMBED_TIMEOUT_MS", DEFAULT_TIMEOUT_MS);
|
||||
let http = Client::builder()
|
||||
.timeout(Duration::from_millis(timeout_ms))
|
||||
.build()
|
||||
.map_err(|e| {
|
||||
NanoError::Execution(format!("failed to initialize HTTP client: {}", e))
|
||||
})?;
|
||||
|
||||
Ok(Self {
|
||||
model,
|
||||
retry_attempts,
|
||||
retry_backoff_ms,
|
||||
transport: EmbeddingTransport::OpenAi {
|
||||
api_key,
|
||||
base_url,
|
||||
http,
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
/// Test-only constructor: a mock-transport client using every default setting.
#[cfg(test)]
pub(crate) fn mock_for_tests() -> Self {
    let model = String::from(DEFAULT_EMBED_MODEL);
    Self {
        model,
        retry_attempts: DEFAULT_RETRY_ATTEMPTS,
        retry_backoff_ms: DEFAULT_RETRY_BACKOFF_MS,
        transport: EmbeddingTransport::Mock,
    }
}
|
||||
|
||||
pub(crate) fn model(&self) -> &str {
|
||||
&self.model
|
||||
}
|
||||
|
||||
pub(crate) async fn embed_text(&self, input: &str, expected_dim: usize) -> Result<Vec<f32>> {
|
||||
let mut vectors = self.embed_texts(&[input.to_string()], expected_dim).await?;
|
||||
vectors.pop().ok_or_else(|| {
|
||||
NanoError::Execution("embedding provider returned no vector".to_string())
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) async fn embed_texts(
|
||||
&self,
|
||||
inputs: &[String],
|
||||
expected_dim: usize,
|
||||
) -> Result<Vec<Vec<f32>>> {
|
||||
if expected_dim == 0 {
|
||||
return Err(NanoError::Execution(
|
||||
"embedding dimension must be greater than zero".to_string(),
|
||||
));
|
||||
}
|
||||
if inputs.is_empty() {
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
|
||||
match &self.transport {
|
||||
EmbeddingTransport::Mock => Ok(inputs
|
||||
.iter()
|
||||
.map(|input| mock_embedding(input, expected_dim))
|
||||
.collect()),
|
||||
EmbeddingTransport::OpenAi { .. } => {
|
||||
self.embed_texts_openai_with_retry(inputs, expected_dim)
|
||||
.await
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn embed_texts_openai_with_retry(
|
||||
&self,
|
||||
inputs: &[String],
|
||||
expected_dim: usize,
|
||||
) -> Result<Vec<Vec<f32>>> {
|
||||
let max_attempt = self.retry_attempts.max(1);
|
||||
let mut attempt = 0usize;
|
||||
loop {
|
||||
attempt += 1;
|
||||
match self.embed_texts_openai_once(inputs, expected_dim).await {
|
||||
Ok(vectors) => return Ok(vectors),
|
||||
Err(err) => {
|
||||
if !err.retryable || attempt >= max_attempt {
|
||||
return Err(NanoError::Execution(err.message));
|
||||
}
|
||||
let shift = (attempt - 1).min(10) as u32;
|
||||
let delay = self.retry_backoff_ms.saturating_mul(1u64 << shift);
|
||||
sleep(Duration::from_millis(delay)).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn embed_texts_openai_once(
|
||||
&self,
|
||||
inputs: &[String],
|
||||
expected_dim: usize,
|
||||
) -> std::result::Result<Vec<Vec<f32>>, EmbedCallError> {
|
||||
let (api_key, base_url, http) = match &self.transport {
|
||||
EmbeddingTransport::OpenAi {
|
||||
api_key,
|
||||
base_url,
|
||||
http,
|
||||
} => (api_key, base_url, http),
|
||||
EmbeddingTransport::Mock => unreachable!("mock transport should not call OpenAI"),
|
||||
};
|
||||
|
||||
let request = serde_json::json!({
|
||||
"model": self.model,
|
||||
"input": inputs,
|
||||
"dimensions": expected_dim,
|
||||
});
|
||||
let url = format!("{}/embeddings", base_url);
|
||||
let response = http
|
||||
.post(&url)
|
||||
.bearer_auth(api_key)
|
||||
.json(&request)
|
||||
.send()
|
||||
.await;
|
||||
|
||||
let response = match response {
|
||||
Ok(resp) => resp,
|
||||
Err(err) => {
|
||||
let retryable = err.is_timeout() || err.is_connect() || err.is_request();
|
||||
return Err(EmbedCallError {
|
||||
message: format!("embedding request failed: {}", err),
|
||||
retryable,
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
let status = response.status();
|
||||
let body = match response.text().await {
|
||||
Ok(body) => body,
|
||||
Err(err) => {
|
||||
return Err(EmbedCallError {
|
||||
message: format!(
|
||||
"embedding response read failed (status {}): {}",
|
||||
status, err
|
||||
),
|
||||
retryable: status.is_server_error() || status.as_u16() == 429,
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
if !status.is_success() {
|
||||
let message = parse_openai_error_message(&body).unwrap_or_else(|| body.clone());
|
||||
return Err(EmbedCallError {
|
||||
message: format!(
|
||||
"embedding request failed with status {}: {}",
|
||||
status, message
|
||||
),
|
||||
retryable: status.is_server_error() || status.as_u16() == 429,
|
||||
});
|
||||
}
|
||||
|
||||
let mut parsed: OpenAiEmbeddingResponse =
|
||||
serde_json::from_str(&body).map_err(|err| EmbedCallError {
|
||||
message: format!("embedding response decode failed: {}", err),
|
||||
retryable: false,
|
||||
})?;
|
||||
|
||||
if parsed.data.len() != inputs.len() {
|
||||
return Err(EmbedCallError {
|
||||
message: format!(
|
||||
"embedding response size mismatch: expected {}, got {}",
|
||||
inputs.len(),
|
||||
parsed.data.len()
|
||||
),
|
||||
retryable: false,
|
||||
});
|
||||
}
|
||||
|
||||
parsed.data.sort_by_key(|item| item.index);
|
||||
let mut vectors = Vec::with_capacity(parsed.data.len());
|
||||
for (idx, item) in parsed.data.into_iter().enumerate() {
|
||||
if item.index != idx {
|
||||
return Err(EmbedCallError {
|
||||
message: format!(
|
||||
"embedding response index mismatch at position {}: got {}",
|
||||
idx, item.index
|
||||
),
|
||||
retryable: false,
|
||||
});
|
||||
}
|
||||
if item.embedding.len() != expected_dim {
|
||||
return Err(EmbedCallError {
|
||||
message: format!(
|
||||
"embedding dimension mismatch: expected {}, got {}",
|
||||
expected_dim,
|
||||
item.embedding.len()
|
||||
),
|
||||
retryable: false,
|
||||
});
|
||||
}
|
||||
vectors.push(item.embedding);
|
||||
}
|
||||
Ok(vectors)
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_openai_error_message(body: &str) -> Option<String> {
|
||||
serde_json::from_str::<OpenAiErrorEnvelope>(body)
|
||||
.ok()
|
||||
.map(|e| e.error.message)
|
||||
.filter(|msg| !msg.trim().is_empty())
|
||||
}
|
||||
|
||||
/// Reads environment variable `name` as a strictly positive `usize`.
///
/// Surrounding whitespace is ignored, matching how `env_flag` treats its values. Returns
/// `default` when the variable is unset, not valid Unicode, not a number, or zero.
fn parse_env_usize(name: &str, default: usize) -> usize {
    std::env::var(name)
        .ok()
        .and_then(|v| v.trim().parse::<usize>().ok())
        .filter(|v| *v > 0)
        .unwrap_or(default)
}
|
||||
|
||||
/// Reads environment variable `name` as a strictly positive `u64`.
///
/// Surrounding whitespace is ignored, matching how `env_flag` treats its values. Returns
/// `default` when the variable is unset, not valid Unicode, not a number, or zero.
fn parse_env_u64(name: &str, default: u64) -> u64 {
    std::env::var(name)
        .ok()
        .and_then(|v| v.trim().parse::<u64>().ok())
        .filter(|v| *v > 0)
        .unwrap_or(default)
}
|
||||
|
||||
/// Returns `true` when environment variable `name` holds a truthy value.
///
/// Truthy values are `1`, `true`, `yes` and `on`, compared case-insensitively after trimming
/// whitespace. Anything else, including an unset variable, yields `false`.
fn env_flag(name: &str) -> bool {
    std::env::var(name)
        .ok()
        .map(|v| {
            let s = v.trim().to_ascii_lowercase();
            s == "1" || s == "true" || s == "yes" || s == "on"
        })
        .unwrap_or(false)
}
|
||||
|
||||
fn mock_embedding(input: &str, dim: usize) -> Vec<f32> {
|
||||
let mut seed = fnv1a64(input.as_bytes());
|
||||
let mut out = Vec::with_capacity(dim);
|
||||
for _ in 0..dim {
|
||||
seed = xorshift64(seed);
|
||||
let ratio = (seed as f64 / u64::MAX as f64) as f32;
|
||||
out.push((ratio * 2.0) - 1.0);
|
||||
}
|
||||
|
||||
let norm = out
|
||||
.iter()
|
||||
.map(|v| (*v as f64) * (*v as f64))
|
||||
.sum::<f64>()
|
||||
.sqrt() as f32;
|
||||
if norm > f32::EPSILON {
|
||||
for value in &mut out {
|
||||
*value /= norm;
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Computes the 64-bit FNV-1a hash of `bytes`.
///
/// Each byte is XORed into the running hash, which is then multiplied (wrapping) by the
/// FNV prime. An empty slice hashes to the offset basis.
fn fnv1a64(bytes: &[u8]) -> u64 {
    const OFFSET_BASIS: u64 = 14695981039346656037;
    const PRIME: u64 = 1099511628211;
    bytes.iter().fold(OFFSET_BASIS, |hash, &byte| {
        (hash ^ u64::from(byte)).wrapping_mul(PRIME)
    })
}
|
||||
|
||||
/// Advances a 64-bit xorshift state by one step using the shift triple (13, 7, 17).
///
/// Zero is a fixed point: a zero state stays zero.
fn xorshift64(mut x: u64) -> u64 {
    x ^= x << 13;
    x ^= x >> 7;
    x ^= x << 17;
    x
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// The mock transport must return identical vectors for identical text, different
    /// vectors for different text, and honour the requested dimension.
    #[tokio::test]
    async fn mock_embeddings_are_deterministic() {
        let client = EmbeddingClient::mock_for_tests();
        let a = client.embed_text("alpha", 8).await.unwrap();
        let b = client.embed_text("alpha", 8).await.unwrap();
        let c = client.embed_text("beta", 8).await.unwrap();
        assert_eq!(a, b);
        assert_ne!(a, c);
        assert_eq!(a.len(), 8);
    }
}
|
||||
146
crates/omnigraph-compiler/src/error.rs
Normal file
146
crates/omnigraph-compiler/src/error.rs
Normal file
|
|
@ -0,0 +1,146 @@
|
|||
use thiserror::Error;
|
||||
|
||||
/// A `start..end` range of positions within source text.
///
/// The constructor performs no validation, so zero-width spans (`start == end`) are kept
/// exactly as given.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SourceSpan {
    /// Position where the span begins.
    pub start: usize,
    /// Position where the span ends.
    pub end: usize,
}

impl SourceSpan {
    /// Creates a span from `start` to `end` without normalizing either bound.
    pub fn new(start: usize, end: usize) -> Self {
        Self { start, end }
    }
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct ParseDiagnostic {
|
||||
pub message: String,
|
||||
pub span: Option<SourceSpan>,
|
||||
}
|
||||
|
||||
impl ParseDiagnostic {
|
||||
pub fn new(message: String, span: Option<SourceSpan>) -> Self {
|
||||
Self { message, span }
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for ParseDiagnostic {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{}", self.message)
|
||||
}
|
||||
}
|
||||
|
||||
impl std::error::Error for ParseDiagnostic {}
|
||||
|
||||
pub fn render_span(span: SourceSpan) -> SourceSpan {
|
||||
SourceSpan {
|
||||
start: span.start,
|
||||
end: span.end.max(span.start.saturating_add(1)),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn decode_string_literal(raw: &str) -> Result<String> {
|
||||
let inner = raw
|
||||
.strip_prefix('"')
|
||||
.and_then(|inner| inner.strip_suffix('"'))
|
||||
.unwrap_or(raw);
|
||||
|
||||
let mut decoded = String::with_capacity(inner.len());
|
||||
let mut chars = inner.chars();
|
||||
while let Some(ch) = chars.next() {
|
||||
if ch != '\\' {
|
||||
decoded.push(ch);
|
||||
continue;
|
||||
}
|
||||
|
||||
let escaped = chars
|
||||
.next()
|
||||
.ok_or_else(|| NanoError::Parse("unterminated escape sequence".to_string()))?;
|
||||
match escaped {
|
||||
'"' => decoded.push('"'),
|
||||
'\\' => decoded.push('\\'),
|
||||
'n' => decoded.push('\n'),
|
||||
'r' => decoded.push('\r'),
|
||||
't' => decoded.push('\t'),
|
||||
other => {
|
||||
return Err(NanoError::Parse(format!(
|
||||
"unsupported escape sequence: \\{}",
|
||||
other
|
||||
)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(decoded)
|
||||
}
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum NanoError {
|
||||
#[error("parse error: {0}")]
|
||||
Parse(String),
|
||||
|
||||
#[error("catalog error: {0}")]
|
||||
Catalog(String),
|
||||
|
||||
#[error("type error: {0}")]
|
||||
Type(String),
|
||||
|
||||
#[error("storage error: {0}")]
|
||||
Storage(String),
|
||||
|
||||
#[error(
|
||||
"@unique constraint violation on {type_name}.{property}: duplicate value '{value}' at rows {first_row} and {second_row}"
|
||||
)]
|
||||
UniqueConstraint {
|
||||
type_name: String,
|
||||
property: String,
|
||||
value: String,
|
||||
first_row: usize,
|
||||
second_row: usize,
|
||||
},
|
||||
|
||||
#[error("plan error: {0}")]
|
||||
Plan(String),
|
||||
|
||||
#[error("execution error: {0}")]
|
||||
Execution(String),
|
||||
|
||||
#[error(transparent)]
|
||||
Arrow(#[from] arrow_schema::ArrowError),
|
||||
|
||||
#[error("io error: {0}")]
|
||||
Io(#[from] std::io::Error),
|
||||
|
||||
#[error("lance error: {0}")]
|
||||
Lance(String),
|
||||
|
||||
#[error("manifest error: {0}")]
|
||||
Manifest(String),
|
||||
}
|
||||
|
||||
/// Crate-wide result alias whose error type is [`NanoError`].
pub type Result<T> = std::result::Result<T, NanoError>;
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::{SourceSpan, decode_string_literal, render_span};

    /// Constructing a span must not widen or otherwise normalize it.
    #[test]
    fn source_span_preserves_zero_width() {
        let span = SourceSpan::new(7, 7);
        assert_eq!(span.start, 7);
        assert_eq!(span.end, 7);
    }

    /// Rendering widens a zero-width span to one position.
    #[test]
    fn render_span_widens_zero_width_for_diagnostics() {
        let rendered = render_span(SourceSpan::new(7, 7));
        assert_eq!(rendered.start, 7);
        assert_eq!(rendered.end, 8);
    }

    /// Every supported escape decodes to its corresponding character.
    #[test]
    fn decode_string_literal_supports_common_escapes() {
        let decoded = decode_string_literal("\"a\\n\\r\\t\\\\\\\"b\"").unwrap();
        assert_eq!(decoded, "a\n\r\t\\\"b");
    }
}
|
||||
657
crates/omnigraph-compiler/src/ir/lower.rs
Normal file
657
crates/omnigraph-compiler/src/ir/lower.rs
Normal file
|
|
@ -0,0 +1,657 @@
|
|||
use std::collections::HashSet;
|
||||
|
||||
use crate::catalog::Catalog;
|
||||
use crate::error::Result;
|
||||
use crate::query::ast::*;
|
||||
use crate::query::typecheck::TypeContext;
|
||||
use crate::types::Direction;
|
||||
|
||||
use super::*;
|
||||
|
||||
pub fn lower_query(
|
||||
catalog: &Catalog,
|
||||
query: &QueryDecl,
|
||||
type_ctx: &TypeContext,
|
||||
) -> Result<QueryIR> {
|
||||
if query.mutation.is_some() {
|
||||
return Err(crate::error::NanoError::Plan(
|
||||
"cannot lower mutation query with read-query lowerer".to_string(),
|
||||
));
|
||||
}
|
||||
let param_names: HashSet<String> = query.params.iter().map(|p| p.name.clone()).collect();
|
||||
|
||||
let mut pipeline = Vec::new();
|
||||
let mut bound_vars = HashSet::new();
|
||||
|
||||
lower_clauses(
|
||||
catalog,
|
||||
&query.match_clause,
|
||||
type_ctx,
|
||||
&mut pipeline,
|
||||
&mut bound_vars,
|
||||
¶m_names,
|
||||
)?;
|
||||
|
||||
let return_exprs: Vec<IRProjection> = query
|
||||
.return_clause
|
||||
.iter()
|
||||
.map(|p| IRProjection {
|
||||
expr: lower_expr(&p.expr, ¶m_names),
|
||||
alias: p.alias.clone(),
|
||||
})
|
||||
.collect();
|
||||
|
||||
let order_by: Vec<IROrdering> = query
|
||||
.order_clause
|
||||
.iter()
|
||||
.map(|o| IROrdering {
|
||||
expr: lower_expr(&o.expr, ¶m_names),
|
||||
descending: o.descending,
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(QueryIR {
|
||||
name: query.name.clone(),
|
||||
params: query.params.clone(),
|
||||
pipeline,
|
||||
return_exprs,
|
||||
order_by,
|
||||
limit: query.limit,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn lower_mutation_query(query: &QueryDecl) -> Result<MutationIR> {
|
||||
let mutation = query.mutation.as_ref().ok_or_else(|| {
|
||||
crate::error::NanoError::Plan("query does not contain a mutation body".to_string())
|
||||
})?;
|
||||
let param_names: HashSet<String> = query.params.iter().map(|p| p.name.clone()).collect();
|
||||
|
||||
let op = match mutation {
|
||||
Mutation::Insert(insert) => MutationOpIR::Insert {
|
||||
type_name: insert.type_name.clone(),
|
||||
assignments: insert
|
||||
.assignments
|
||||
.iter()
|
||||
.map(|a| IRAssignment {
|
||||
property: a.property.clone(),
|
||||
value: lower_match_value(&a.value, ¶m_names),
|
||||
})
|
||||
.collect(),
|
||||
},
|
||||
Mutation::Update(update) => MutationOpIR::Update {
|
||||
type_name: update.type_name.clone(),
|
||||
assignments: update
|
||||
.assignments
|
||||
.iter()
|
||||
.map(|a| IRAssignment {
|
||||
property: a.property.clone(),
|
||||
value: lower_match_value(&a.value, ¶m_names),
|
||||
})
|
||||
.collect(),
|
||||
predicate: IRMutationPredicate {
|
||||
property: update.predicate.property.clone(),
|
||||
op: update.predicate.op,
|
||||
value: lower_match_value(&update.predicate.value, ¶m_names),
|
||||
},
|
||||
},
|
||||
Mutation::Delete(delete) => MutationOpIR::Delete {
|
||||
type_name: delete.type_name.clone(),
|
||||
predicate: IRMutationPredicate {
|
||||
property: delete.predicate.property.clone(),
|
||||
op: delete.predicate.op,
|
||||
value: lower_match_value(&delete.predicate.value, ¶m_names),
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
Ok(MutationIR {
|
||||
name: query.name.clone(),
|
||||
params: query.params.clone(),
|
||||
op,
|
||||
})
|
||||
}
|
||||
|
||||
fn lower_clauses(
|
||||
catalog: &Catalog,
|
||||
clauses: &[Clause],
|
||||
type_ctx: &TypeContext,
|
||||
pipeline: &mut Vec<IROp>,
|
||||
bound_vars: &mut HashSet<String>,
|
||||
param_names: &HashSet<String>,
|
||||
) -> Result<()> {
|
||||
// Separate clause types for ordering: bindings first, then traversals, then filters
|
||||
let mut bindings = Vec::new();
|
||||
let mut traversals = Vec::new();
|
||||
let mut filters = Vec::new();
|
||||
let mut negations = Vec::new();
|
||||
|
||||
for clause in clauses {
|
||||
match clause {
|
||||
Clause::Binding(b) => bindings.push(b),
|
||||
Clause::Traversal(t) => traversals.push(t),
|
||||
Clause::Filter(f) => filters.push(f),
|
||||
Clause::Negation(inner) => negations.push(inner),
|
||||
}
|
||||
}
|
||||
|
||||
// Lower bindings into NodeScan ops
|
||||
for binding in &bindings {
|
||||
let node_type = catalog
|
||||
.node_types
|
||||
.get(&binding.type_name)
|
||||
.expect("binding type was validated during typecheck");
|
||||
// Collect inline filters from prop matches
|
||||
let mut scan_filters = Vec::new();
|
||||
for pm in &binding.prop_matches {
|
||||
let prop = node_type
|
||||
.properties
|
||||
.get(&pm.prop_name)
|
||||
.expect("binding property was validated during typecheck");
|
||||
let op = if prop.list {
|
||||
CompOp::Contains
|
||||
} else {
|
||||
CompOp::Eq
|
||||
};
|
||||
match &pm.value {
|
||||
MatchValue::Literal(lit) => {
|
||||
scan_filters.push(IRFilter {
|
||||
left: IRExpr::PropAccess {
|
||||
variable: binding.variable.clone(),
|
||||
property: pm.prop_name.clone(),
|
||||
},
|
||||
op,
|
||||
right: IRExpr::Literal(lit.clone()),
|
||||
});
|
||||
}
|
||||
MatchValue::Now => {
|
||||
scan_filters.push(IRFilter {
|
||||
left: IRExpr::PropAccess {
|
||||
variable: binding.variable.clone(),
|
||||
property: pm.prop_name.clone(),
|
||||
},
|
||||
op,
|
||||
right: IRExpr::Param(NOW_PARAM_NAME.to_string()),
|
||||
});
|
||||
}
|
||||
MatchValue::Variable(v) => {
|
||||
let right = if param_names.contains(v) {
|
||||
IRExpr::Param(v.clone())
|
||||
} else {
|
||||
IRExpr::Variable(v.clone())
|
||||
};
|
||||
scan_filters.push(IRFilter {
|
||||
left: IRExpr::PropAccess {
|
||||
variable: binding.variable.clone(),
|
||||
property: pm.prop_name.clone(),
|
||||
},
|
||||
op,
|
||||
right,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pipeline.push(IROp::NodeScan {
|
||||
variable: binding.variable.clone(),
|
||||
type_name: binding.type_name.clone(),
|
||||
filters: scan_filters,
|
||||
});
|
||||
bound_vars.insert(binding.variable.clone());
|
||||
}
|
||||
|
||||
// Lower traversals into Expand ops
|
||||
// Handle "cycle closing" — if both src and dst are already bound, use a filter
|
||||
for traversal in &traversals {
|
||||
let edge = catalog
|
||||
.lookup_edge_by_name(&traversal.edge_name)
|
||||
.ok_or_else(|| {
|
||||
crate::error::NanoError::Plan(format!(
|
||||
"lowering traversal referenced missing edge '{}' after typecheck",
|
||||
traversal.edge_name
|
||||
))
|
||||
})?;
|
||||
|
||||
// Determine direction from type context
|
||||
let direction = type_ctx
|
||||
.traversals
|
||||
.iter()
|
||||
.find(|rt| {
|
||||
rt.src == traversal.src && rt.dst == traversal.dst && rt.edge_type == edge.name
|
||||
})
|
||||
.map(|rt| rt.direction)
|
||||
.unwrap_or(Direction::Out);
|
||||
|
||||
let dst_type = match direction {
|
||||
Direction::Out => edge.to_type.clone(),
|
||||
Direction::In => edge.from_type.clone(),
|
||||
};
|
||||
|
||||
if bound_vars.contains(&traversal.src) && bound_vars.contains(&traversal.dst) {
|
||||
// Cycle closing: emit expand to a temp var, then filter temp.id = dst.id
|
||||
let temp_var = format!("__temp_{}", traversal.dst);
|
||||
pipeline.push(IROp::Expand {
|
||||
src_var: traversal.src.clone(),
|
||||
dst_var: temp_var.clone(),
|
||||
edge_type: edge.name.clone(),
|
||||
direction,
|
||||
dst_type,
|
||||
min_hops: traversal.min_hops,
|
||||
max_hops: traversal.max_hops,
|
||||
});
|
||||
pipeline.push(IROp::Filter(IRFilter {
|
||||
left: IRExpr::PropAccess {
|
||||
variable: temp_var,
|
||||
property: "id".to_string(),
|
||||
},
|
||||
op: CompOp::Eq,
|
||||
right: IRExpr::PropAccess {
|
||||
variable: traversal.dst.clone(),
|
||||
property: "id".to_string(),
|
||||
},
|
||||
}));
|
||||
} else if !bound_vars.contains(&traversal.src) && bound_vars.contains(&traversal.dst) {
|
||||
// Reverse expand: dst is bound, src is not.
|
||||
// Swap direction and expand from dst to discover src.
|
||||
let reverse_dir = match direction {
|
||||
Direction::Out => Direction::In,
|
||||
Direction::In => Direction::Out,
|
||||
};
|
||||
let src_type = match direction {
|
||||
Direction::Out => edge.from_type.clone(),
|
||||
Direction::In => edge.to_type.clone(),
|
||||
};
|
||||
pipeline.push(IROp::Expand {
|
||||
src_var: traversal.dst.clone(),
|
||||
dst_var: traversal.src.clone(),
|
||||
edge_type: edge.name.clone(),
|
||||
direction: reverse_dir,
|
||||
dst_type: src_type,
|
||||
min_hops: traversal.min_hops,
|
||||
max_hops: traversal.max_hops,
|
||||
});
|
||||
if traversal.src != "_" {
|
||||
bound_vars.insert(traversal.src.clone());
|
||||
}
|
||||
} else {
|
||||
pipeline.push(IROp::Expand {
|
||||
src_var: traversal.src.clone(),
|
||||
dst_var: traversal.dst.clone(),
|
||||
edge_type: edge.name.clone(),
|
||||
direction,
|
||||
dst_type,
|
||||
min_hops: traversal.min_hops,
|
||||
max_hops: traversal.max_hops,
|
||||
});
|
||||
if traversal.dst != "_" {
|
||||
bound_vars.insert(traversal.dst.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Lower explicit filters
|
||||
for filter in &filters {
|
||||
pipeline.push(IROp::Filter(IRFilter {
|
||||
left: lower_expr(&filter.left, param_names),
|
||||
op: filter.op,
|
||||
right: lower_expr(&filter.right, param_names),
|
||||
}));
|
||||
}
|
||||
|
||||
// Lower negations into AntiJoin ops
|
||||
for neg_clauses in &negations {
|
||||
// Find outer-bound variable referenced in the negation
|
||||
let outer_var = find_outer_var(neg_clauses, bound_vars);
|
||||
|
||||
let mut inner_pipeline = Vec::new();
|
||||
let mut inner_bound = bound_vars.clone();
|
||||
lower_clauses(
|
||||
catalog,
|
||||
neg_clauses,
|
||||
type_ctx,
|
||||
&mut inner_pipeline,
|
||||
&mut inner_bound,
|
||||
param_names,
|
||||
)?;
|
||||
|
||||
pipeline.push(IROp::AntiJoin {
|
||||
outer_var: outer_var.unwrap_or_default(),
|
||||
inner: inner_pipeline,
|
||||
});
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn find_outer_var(clauses: &[Clause], outer_bound: &HashSet<String>) -> Option<String> {
|
||||
for clause in clauses {
|
||||
match clause {
|
||||
Clause::Traversal(t) => {
|
||||
if outer_bound.contains(&t.src) {
|
||||
return Some(t.src.clone());
|
||||
}
|
||||
if outer_bound.contains(&t.dst) {
|
||||
return Some(t.dst.clone());
|
||||
}
|
||||
}
|
||||
Clause::Filter(f) => {
|
||||
if let Some(v) = expr_var(&f.left)
|
||||
&& outer_bound.contains(&v)
|
||||
{
|
||||
return Some(v);
|
||||
}
|
||||
if let Some(v) = expr_var(&f.right)
|
||||
&& outer_bound.contains(&v)
|
||||
{
|
||||
return Some(v);
|
||||
}
|
||||
}
|
||||
Clause::Binding(b) => {
|
||||
if outer_bound.contains(&b.variable) {
|
||||
return Some(b.variable.clone());
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn expr_var(expr: &Expr) -> Option<String> {
|
||||
match expr {
|
||||
Expr::Now => None,
|
||||
Expr::PropAccess { variable, .. } => Some(variable.clone()),
|
||||
Expr::Variable(v) => Some(v.clone()),
|
||||
Expr::Nearest { variable, .. } => Some(variable.clone()),
|
||||
Expr::Search { field, query } => expr_var(field).or_else(|| expr_var(query)),
|
||||
Expr::Fuzzy {
|
||||
field,
|
||||
query,
|
||||
max_edits,
|
||||
} => expr_var(field)
|
||||
.or_else(|| expr_var(query))
|
||||
.or_else(|| max_edits.as_deref().and_then(expr_var)),
|
||||
Expr::MatchText { field, query } => expr_var(field).or_else(|| expr_var(query)),
|
||||
Expr::Bm25 { field, query } => expr_var(field).or_else(|| expr_var(query)),
|
||||
Expr::Rrf {
|
||||
primary,
|
||||
secondary,
|
||||
k,
|
||||
} => expr_var(primary)
|
||||
.or_else(|| expr_var(secondary))
|
||||
.or_else(|| k.as_deref().and_then(expr_var)),
|
||||
Expr::Aggregate { arg, .. } => expr_var(arg),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn lower_expr(expr: &Expr, param_names: &HashSet<String>) -> IRExpr {
|
||||
match expr {
|
||||
Expr::Now => IRExpr::Param(NOW_PARAM_NAME.to_string()),
|
||||
Expr::PropAccess { variable, property } => IRExpr::PropAccess {
|
||||
variable: variable.clone(),
|
||||
property: property.clone(),
|
||||
},
|
||||
Expr::Nearest {
|
||||
variable,
|
||||
property,
|
||||
query,
|
||||
} => IRExpr::Nearest {
|
||||
variable: variable.clone(),
|
||||
property: property.clone(),
|
||||
query: Box::new(lower_expr(query, param_names)),
|
||||
},
|
||||
Expr::Search { field, query } => IRExpr::Search {
|
||||
field: Box::new(lower_expr(field, param_names)),
|
||||
query: Box::new(lower_expr(query, param_names)),
|
||||
},
|
||||
Expr::Fuzzy {
|
||||
field,
|
||||
query,
|
||||
max_edits,
|
||||
} => IRExpr::Fuzzy {
|
||||
field: Box::new(lower_expr(field, param_names)),
|
||||
query: Box::new(lower_expr(query, param_names)),
|
||||
max_edits: max_edits
|
||||
.as_ref()
|
||||
.map(|expr| Box::new(lower_expr(expr, param_names))),
|
||||
},
|
||||
Expr::MatchText { field, query } => IRExpr::MatchText {
|
||||
field: Box::new(lower_expr(field, param_names)),
|
||||
query: Box::new(lower_expr(query, param_names)),
|
||||
},
|
||||
Expr::Bm25 { field, query } => IRExpr::Bm25 {
|
||||
field: Box::new(lower_expr(field, param_names)),
|
||||
query: Box::new(lower_expr(query, param_names)),
|
||||
},
|
||||
Expr::Rrf {
|
||||
primary,
|
||||
secondary,
|
||||
k,
|
||||
} => IRExpr::Rrf {
|
||||
primary: Box::new(lower_expr(primary, param_names)),
|
||||
secondary: Box::new(lower_expr(secondary, param_names)),
|
||||
k: k.as_ref()
|
||||
.map(|expr| Box::new(lower_expr(expr, param_names))),
|
||||
},
|
||||
Expr::Variable(v) => {
|
||||
if param_names.contains(v) {
|
||||
IRExpr::Param(v.clone())
|
||||
} else {
|
||||
IRExpr::Variable(v.clone())
|
||||
}
|
||||
}
|
||||
Expr::Literal(l) => IRExpr::Literal(l.clone()),
|
||||
Expr::Aggregate { func, arg } => IRExpr::Aggregate {
|
||||
func: *func,
|
||||
arg: Box::new(lower_expr(arg, param_names)),
|
||||
},
|
||||
Expr::AliasRef(name) => IRExpr::AliasRef(name.clone()),
|
||||
}
|
||||
}
|
||||
|
||||
fn lower_match_value(value: &MatchValue, param_names: &HashSet<String>) -> IRExpr {
|
||||
match value {
|
||||
MatchValue::Now => IRExpr::Param(NOW_PARAM_NAME.to_string()),
|
||||
MatchValue::Literal(l) => IRExpr::Literal(l.clone()),
|
||||
MatchValue::Variable(v) => {
|
||||
if param_names.contains(v) {
|
||||
IRExpr::Param(v.clone())
|
||||
} else {
|
||||
IRExpr::Variable(v.clone())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::catalog::build_catalog;
    use crate::query::parser::parse_query;
    use crate::query::typecheck::{CheckedQuery, typecheck_query, typecheck_query_decl};
    use crate::schema::parser::parse_schema;

    /// Builds the small Person/Company catalog shared by most tests.
    fn setup() -> Catalog {
        let schema = parse_schema(
            r#"
node Person { name: String age: I32? }
node Company { name: String }
edge Knows: Person -> Person { since: Date? }
edge WorksAt: Person -> Company
"#,
        )
        .unwrap();
        build_catalog(&schema).unwrap()
    }

    #[test]
    fn test_lower_basic() {
        let catalog = setup();
        let qf = parse_query(
            r#"
query q($name: String) {
    match {
        $p: Person { name: $name }
        $p knows $f
    }
    return { $f.name, $f.age }
}
"#,
        )
        .unwrap();
        let tc = typecheck_query(&catalog, &qf.queries[0]).unwrap();
        let ir = lower_query(&catalog, &qf.queries[0], &tc).unwrap();

        assert_eq!(ir.pipeline.len(), 2); // NodeScan + Expand
        assert_eq!(ir.return_exprs.len(), 2);
    }

    #[test]
    fn test_lower_negation() {
        let catalog = setup();
        let qf = parse_query(
            r#"
query q() {
    match {
        $p: Person
        not { $p worksAt $_ }
    }
    return { $p.name }
}
"#,
        )
        .unwrap();
        let tc = typecheck_query(&catalog, &qf.queries[0]).unwrap();
        let ir = lower_query(&catalog, &qf.queries[0], &tc).unwrap();

        assert_eq!(ir.pipeline.len(), 2); // NodeScan + AntiJoin
        assert!(matches!(&ir.pipeline[1], IROp::AntiJoin { .. }));
    }

    #[test]
    fn test_lower_mutation_update() {
        let catalog = setup();
        let qf = parse_query(
            r#"
query q($name: String, $age: I32) {
    update Person set { age: $age } where name = $name
}
"#,
        )
        .unwrap();
        let checked = typecheck_query_decl(&catalog, &qf.queries[0]).unwrap();
        assert!(matches!(checked, CheckedQuery::Mutation(_)));

        let ir = lower_mutation_query(&qf.queries[0]).unwrap();
        match ir.op {
            MutationOpIR::Update {
                type_name,
                assignments,
                predicate,
            } => {
                assert_eq!(type_name, "Person");
                assert_eq!(assignments.len(), 1);
                assert_eq!(assignments[0].property, "age");
                assert_eq!(predicate.property, "name");
            }
            _ => panic!("expected update mutation op"),
        }
    }

    #[test]
    fn test_lower_bounded_traversal() {
        let catalog = setup();
        let qf = parse_query(
            r#"
query q() {
    match {
        $p: Person
        $p knows{1,3} $f
    }
    return { $f.name }
}
"#,
        )
        .unwrap();
        let tc = typecheck_query(&catalog, &qf.queries[0]).unwrap();
        let ir = lower_query(&catalog, &qf.queries[0], &tc).unwrap();
        let expand = ir
            .pipeline
            .iter()
            .find_map(|op| match op {
                IROp::Expand {
                    min_hops, max_hops, ..
                } => Some((*min_hops, *max_hops)),
                _ => None,
            })
            .expect("expected expand op");
        assert_eq!(expand.0, 1);
        assert_eq!(expand.1, Some(3));
    }

    #[test]
    fn test_lower_now_uses_reserved_runtime_param() {
        let catalog = setup();
        let qf = parse_query(
            r#"
query stamp() {
    match { $p: Person }
    return { now() as ts }
}
"#,
        )
        .unwrap();
        let tc = typecheck_query(&catalog, &qf.queries[0]).unwrap();
        let ir = lower_query(&catalog, &qf.queries[0], &tc).unwrap();

        assert!(matches!(
            ir.return_exprs[0].expr,
            IRExpr::Param(ref name) if name == NOW_PARAM_NAME
        ));
    }

    #[test]
    fn test_lower_mutation_now_uses_reserved_runtime_param() {
        let catalog = build_catalog(
            &parse_schema(
                r#"
node Event {
    slug: String @key
    updated_at: DateTime?
}
"#,
            )
            .unwrap(),
        )
        .unwrap();
        let qf = parse_query(
            r#"
query stamp() {
    update Event set { updated_at: now() } where updated_at = now()
}
"#,
        )
        .unwrap();
        let checked = typecheck_query_decl(&catalog, &qf.queries[0]).unwrap();
        assert!(matches!(checked, CheckedQuery::Mutation(_)));

        let ir = lower_mutation_query(&qf.queries[0]).unwrap();
        match ir.op {
            MutationOpIR::Update {
                assignments,
                predicate,
                ..
            } => {
                assert!(matches!(
                    assignments[0].value,
                    IRExpr::Param(ref name) if name == NOW_PARAM_NAME
                ));
                assert!(matches!(
                    predicate.value,
                    IRExpr::Param(ref name) if name == NOW_PARAM_NAME
                ));
            }
            _ => panic!("expected update mutation op"),
        }
    }
}
|
||||
143
crates/omnigraph-compiler/src/ir/mod.rs
Normal file
143
crates/omnigraph-compiler/src/ir/mod.rs
Normal file
|
|
@ -0,0 +1,143 @@
|
|||
pub(crate) mod lower;
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::query::ast::{AggFunc, CompOp, Literal, Param};
|
||||
use crate::types::Direction;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct QueryIR {
|
||||
pub name: String,
|
||||
pub params: Vec<Param>,
|
||||
pub pipeline: Vec<IROp>,
|
||||
pub return_exprs: Vec<IRProjection>,
|
||||
pub order_by: Vec<IROrdering>,
|
||||
pub limit: Option<u64>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct MutationIR {
|
||||
pub name: String,
|
||||
pub params: Vec<Param>,
|
||||
pub op: MutationOpIR,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum MutationOpIR {
|
||||
Insert {
|
||||
type_name: String,
|
||||
assignments: Vec<IRAssignment>,
|
||||
},
|
||||
Update {
|
||||
type_name: String,
|
||||
assignments: Vec<IRAssignment>,
|
||||
predicate: IRMutationPredicate,
|
||||
},
|
||||
Delete {
|
||||
type_name: String,
|
||||
predicate: IRMutationPredicate,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct IRAssignment {
|
||||
pub property: String,
|
||||
pub value: IRExpr,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct IRMutationPredicate {
|
||||
pub property: String,
|
||||
pub op: CompOp,
|
||||
pub value: IRExpr,
|
||||
}
|
||||
|
||||
/// Resolved runtime parameters: param name → literal value.
|
||||
pub type ParamMap = HashMap<String, Literal>;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum IROp {
|
||||
NodeScan {
|
||||
variable: String,
|
||||
type_name: String,
|
||||
filters: Vec<IRFilter>,
|
||||
},
|
||||
Expand {
|
||||
src_var: String,
|
||||
dst_var: String,
|
||||
edge_type: String,
|
||||
direction: Direction,
|
||||
dst_type: String,
|
||||
min_hops: u32,
|
||||
max_hops: Option<u32>,
|
||||
},
|
||||
Filter(IRFilter),
|
||||
AntiJoin {
|
||||
/// The outer variable whose id is used for the join key
|
||||
outer_var: String,
|
||||
/// The inner pipeline that produces rows to anti-join against
|
||||
inner: Vec<IROp>,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct IRFilter {
|
||||
pub left: IRExpr,
|
||||
pub op: CompOp,
|
||||
pub right: IRExpr,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum IRExpr {
|
||||
PropAccess {
|
||||
variable: String,
|
||||
property: String,
|
||||
},
|
||||
Nearest {
|
||||
variable: String,
|
||||
property: String,
|
||||
query: Box<IRExpr>,
|
||||
},
|
||||
Search {
|
||||
field: Box<IRExpr>,
|
||||
query: Box<IRExpr>,
|
||||
},
|
||||
Fuzzy {
|
||||
field: Box<IRExpr>,
|
||||
query: Box<IRExpr>,
|
||||
max_edits: Option<Box<IRExpr>>,
|
||||
},
|
||||
MatchText {
|
||||
field: Box<IRExpr>,
|
||||
query: Box<IRExpr>,
|
||||
},
|
||||
Bm25 {
|
||||
field: Box<IRExpr>,
|
||||
query: Box<IRExpr>,
|
||||
},
|
||||
Rrf {
|
||||
primary: Box<IRExpr>,
|
||||
secondary: Box<IRExpr>,
|
||||
k: Option<Box<IRExpr>>,
|
||||
},
|
||||
Variable(String),
|
||||
Param(String),
|
||||
Literal(Literal),
|
||||
Aggregate {
|
||||
func: AggFunc,
|
||||
arg: Box<IRExpr>,
|
||||
},
|
||||
AliasRef(String),
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct IRProjection {
|
||||
pub expr: IRExpr,
|
||||
pub alias: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct IROrdering {
|
||||
pub expr: IRExpr,
|
||||
pub descending: bool,
|
||||
}
|
||||
352
crates/omnigraph-compiler/src/json_output.rs
Normal file
352
crates/omnigraph-compiler/src/json_output.rs
Normal file
|
|
@ -0,0 +1,352 @@
|
|||
use arrow_array::{
|
||||
Array, ArrayRef, BooleanArray, Date32Array, Date64Array, FixedSizeListArray, Float32Array,
|
||||
Float64Array, Int32Array, Int64Array, ListArray, RecordBatch, StringArray, StructArray,
|
||||
UInt32Array, UInt64Array,
|
||||
};
|
||||
use arrow_schema::DataType;
|
||||
|
||||
/// Largest integer magnitude treated as JavaScript-safe, as an `i64` (2^53 - 1).
pub const JS_MAX_SAFE_INTEGER_I64: i64 = 9_007_199_254_740_991;
|
||||
/// Largest integer treated as JavaScript-safe, as a `u64` (2^53 - 1).
pub const JS_MAX_SAFE_INTEGER_U64: u64 = 9_007_199_254_740_991;
|
||||
|
||||
/// Selects how 64-bit integers are rendered when converting Arrow cells to JSON.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum JsonIntegerMode {
    /// 64-bit integers outside the JavaScript-safe range are emitted as strings.
    JavaScript,
    /// 64-bit integers are always emitted as JSON numbers.
    Native,
}
|
||||
|
||||
pub fn is_js_safe_integer_i64(value: i64) -> bool {
|
||||
(-JS_MAX_SAFE_INTEGER_I64..=JS_MAX_SAFE_INTEGER_I64).contains(&value)
|
||||
}
|
||||
|
||||
/// Convert Arrow RecordBatches into a Vec of JSON objects (one per row).
|
||||
pub fn record_batches_to_json_rows(results: &[RecordBatch]) -> Vec<serde_json::Value> {
|
||||
record_batches_to_json_rows_with_mode(results, JsonIntegerMode::JavaScript)
|
||||
}
|
||||
|
||||
/// Convert Arrow RecordBatches into JSON rows without JS-safe integer coercion.
|
||||
pub fn record_batches_to_rust_json_rows(results: &[RecordBatch]) -> Vec<serde_json::Value> {
|
||||
record_batches_to_json_rows_with_mode(results, JsonIntegerMode::Native)
|
||||
}
|
||||
|
||||
fn record_batches_to_json_rows_with_mode(
|
||||
results: &[RecordBatch],
|
||||
integer_mode: JsonIntegerMode,
|
||||
) -> Vec<serde_json::Value> {
|
||||
let total_rows = results.iter().map(RecordBatch::num_rows).sum();
|
||||
let mut out = Vec::with_capacity(total_rows);
|
||||
for batch in results {
|
||||
let schema = batch.schema();
|
||||
for row in 0..batch.num_rows() {
|
||||
let mut map = serde_json::Map::new();
|
||||
for (col_idx, field) in schema.fields().iter().enumerate() {
|
||||
let col_arr = batch.column(col_idx);
|
||||
map.insert(
|
||||
field.name().clone(),
|
||||
array_value_to_json_with_mode(col_arr, row, integer_mode),
|
||||
);
|
||||
}
|
||||
out.push(serde_json::Value::Object(map));
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Convert a single cell from an Arrow array to a serde_json::Value.
|
||||
pub fn array_value_to_json(array: &ArrayRef, row: usize) -> serde_json::Value {
|
||||
array_value_to_json_with_mode(array, row, JsonIntegerMode::JavaScript)
|
||||
}
|
||||
|
||||
fn array_value_to_json_with_mode(
|
||||
array: &ArrayRef,
|
||||
row: usize,
|
||||
integer_mode: JsonIntegerMode,
|
||||
) -> serde_json::Value {
|
||||
if array.is_null(row) {
|
||||
return serde_json::Value::Null;
|
||||
}
|
||||
|
||||
match array.data_type() {
|
||||
DataType::Utf8 => array
|
||||
.as_any()
|
||||
.downcast_ref::<StringArray>()
|
||||
.map(|a| serde_json::Value::String(a.value(row).to_string()))
|
||||
.unwrap_or(serde_json::Value::Null),
|
||||
DataType::Boolean => array
|
||||
.as_any()
|
||||
.downcast_ref::<BooleanArray>()
|
||||
.map(|a| serde_json::Value::Bool(a.value(row)))
|
||||
.unwrap_or(serde_json::Value::Null),
|
||||
DataType::Int32 => array
|
||||
.as_any()
|
||||
.downcast_ref::<Int32Array>()
|
||||
.map(|a| serde_json::Value::Number((a.value(row) as i64).into()))
|
||||
.unwrap_or(serde_json::Value::Null),
|
||||
DataType::Int64 => array
|
||||
.as_any()
|
||||
.downcast_ref::<Int64Array>()
|
||||
.map(|a| {
|
||||
let value = a.value(row);
|
||||
match integer_mode {
|
||||
JsonIntegerMode::JavaScript if !is_js_safe_integer_i64(value) => {
|
||||
serde_json::Value::String(value.to_string())
|
||||
}
|
||||
JsonIntegerMode::JavaScript | JsonIntegerMode::Native => {
|
||||
serde_json::Value::Number(value.into())
|
||||
}
|
||||
}
|
||||
})
|
||||
.unwrap_or(serde_json::Value::Null),
|
||||
DataType::UInt32 => array
|
||||
.as_any()
|
||||
.downcast_ref::<UInt32Array>()
|
||||
.map(|a| serde_json::Value::Number((a.value(row) as u64).into()))
|
||||
.unwrap_or(serde_json::Value::Null),
|
||||
DataType::UInt64 => array
|
||||
.as_any()
|
||||
.downcast_ref::<UInt64Array>()
|
||||
.map(|a| {
|
||||
let value = a.value(row);
|
||||
match integer_mode {
|
||||
JsonIntegerMode::JavaScript if value > JS_MAX_SAFE_INTEGER_U64 => {
|
||||
serde_json::Value::String(value.to_string())
|
||||
}
|
||||
JsonIntegerMode::JavaScript | JsonIntegerMode::Native => {
|
||||
serde_json::Value::Number(value.into())
|
||||
}
|
||||
}
|
||||
})
|
||||
.unwrap_or(serde_json::Value::Null),
|
||||
DataType::Float32 => array
|
||||
.as_any()
|
||||
.downcast_ref::<Float32Array>()
|
||||
.map(|a| json_float_value(a.value(row) as f64))
|
||||
.unwrap_or(serde_json::Value::Null),
|
||||
DataType::Float64 => array
|
||||
.as_any()
|
||||
.downcast_ref::<Float64Array>()
|
||||
.map(|a| json_float_value(a.value(row)))
|
||||
.unwrap_or(serde_json::Value::Null),
|
||||
DataType::Date32 => array
|
||||
.as_any()
|
||||
.downcast_ref::<Date32Array>()
|
||||
.map(|a| {
|
||||
let days = a.value(row);
|
||||
arrow_array::temporal_conversions::date32_to_datetime(days)
|
||||
.map(|dt| serde_json::Value::String(dt.format("%Y-%m-%d").to_string()))
|
||||
.unwrap_or_else(|| serde_json::Value::Number((days as i64).into()))
|
||||
})
|
||||
.unwrap_or(serde_json::Value::Null),
|
||||
DataType::Date64 => array
|
||||
.as_any()
|
||||
.downcast_ref::<Date64Array>()
|
||||
.map(|a| {
|
||||
let ms = a.value(row);
|
||||
arrow_array::temporal_conversions::date64_to_datetime(ms)
|
||||
.map(|dt| {
|
||||
serde_json::Value::String(dt.format("%Y-%m-%dT%H:%M:%S%.3fZ").to_string())
|
||||
})
|
||||
.unwrap_or_else(|| serde_json::Value::Number(ms.into()))
|
||||
})
|
||||
.unwrap_or(serde_json::Value::Null),
|
||||
DataType::List(_) => array
|
||||
.as_any()
|
||||
.downcast_ref::<ListArray>()
|
||||
.map(|a| {
|
||||
let values = a.value(row);
|
||||
serde_json::Value::Array(
|
||||
(0..values.len())
|
||||
.map(|idx| array_value_to_json_with_mode(&values, idx, integer_mode))
|
||||
.collect(),
|
||||
)
|
||||
})
|
||||
.unwrap_or(serde_json::Value::Null),
|
||||
DataType::FixedSizeList(_, _) => array
|
||||
.as_any()
|
||||
.downcast_ref::<FixedSizeListArray>()
|
||||
.map(|a| fixed_size_list_value_to_json(a, row, integer_mode))
|
||||
.unwrap_or(serde_json::Value::Null),
|
||||
DataType::Struct(_) => array
|
||||
.as_any()
|
||||
.downcast_ref::<StructArray>()
|
||||
.map(|struct_arr| {
|
||||
let mut obj = serde_json::Map::new();
|
||||
for (i, field) in struct_arr.fields().iter().enumerate() {
|
||||
let col = struct_arr.column(i);
|
||||
obj.insert(
|
||||
field.name().clone(),
|
||||
array_value_to_json_with_mode(col, row, integer_mode),
|
||||
);
|
||||
}
|
||||
serde_json::Value::Object(obj)
|
||||
})
|
||||
.unwrap_or(serde_json::Value::Null),
|
||||
_ => {
|
||||
let display =
|
||||
arrow_cast::display::array_value_to_string(array, row).unwrap_or_default();
|
||||
serde_json::Value::String(display)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert an `f64` into JSON.
///
/// NaN and the two infinities are emitted as the strings `"NaN"`,
/// `"Infinity"` and `"-Infinity"`; every other value becomes a JSON number
/// (or `null` if `serde_json` declines to represent it).
fn json_float_value(value: f64) -> serde_json::Value {
    let label = if value.is_nan() {
        "NaN"
    } else if value == f64::INFINITY {
        "Infinity"
    } else if value == f64::NEG_INFINITY {
        "-Infinity"
    } else {
        return match serde_json::Number::from_f64(value) {
            Some(number) => serde_json::Value::Number(number),
            None => serde_json::Value::Null,
        };
    };
    serde_json::Value::String(label.to_string())
}
|
||||
|
||||
fn fixed_size_list_value_to_json(
|
||||
array: &FixedSizeListArray,
|
||||
row: usize,
|
||||
integer_mode: JsonIntegerMode,
|
||||
) -> serde_json::Value {
|
||||
let value_len = array.value_length() as usize;
|
||||
let values = array.values();
|
||||
if let Some(float_values) = values.as_any().downcast_ref::<Float32Array>() {
|
||||
let start = row.saturating_mul(value_len);
|
||||
return float32_json_array(float_values, start, value_len);
|
||||
}
|
||||
|
||||
let values = array.value(row);
|
||||
serde_json::Value::Array(
|
||||
(0..values.len())
|
||||
.map(|idx| array_value_to_json_with_mode(&values, idx, integer_mode))
|
||||
.collect(),
|
||||
)
|
||||
}
|
||||
|
||||
fn float32_json_array(values: &Float32Array, start: usize, len: usize) -> serde_json::Value {
|
||||
let mut out = Vec::with_capacity(len);
|
||||
let end = start.saturating_add(len).min(values.len());
|
||||
for idx in start..end {
|
||||
if values.is_null(idx) {
|
||||
out.push(serde_json::Value::Null);
|
||||
continue;
|
||||
}
|
||||
let value = values.value(idx) as f64;
|
||||
out.push(
|
||||
serde_json::Number::from_f64(value)
|
||||
.map(serde_json::Value::Number)
|
||||
.unwrap_or(serde_json::Value::Null),
|
||||
);
|
||||
}
|
||||
serde_json::Value::Array(out)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::{array_value_to_json, record_batches_to_rust_json_rows};
    use std::sync::Arc;

    use arrow_array::builder::{FixedSizeListBuilder, Float32Builder};
    use arrow_array::{ArrayRef, Float64Array, Int64Array, RecordBatch, UInt64Array};
    use arrow_schema::{DataType, Field, Schema};

    /// An `i64` one past the JavaScript-safe maximum is emitted as a string.
    #[test]
    fn int64_outside_js_safe_range_is_stringified() {
        let column: ArrayRef = Arc::new(Int64Array::from(vec![Some(9_007_199_254_740_992)]));
        let cell = array_value_to_json(&column, 0);
        assert_eq!(cell, serde_json::json!("9007199254740992"));
    }

    /// A `u64` one past the JavaScript-safe maximum is emitted as a string.
    #[test]
    fn uint64_outside_js_safe_range_is_stringified() {
        let column: ArrayRef = Arc::new(UInt64Array::from(vec![Some(9_007_199_254_740_992)]));
        let cell = array_value_to_json(&column, 0);
        assert_eq!(cell, serde_json::json!("9007199254740992"));
    }

    /// A `u64` equal to the JavaScript-safe maximum stays a JSON number.
    #[test]
    fn uint64_within_js_safe_range_stays_numeric() {
        let column: ArrayRef = Arc::new(UInt64Array::from(vec![Some(9_007_199_254_740_991)]));
        let cell = array_value_to_json(&column, 0);
        assert_eq!(cell, serde_json::json!(9_007_199_254_740_991u64));
    }

    /// The native-mode row converter keeps extreme 64-bit values numeric.
    #[test]
    fn rust_json_rows_preserve_full_width_integers() {
        let fields = vec![
            Field::new("signed", DataType::Int64, false),
            Field::new("unsigned", DataType::UInt64, false),
        ];
        let signed: ArrayRef = Arc::new(Int64Array::from(vec![i64::MIN]));
        let unsigned: ArrayRef = Arc::new(UInt64Array::from(vec![u64::MAX]));
        let batch = RecordBatch::try_new(Arc::new(Schema::new(fields)), vec![signed, unsigned])
            .expect("batch");

        let rows = record_batches_to_rust_json_rows(&[batch]);
        let expected = vec![serde_json::json!({
            "signed": i64::MIN,
            "unsigned": u64::MAX,
        })];
        assert_eq!(rows, expected);
    }

    /// Fixed-size `f32` lists serialize per row, and a null row yields `null`.
    #[test]
    fn fixed_size_float32_vectors_serialize_without_recursive_dispatch() {
        let rows: [Option<[f32; 3]>; 3] =
            [Some([0.25, 0.5, 0.75]), None, Some([1.0, 2.0, 3.0])];

        let mut builder = FixedSizeListBuilder::new(Float32Builder::new(), 3);
        for row in rows {
            match row {
                Some(items) => {
                    for item in items {
                        builder.values().append_value(item);
                    }
                    builder.append(true);
                }
                None => {
                    // A null list row still needs its child slots filled.
                    for _ in 0..3 {
                        builder.values().append_null();
                    }
                    builder.append(false);
                }
            }
        }

        let column: ArrayRef = Arc::new(builder.finish());
        assert_eq!(
            array_value_to_json(&column, 0),
            serde_json::json!([0.25, 0.5, 0.75])
        );
        assert_eq!(array_value_to_json(&column, 1), serde_json::Value::Null);
        assert_eq!(
            array_value_to_json(&column, 2),
            serde_json::json!([1.0, 2.0, 3.0])
        );
    }

    /// NaN and both infinities are emitted as their string names.
    #[test]
    fn non_finite_floats_are_stringified() {
        let column: ArrayRef = Arc::new(Float64Array::from(vec![
            Some(f64::NAN),
            Some(f64::INFINITY),
            Some(f64::NEG_INFINITY),
        ]));

        let expected = ["NaN", "Infinity", "-Infinity"];
        for (row, label) in expected.iter().enumerate() {
            assert_eq!(array_value_to_json(&column, row), serde_json::json!(label));
        }
    }
}
|
||||
28
crates/omnigraph-compiler/src/lib.rs
Normal file
28
crates/omnigraph-compiler/src/lib.rs
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
pub mod catalog;
|
||||
pub mod embedding;
|
||||
pub mod error;
|
||||
pub mod ir;
|
||||
pub mod json_output;
|
||||
pub mod query;
|
||||
pub mod query_input;
|
||||
pub mod result;
|
||||
pub mod schema;
|
||||
pub mod types;
|
||||
|
||||
pub use catalog::build_catalog;
|
||||
pub use catalog::schema_ir::{
|
||||
SchemaIR, build_catalog_from_ir, build_schema_ir, schema_ir_hash, schema_ir_json,
|
||||
schema_ir_pretty_json,
|
||||
};
|
||||
pub use catalog::schema_plan::{
|
||||
SchemaMigrationPlan, SchemaMigrationStep, SchemaTypeKind, plan_schema_migration,
|
||||
};
|
||||
pub use ir::ParamMap;
|
||||
pub use ir::lower::{lower_mutation_query, lower_query};
|
||||
pub use query::ast::Literal;
|
||||
pub use query_input::{
|
||||
JsonParamMode, RunInputError, RunInputResult, ToParam, find_named_query,
|
||||
json_params_to_param_map,
|
||||
};
|
||||
pub use result::{MutationExecResult, MutationResult, QueryResult, RunResult};
|
||||
pub use types::{Direction, PropType, ScalarType};
|
||||
221
crates/omnigraph-compiler/src/query/ast.rs
Normal file
221
crates/omnigraph-compiler/src/query/ast.rs
Normal file
|
|
@ -0,0 +1,221 @@
|
|||
/// Reserved parameter name.
// NOTE(review): presumably the name under which the value of `now()` is bound
// at execution time — confirm against the lowering/execution code.
pub const NOW_PARAM_NAME: &str = "__nanograph_now";
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct QueryFile {
|
||||
pub queries: Vec<QueryDecl>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct QueryDecl {
|
||||
pub name: String,
|
||||
pub description: Option<String>,
|
||||
pub instruction: Option<String>,
|
||||
pub params: Vec<Param>,
|
||||
pub match_clause: Vec<Clause>,
|
||||
pub return_clause: Vec<Projection>,
|
||||
pub order_clause: Vec<Ordering>,
|
||||
pub limit: Option<u64>,
|
||||
pub mutation: Option<Mutation>,
|
||||
}
|
||||
|
||||
/// A declared query parameter.
#[derive(Debug, Clone)]
pub struct Param {
    /// Parameter name.
    pub name: String,
    /// Name of the parameter's declared type.
    pub type_name: String,
    /// Whether the parameter is declared nullable.
    pub nullable: bool,
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum Clause {
|
||||
Binding(Binding),
|
||||
Traversal(Traversal),
|
||||
Filter(Filter),
|
||||
Negation(Vec<Clause>),
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Binding {
|
||||
pub variable: String,
|
||||
pub type_name: String,
|
||||
pub prop_matches: Vec<PropMatch>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct PropMatch {
|
||||
pub prop_name: String,
|
||||
pub value: MatchValue,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum MatchValue {
|
||||
Literal(Literal),
|
||||
Variable(String),
|
||||
Now,
|
||||
}
|
||||
|
||||
/// An edge traversal from `src` to `dst` along `edge_name`, with hop bounds.
#[derive(Debug, Clone)]
pub struct Traversal {
    /// Source variable.
    pub src: String,
    /// Name of the edge being followed.
    pub edge_name: String,
    /// Destination variable.
    pub dst: String,
    /// Minimum number of hops.
    pub min_hops: u32,
    /// Maximum number of hops, if bounded.
    pub max_hops: Option<u32>,
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Filter {
|
||||
pub left: Expr,
|
||||
pub op: CompOp,
|
||||
pub right: Expr,
|
||||
}
|
||||
|
||||
/// Comparison operators usable in filters and mutation predicates.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompOp {
    /// `=`
    Eq,
    /// `!=`
    Ne,
    /// `>`
    Gt,
    /// `<`
    Lt,
    /// `>=`
    Ge,
    /// `<=`
    Le,
    /// `contains`
    Contains,
}

/// Renders the operator using its textual symbol.
impl std::fmt::Display for CompOp {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let symbol = match self {
            Self::Eq => "=",
            Self::Ne => "!=",
            Self::Gt => ">",
            Self::Lt => "<",
            Self::Ge => ">=",
            Self::Le => "<=",
            Self::Contains => "contains",
        };
        f.write_str(symbol)
    }
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum Expr {
|
||||
Now,
|
||||
PropAccess {
|
||||
variable: String,
|
||||
property: String,
|
||||
},
|
||||
Nearest {
|
||||
variable: String,
|
||||
property: String,
|
||||
query: Box<Expr>,
|
||||
},
|
||||
Search {
|
||||
field: Box<Expr>,
|
||||
query: Box<Expr>,
|
||||
},
|
||||
Fuzzy {
|
||||
field: Box<Expr>,
|
||||
query: Box<Expr>,
|
||||
max_edits: Option<Box<Expr>>,
|
||||
},
|
||||
MatchText {
|
||||
field: Box<Expr>,
|
||||
query: Box<Expr>,
|
||||
},
|
||||
Bm25 {
|
||||
field: Box<Expr>,
|
||||
query: Box<Expr>,
|
||||
},
|
||||
Rrf {
|
||||
primary: Box<Expr>,
|
||||
secondary: Box<Expr>,
|
||||
k: Option<Box<Expr>>,
|
||||
},
|
||||
Variable(String),
|
||||
Literal(Literal),
|
||||
Aggregate {
|
||||
func: AggFunc,
|
||||
arg: Box<Expr>,
|
||||
},
|
||||
AliasRef(String),
|
||||
}
|
||||
|
||||
/// Aggregate functions usable in expressions.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AggFunc {
    /// `count`
    Count,
    /// `sum`
    Sum,
    /// `avg`
    Avg,
    /// `min`
    Min,
    /// `max`
    Max,
}

/// Renders the function using its lowercase keyword.
impl std::fmt::Display for AggFunc {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let keyword = match self {
            Self::Count => "count",
            Self::Sum => "sum",
            Self::Avg => "avg",
            Self::Min => "min",
            Self::Max => "max",
        };
        f.write_str(keyword)
    }
}
|
||||
|
||||
/// A literal value in a query or a parameter map.
#[derive(Debug, Clone)]
pub enum Literal {
    /// A string value.
    String(String),
    /// A signed 64-bit integer value.
    Integer(i64),
    /// A 64-bit floating-point value.
    Float(f64),
    /// A boolean value.
    Bool(bool),
    /// A date, carried as its source text.
    Date(String),
    /// A date-time, carried as its source text.
    DateTime(String),
    /// A list of literals.
    List(Vec<Literal>),
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Projection {
|
||||
pub expr: Expr,
|
||||
pub alias: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Ordering {
|
||||
pub expr: Expr,
|
||||
pub descending: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum Mutation {
|
||||
Insert(InsertMutation),
|
||||
Update(UpdateMutation),
|
||||
Delete(DeleteMutation),
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct InsertMutation {
|
||||
pub type_name: String,
|
||||
pub assignments: Vec<MutationAssignment>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct UpdateMutation {
|
||||
pub type_name: String,
|
||||
pub assignments: Vec<MutationAssignment>,
|
||||
pub predicate: MutationPredicate,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct DeleteMutation {
|
||||
pub type_name: String,
|
||||
pub predicate: MutationPredicate,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct MutationAssignment {
|
||||
pub property: String,
|
||||
pub value: MatchValue,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct MutationPredicate {
|
||||
pub property: String,
|
||||
pub op: CompOp,
|
||||
pub value: MatchValue,
|
||||
}
|
||||
3
crates/omnigraph-compiler/src/query/mod.rs
Normal file
3
crates/omnigraph-compiler/src/query/mod.rs
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
pub mod ast;
|
||||
pub mod parser;
|
||||
pub mod typecheck;
|
||||
1689
crates/omnigraph-compiler/src/query/parser.rs
Normal file
1689
crates/omnigraph-compiler/src/query/parser.rs
Normal file
File diff suppressed because it is too large
Load diff
114
crates/omnigraph-compiler/src/query/query.pest
Normal file
114
crates/omnigraph-compiler/src/query/query.pest
Normal file
|
|
@ -0,0 +1,114 @@
|
|||
// NanoGraph Query Grammar (.gq files)
//
// PEG semantics apply: alternatives are tried left to right and the first
// match wins, so the order inside every `a | b` choice is significant.

// Implicit whitespace and comments, skipped between tokens of non-atomic rules.
WHITESPACE = _{ " " | "\t" | "\r" | "\n" }
COMMENT = _{ LINE_COMMENT | BLOCK_COMMENT }
LINE_COMMENT = _{ "//" ~ (!"\n" ~ ANY)* }
BLOCK_COMMENT = _{ "/*" ~ (!"*/" ~ ANY)* ~ "*/" }

// A file is zero or more query declarations.
query_file = { SOI ~ query_decl* ~ EOI }

// query name($param: Type, ...) @description("...") @instruction("...") { body }
query_decl = {
    "query" ~ ident ~ "(" ~ param_list? ~ ")" ~ query_annotation* ~ "{"
    ~ query_body
    ~ "}"
}
query_annotation = { description_annotation | instruction_annotation }
description_annotation = { "@description" ~ "(" ~ string_lit ~ ")" }
instruction_annotation = { "@instruction" ~ "(" ~ string_lit ~ ")" }

// A body is either a read query or a single mutation statement.
query_body = { read_query_body | mutation_stmt }
read_query_body = {
    match_clause
    ~ return_clause
    ~ order_clause?
    ~ limit_clause?
}

// Mutations
mutation_stmt = { insert_stmt | update_stmt | delete_stmt }
insert_stmt = { "insert" ~ type_name ~ "{" ~ mutation_assignment+ ~ "}" }
update_stmt = { "update" ~ type_name ~ "set" ~ "{" ~ mutation_assignment+ ~ "}" ~ "where" ~ mutation_predicate }
delete_stmt = { "delete" ~ type_name ~ "where" ~ mutation_predicate }
mutation_assignment = { ident ~ ":" ~ match_value ~ ","? }
mutation_predicate = { ident ~ comp_op ~ match_value }

// Parameters
param_list = { param ~ ("," ~ param)* }
param = { variable ~ ":" ~ type_ref }

// Types; a trailing "?" is optional.
type_ref = { (list_type | base_type | vector_type) ~ "?"? }
list_type = { "[" ~ base_type ~ "]" }
vector_type = { "Vector" ~ "(" ~ integer ~ ")" }
// "DateTime" is listed before "Date" so the longer keyword is tried first.
base_type = { "String" | "Blob" | "Bool" | "I32" | "I64" | "U32" | "U64" | "F32" | "F64" | "DateTime" | "Date" }

// Match clause
match_clause = { "match" ~ "{" ~ clause+ ~ "}" }

clause = { negation | binding | traversal | filter | text_search_clause }
text_search_clause = { search_call | fuzzy_call | match_text_call }

// Binding: $p: Person { name: "Alice" }
binding = { variable ~ ":" ~ type_name ~ ("{" ~ prop_match_list ~ "}")? }

prop_match_list = { prop_match ~ ("," ~ prop_match)* ~ ","? }
prop_match = { ident ~ ":" ~ match_value }
match_value = { literal | variable | now_call }

// Traversal: $p knows $f
// Optional bounds: {min,} or {min,max}
traversal = { variable ~ edge_ident ~ traversal_bounds? ~ variable }
traversal_bounds = { "{" ~ integer ~ "," ~ integer? ~ "}" }

// Filter: $f.age > 25
filter = { expr ~ filter_op ~ expr }

// Negation: not { ... }
negation = { "not" ~ "{" ~ clause+ ~ "}" }

// Return clause — projections separated by commas or newlines
return_clause = { "return" ~ "{" ~ projection+ ~ "}" }
projection = { expr ~ ("as" ~ ident)? ~ ","? }

// Order clause
order_clause = { "order" ~ "{" ~ ordering ~ ("," ~ ordering)* ~ "}" }
ordering = { nearest_ordering | (expr ~ order_dir?) }
nearest_ordering = { "nearest" ~ "(" ~ prop_access ~ "," ~ expr ~ ")" }
order_dir = { "asc" | "desc" }

// Limit clause
limit_clause = { "limit" ~ integer }

// Expressions
expr = { now_call | nearest_ordering | search_call | fuzzy_call | match_text_call | bm25_call | rrf_call | agg_call | prop_access | variable | literal | ident }
now_call = { "now" ~ "(" ~ ")" }
search_call = { "search" ~ "(" ~ expr ~ "," ~ expr ~ ")" }
fuzzy_call = { "fuzzy" ~ "(" ~ expr ~ "," ~ expr ~ ("," ~ expr)? ~ ")" }
match_text_call = { "match_text" ~ "(" ~ expr ~ "," ~ expr ~ ")" }
bm25_call = { "bm25" ~ "(" ~ expr ~ "," ~ expr ~ ")" }
rank_expr = { nearest_ordering | bm25_call }
rrf_call = { "rrf" ~ "(" ~ rank_expr ~ "," ~ rank_expr ~ ("," ~ expr)? ~ ")" }

prop_access = { variable ~ "." ~ ident }

agg_call = { agg_func ~ "(" ~ expr ~ ")" }
agg_func = { "count" | "sum" | "avg" | "min" | "max" }

// Two-character operators come first so ">=" is not consumed as ">".
comp_op = { ">=" | "<=" | "!=" | ">" | "<" | "=" }
filter_op = { "contains" | comp_op }

// Terminals
variable = @{ "$" ~ (ident_chars | "_") }
ident_chars = @{ (ASCII_ALPHA_LOWER | "_") ~ (ASCII_ALPHANUMERIC | "_")* }

// Edge identifier — same shape as ident (lowercase or "_" start), used in
// traversal context. The only keyword it rejects is a bare `not`; identifiers
// that merely start with "not" followed by an alphanumeric are still accepted.
edge_ident = @{ !("not" ~ !ASCII_ALPHANUMERIC) ~ (ASCII_ALPHA_LOWER | "_") ~ (ASCII_ALPHANUMERIC | "_")* }

type_name = @{ ASCII_ALPHA_UPPER ~ (ASCII_ALPHANUMERIC | "_")* }
ident = @{ (ASCII_ALPHA_LOWER | "_") ~ (ASCII_ALPHANUMERIC | "_")* }

// Literals; float_lit precedes integer so "1.5" is not read as the integer "1".
literal = { list_lit | datetime_lit | date_lit | string_lit | float_lit | integer | bool_lit }
date_lit = { "date" ~ "(" ~ string_lit ~ ")" }
datetime_lit = { "datetime" ~ "(" ~ string_lit ~ ")" }
list_lit = { "[" ~ (literal ~ ("," ~ literal)*)? ~ "]" }
string_lit = @{ "\"" ~ string_char* ~ "\"" }
string_char = @{ !("\"" | "\\") ~ ANY | "\\" ~ ANY }
float_lit = @{ ASCII_DIGIT+ ~ "." ~ ASCII_DIGIT+ }
integer = @{ ASCII_DIGIT+ }
bool_lit = { "true" | "false" }
|
||||
2776
crates/omnigraph-compiler/src/query/typecheck.rs
Normal file
2776
crates/omnigraph-compiler/src/query/typecheck.rs
Normal file
File diff suppressed because it is too large
Load diff
892
crates/omnigraph-compiler/src/query_input.rs
Normal file
892
crates/omnigraph-compiler/src/query_input.rs
Normal file
|
|
@ -0,0 +1,892 @@
|
|||
use std::error::Error;
|
||||
use std::fmt;
|
||||
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::error::NanoError;
|
||||
use crate::ir::ParamMap;
|
||||
use crate::json_output::{JS_MAX_SAFE_INTEGER_U64, is_js_safe_integer_i64};
|
||||
use crate::query::ast::{Literal, Param, QueryDecl};
|
||||
use crate::query::parser::parse_query;
|
||||
|
||||
/// Selects how JSON parameters are interpreted and how their errors are worded.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum JsonParamMode {
    /// Standard JSON input.
    Standard,
    /// JSON input originating from JavaScript.
    JavaScript,
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum RunInputError {
|
||||
Core(NanoError),
|
||||
Message(String),
|
||||
}
|
||||
|
||||
impl RunInputError {
|
||||
fn message(message: impl Into<String>) -> Self {
|
||||
Self::Message(message.into())
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for RunInputError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
Self::Core(err) => err.fmt(f),
|
||||
Self::Message(message) => f.write_str(message),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Error for RunInputError {
|
||||
fn source(&self) -> Option<&(dyn Error + 'static)> {
|
||||
match self {
|
||||
Self::Core(err) => Some(err),
|
||||
Self::Message(_) => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<NanoError> for RunInputError {
|
||||
fn from(value: NanoError) -> Self {
|
||||
Self::Core(value)
|
||||
}
|
||||
}
|
||||
|
||||
/// Result alias whose error type is [`RunInputError`].
pub type RunInputResult<T> = std::result::Result<T, RunInputError>;
|
||||
|
||||
pub trait ToParam {
|
||||
fn to_param(self) -> crate::error::Result<Literal>;
|
||||
}
|
||||
|
||||
impl ToParam for Literal {
|
||||
fn to_param(self) -> crate::error::Result<Literal> {
|
||||
Ok(self)
|
||||
}
|
||||
}
|
||||
|
||||
impl ToParam for &Literal {
|
||||
fn to_param(self) -> crate::error::Result<Literal> {
|
||||
Ok(self.clone())
|
||||
}
|
||||
}
|
||||
|
||||
impl ToParam for String {
|
||||
fn to_param(self) -> crate::error::Result<Literal> {
|
||||
Ok(Literal::String(self))
|
||||
}
|
||||
}
|
||||
|
||||
impl ToParam for &String {
|
||||
fn to_param(self) -> crate::error::Result<Literal> {
|
||||
Ok(Literal::String(self.clone()))
|
||||
}
|
||||
}
|
||||
|
||||
impl ToParam for &str {
|
||||
fn to_param(self) -> crate::error::Result<Literal> {
|
||||
Ok(Literal::String(self.to_string()))
|
||||
}
|
||||
}
|
||||
|
||||
impl ToParam for bool {
|
||||
fn to_param(self) -> crate::error::Result<Literal> {
|
||||
Ok(Literal::Bool(self))
|
||||
}
|
||||
}
|
||||
|
||||
impl ToParam for i8 {
|
||||
fn to_param(self) -> crate::error::Result<Literal> {
|
||||
Ok(Literal::Integer(i64::from(self)))
|
||||
}
|
||||
}
|
||||
|
||||
impl ToParam for i16 {
|
||||
fn to_param(self) -> crate::error::Result<Literal> {
|
||||
Ok(Literal::Integer(i64::from(self)))
|
||||
}
|
||||
}
|
||||
|
||||
impl ToParam for i32 {
|
||||
fn to_param(self) -> crate::error::Result<Literal> {
|
||||
Ok(Literal::Integer(i64::from(self)))
|
||||
}
|
||||
}
|
||||
|
||||
impl ToParam for i64 {
|
||||
fn to_param(self) -> crate::error::Result<Literal> {
|
||||
Ok(Literal::Integer(self))
|
||||
}
|
||||
}
|
||||
|
||||
impl ToParam for isize {
|
||||
fn to_param(self) -> crate::error::Result<Literal> {
|
||||
let value = i64::try_from(self).map_err(|_| {
|
||||
NanoError::Execution(format!(
|
||||
"param value {} exceeds current engine range for numeric literals (max {})",
|
||||
self,
|
||||
i64::MAX
|
||||
))
|
||||
})?;
|
||||
Ok(Literal::Integer(value))
|
||||
}
|
||||
}
|
||||
|
||||
impl ToParam for u8 {
|
||||
fn to_param(self) -> crate::error::Result<Literal> {
|
||||
Ok(Literal::Integer(i64::from(self)))
|
||||
}
|
||||
}
|
||||
|
||||
impl ToParam for u16 {
|
||||
fn to_param(self) -> crate::error::Result<Literal> {
|
||||
Ok(Literal::Integer(i64::from(self)))
|
||||
}
|
||||
}
|
||||
|
||||
impl ToParam for u32 {
|
||||
fn to_param(self) -> crate::error::Result<Literal> {
|
||||
Ok(Literal::Integer(i64::from(self)))
|
||||
}
|
||||
}
|
||||
|
||||
impl ToParam for u64 {
|
||||
fn to_param(self) -> crate::error::Result<Literal> {
|
||||
let value = i64::try_from(self).map_err(|_| {
|
||||
NanoError::Execution(format!(
|
||||
"param value {} exceeds current engine range for numeric literals (max {})",
|
||||
self,
|
||||
i64::MAX
|
||||
))
|
||||
})?;
|
||||
Ok(Literal::Integer(value))
|
||||
}
|
||||
}
|
||||
|
||||
impl ToParam for usize {
|
||||
fn to_param(self) -> crate::error::Result<Literal> {
|
||||
let value = i64::try_from(self).map_err(|_| {
|
||||
NanoError::Execution(format!(
|
||||
"param value {} exceeds current engine range for numeric literals (max {})",
|
||||
self,
|
||||
i64::MAX
|
||||
))
|
||||
})?;
|
||||
Ok(Literal::Integer(value))
|
||||
}
|
||||
}
|
||||
|
||||
impl ToParam for f32 {
|
||||
fn to_param(self) -> crate::error::Result<Literal> {
|
||||
if !self.is_finite() {
|
||||
return Err(NanoError::Execution(format!(
|
||||
"invalid float parameter {}",
|
||||
self
|
||||
)));
|
||||
}
|
||||
Ok(Literal::Float(f64::from(self)))
|
||||
}
|
||||
}
|
||||
|
||||
impl ToParam for f64 {
|
||||
fn to_param(self) -> crate::error::Result<Literal> {
|
||||
if !self.is_finite() {
|
||||
return Err(NanoError::Execution(format!(
|
||||
"invalid float parameter {}",
|
||||
self
|
||||
)));
|
||||
}
|
||||
Ok(Literal::Float(self))
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> ToParam for Vec<T>
|
||||
where
|
||||
T: ToParam,
|
||||
{
|
||||
fn to_param(self) -> crate::error::Result<Literal> {
|
||||
let mut out = Vec::with_capacity(self.len());
|
||||
for value in self {
|
||||
out.push(value.to_param()?);
|
||||
}
|
||||
Ok(Literal::List(out))
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> ToParam for &[T]
|
||||
where
|
||||
T: Clone + ToParam,
|
||||
{
|
||||
fn to_param(self) -> crate::error::Result<Literal> {
|
||||
let mut out = Vec::with_capacity(self.len());
|
||||
for value in self {
|
||||
out.push(value.clone().to_param()?);
|
||||
}
|
||||
Ok(Literal::List(out))
|
||||
}
|
||||
}
|
||||
|
||||
impl<T, const N: usize> ToParam for [T; N]
|
||||
where
|
||||
T: ToParam,
|
||||
{
|
||||
fn to_param(self) -> crate::error::Result<Literal> {
|
||||
let mut out = Vec::with_capacity(N);
|
||||
for value in self {
|
||||
out.push(value.to_param()?);
|
||||
}
|
||||
Ok(Literal::List(out))
|
||||
}
|
||||
}
|
||||
|
||||
/// Build a `ParamMap` from `key => value` pairs.
///
/// Every form evaluates to `$crate::error::Result<$crate::ParamMap>`:
///
/// * `params!()` yields `Ok` of an empty map.
/// * `params!(k => v, ...)` converts each key with `Into<String>` and each
///   value with `ToParam::to_param`, returning the first conversion error.
///
/// The empty form is pinned to the same result type as the non-empty form, so
/// `params!()?` and `params!().unwrap()` do not depend on the call site to
/// infer an error type. `Ok` and `String` are fully qualified so that items
/// shadowing those names at the call site cannot change the expansion.
#[macro_export]
macro_rules! params {
    () => {{
        let empty: $crate::error::Result<$crate::ParamMap> =
            ::std::result::Result::Ok($crate::ParamMap::new());
        empty
    }};
    ($($key:expr => $value:expr),+ $(,)?) => {{
        // The closure gives `?` a scope to return conversion errors from.
        (|| -> $crate::error::Result<$crate::ParamMap> {
            let mut map = $crate::ParamMap::new();
            $(
                map.insert(
                    ::std::convert::Into::<::std::string::String>::into($key),
                    $crate::ToParam::to_param($value)?,
                );
            )+
            ::std::result::Result::Ok(map)
        })()
    }};
}
|
||||
|
||||
pub fn find_named_query(query_source: &str, query_name: &str) -> RunInputResult<QueryDecl> {
|
||||
let queries = parse_query(query_source)?;
|
||||
queries
|
||||
.queries
|
||||
.into_iter()
|
||||
.find(|query| query.name == query_name)
|
||||
.ok_or_else(|| RunInputError::message(format!("query '{}' not found", query_name)))
|
||||
}
|
||||
|
||||
pub fn json_params_to_param_map(
|
||||
params: Option<&Value>,
|
||||
query_params: &[Param],
|
||||
mode: JsonParamMode,
|
||||
) -> RunInputResult<ParamMap> {
|
||||
let mut map = ParamMap::new();
|
||||
let object = match params {
|
||||
Some(Value::Object(object)) => object,
|
||||
Some(Value::Null) | None => return Ok(map),
|
||||
Some(other) => {
|
||||
let message = match mode {
|
||||
JsonParamMode::Standard => "params must be a JSON object".to_string(),
|
||||
JsonParamMode::JavaScript => {
|
||||
format!("params must be an object, got {}", json_type_name(other))
|
||||
}
|
||||
};
|
||||
return Err(RunInputError::message(message));
|
||||
}
|
||||
};
|
||||
|
||||
for (key, value) in object {
|
||||
let decl = query_params.iter().find(|param| param.name == *key);
|
||||
let literal = if let Some(decl) = decl {
|
||||
json_value_to_literal_typed(key, value, &decl.type_name, mode)?
|
||||
} else {
|
||||
json_value_to_literal_inferred(key, value, mode)?
|
||||
};
|
||||
map.insert(key.clone(), literal);
|
||||
}
|
||||
|
||||
Ok(map)
|
||||
}
|
||||
|
||||
fn json_value_to_literal_typed(
|
||||
key: &str,
|
||||
value: &Value,
|
||||
type_name: &str,
|
||||
mode: JsonParamMode,
|
||||
) -> RunInputResult<Literal> {
|
||||
match type_name {
|
||||
"String" => match value {
|
||||
Value::String(value) => Ok(Literal::String(value.clone())),
|
||||
other => Err(RunInputError::message(format!(
|
||||
"param '{}': expected string, got {}",
|
||||
key,
|
||||
json_type_name(other)
|
||||
))),
|
||||
},
|
||||
"I32" => match mode {
|
||||
JsonParamMode::Standard => {
|
||||
let value = parse_i64_param(key, value, mode)?;
|
||||
let value = i32::try_from(value).map_err(|_| {
|
||||
RunInputError::message(format!("param '{}': value {} exceeds I32", key, value))
|
||||
})?;
|
||||
Ok(Literal::Integer(i64::from(value)))
|
||||
}
|
||||
JsonParamMode::JavaScript => {
|
||||
let value = parse_i64_param(key, value, mode)?;
|
||||
let value = i32::try_from(value).map_err(|_| {
|
||||
RunInputError::message(format!(
|
||||
"param '{}': value {} exceeds I32 range",
|
||||
key, value
|
||||
))
|
||||
})?;
|
||||
Ok(Literal::Integer(i64::from(value)))
|
||||
}
|
||||
},
|
||||
"I64" => Ok(Literal::Integer(parse_i64_param(key, value, mode)?)),
|
||||
"U32" => {
|
||||
let value = parse_u64_param(key, value, mode)?;
|
||||
let value = match mode {
|
||||
JsonParamMode::Standard => u32::try_from(value).map_err(|_| {
|
||||
RunInputError::message(format!("param '{}': value {} exceeds U32", key, value))
|
||||
})?,
|
||||
JsonParamMode::JavaScript => u32::try_from(value).map_err(|_| {
|
||||
RunInputError::message(format!(
|
||||
"param '{}': value {} exceeds U32 range",
|
||||
key, value
|
||||
))
|
||||
})?,
|
||||
};
|
||||
Ok(Literal::Integer(i64::from(value)))
|
||||
}
|
||||
"U64" => {
|
||||
let value = parse_u64_param(key, value, mode)?;
|
||||
let value = match mode {
|
||||
JsonParamMode::Standard => i64::try_from(value).map_err(|_| {
|
||||
RunInputError::message(format!(
|
||||
"param '{}': value {} exceeds current engine range for U64 (max {})",
|
||||
key,
|
||||
value,
|
||||
i64::MAX
|
||||
))
|
||||
})?,
|
||||
JsonParamMode::JavaScript => i64::try_from(value).map_err(|_| {
|
||||
RunInputError::message(format!(
|
||||
"param '{}': value {} exceeds current engine range for U64 parameters (max {})",
|
||||
key,
|
||||
value,
|
||||
i64::MAX
|
||||
))
|
||||
})?,
|
||||
};
|
||||
Ok(Literal::Integer(value))
|
||||
}
|
||||
"F32" | "F64" => {
|
||||
let value = value.as_f64().ok_or_else(|| match mode {
|
||||
JsonParamMode::Standard => {
|
||||
RunInputError::message(format!("param '{}': expected float", key))
|
||||
}
|
||||
JsonParamMode::JavaScript => RunInputError::message(format!(
|
||||
"param '{}': expected float, got {}",
|
||||
key,
|
||||
json_type_name(value)
|
||||
)),
|
||||
})?;
|
||||
Ok(Literal::Float(value))
|
||||
}
|
||||
"Bool" => {
|
||||
let value = value.as_bool().ok_or_else(|| match mode {
|
||||
JsonParamMode::Standard => {
|
||||
RunInputError::message(format!("param '{}': expected boolean", key))
|
||||
}
|
||||
JsonParamMode::JavaScript => RunInputError::message(format!(
|
||||
"param '{}': expected boolean, got {}",
|
||||
key,
|
||||
json_type_name(value)
|
||||
)),
|
||||
})?;
|
||||
Ok(Literal::Bool(value))
|
||||
}
|
||||
"Date" => match value {
|
||||
Value::String(value) => Ok(Literal::Date(value.clone())),
|
||||
other => Err(match mode {
|
||||
JsonParamMode::Standard => {
|
||||
RunInputError::message(format!("param '{}': expected date string", key))
|
||||
}
|
||||
JsonParamMode::JavaScript => RunInputError::message(format!(
|
||||
"param '{}': expected date string, got {}",
|
||||
key,
|
||||
json_type_name(other)
|
||||
)),
|
||||
}),
|
||||
},
|
||||
"DateTime" => match value {
|
||||
Value::String(value) => Ok(Literal::DateTime(value.clone())),
|
||||
other => Err(match mode {
|
||||
JsonParamMode::Standard => {
|
||||
RunInputError::message(format!("param '{}': expected datetime string", key))
|
||||
}
|
||||
JsonParamMode::JavaScript => RunInputError::message(format!(
|
||||
"param '{}': expected datetime string, got {}",
|
||||
key,
|
||||
json_type_name(other)
|
||||
)),
|
||||
}),
|
||||
},
|
||||
"Blob" => match value {
|
||||
Value::String(value) => Ok(Literal::String(value.clone())),
|
||||
other => Err(RunInputError::message(format!(
|
||||
"param '{}': expected blob URI string, got {}",
|
||||
key,
|
||||
json_type_name(other)
|
||||
))),
|
||||
},
|
||||
other if parse_list_item_type(other).is_some() => {
|
||||
let item_type = parse_list_item_type(other).unwrap();
|
||||
let items = value.as_array().ok_or_else(|| match mode {
|
||||
JsonParamMode::Standard => {
|
||||
RunInputError::message(format!("param '{}': expected array for {}", key, other))
|
||||
}
|
||||
JsonParamMode::JavaScript => RunInputError::message(format!(
|
||||
"param '{}': expected array for {}, got {}",
|
||||
key,
|
||||
other,
|
||||
json_type_name(value)
|
||||
)),
|
||||
})?;
|
||||
let mut out = Vec::with_capacity(items.len());
|
||||
for item in items {
|
||||
out.push(json_value_to_literal_typed(key, item, item_type, mode)?);
|
||||
}
|
||||
Ok(Literal::List(out))
|
||||
}
|
||||
other if other.starts_with("Vector(") => {
|
||||
let expected_dim = parse_vector_dim(other).ok_or_else(|| match mode {
|
||||
JsonParamMode::Standard => RunInputError::message(format!(
|
||||
"param '{}': invalid vector type '{}'",
|
||||
key, other
|
||||
)),
|
||||
JsonParamMode::JavaScript => RunInputError::message(format!(
|
||||
"param '{}': invalid vector type '{}' (expected Vector(N))",
|
||||
key, other
|
||||
)),
|
||||
})?;
|
||||
let items = value.as_array().ok_or_else(|| match mode {
|
||||
JsonParamMode::Standard => {
|
||||
RunInputError::message(format!("param '{}': expected array for {}", key, other))
|
||||
}
|
||||
JsonParamMode::JavaScript => RunInputError::message(format!(
|
||||
"param '{}': expected array for {}, got {}",
|
||||
key,
|
||||
other,
|
||||
json_type_name(value)
|
||||
)),
|
||||
})?;
|
||||
if items.len() != expected_dim {
|
||||
return Err(RunInputError::message(format!(
|
||||
"param '{}': expected {} values for {}, got {}",
|
||||
key,
|
||||
expected_dim,
|
||||
other,
|
||||
items.len()
|
||||
)));
|
||||
}
|
||||
let mut out = Vec::with_capacity(items.len());
|
||||
for item in items {
|
||||
let value = item.as_f64().ok_or_else(|| match mode {
|
||||
JsonParamMode::Standard => RunInputError::message(format!(
|
||||
"param '{}': vector element is not numeric",
|
||||
key
|
||||
)),
|
||||
JsonParamMode::JavaScript => RunInputError::message(format!(
|
||||
"param '{}': vector element '{}' is not numeric",
|
||||
key, item
|
||||
)),
|
||||
})?;
|
||||
out.push(Literal::Float(value));
|
||||
}
|
||||
Ok(Literal::List(out))
|
||||
}
|
||||
_ => match value {
|
||||
Value::String(value) => Ok(Literal::String(value.clone())),
|
||||
other => Err(RunInputError::message(format!(
|
||||
"param '{}': expected string for type '{}', got {}",
|
||||
key,
|
||||
type_name,
|
||||
json_type_name(other)
|
||||
))),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
fn json_value_to_literal_inferred(
|
||||
key: &str,
|
||||
value: &Value,
|
||||
mode: JsonParamMode,
|
||||
) -> RunInputResult<Literal> {
|
||||
match value {
|
||||
Value::String(value) => Ok(Literal::String(value.clone())),
|
||||
Value::Bool(value) => Ok(Literal::Bool(*value)),
|
||||
Value::Number(number) => match mode {
|
||||
JsonParamMode::Standard => {
|
||||
if let Some(value) = number.as_i64() {
|
||||
Ok(Literal::Integer(value))
|
||||
} else if let Some(value) = number.as_f64() {
|
||||
Ok(Literal::Float(value))
|
||||
} else {
|
||||
Err(RunInputError::message(format!(
|
||||
"param '{}': unsupported numeric value",
|
||||
key
|
||||
)))
|
||||
}
|
||||
}
|
||||
JsonParamMode::JavaScript => {
|
||||
if let Some(value) = number.as_i64() {
|
||||
if !is_js_safe_integer_i64(value) {
|
||||
return Err(RunInputError::message(format!(
|
||||
"param '{}': integer {} exceeds JS safe integer range; use a decimal string and a typed query parameter for exact values",
|
||||
key, value
|
||||
)));
|
||||
}
|
||||
Ok(Literal::Integer(value))
|
||||
} else if let Some(value) = number.as_u64() {
|
||||
if value > JS_MAX_SAFE_INTEGER_U64 {
|
||||
return Err(RunInputError::message(format!(
|
||||
"param '{}': integer {} exceeds JS safe integer range; use a decimal string and a typed query parameter for exact values",
|
||||
key, value
|
||||
)));
|
||||
}
|
||||
let value = i64::try_from(value).map_err(|_| {
|
||||
RunInputError::message(format!(
|
||||
"param '{}': integer {} exceeds supported range (max {})",
|
||||
key,
|
||||
value,
|
||||
i64::MAX
|
||||
))
|
||||
})?;
|
||||
Ok(Literal::Integer(value))
|
||||
} else if let Some(value) = number.as_f64() {
|
||||
Ok(Literal::Float(value))
|
||||
} else {
|
||||
Err(RunInputError::message(format!(
|
||||
"param '{}': unsupported number value",
|
||||
key
|
||||
)))
|
||||
}
|
||||
}
|
||||
},
|
||||
Value::Array(values) => {
|
||||
let mut out = Vec::with_capacity(values.len());
|
||||
for value in values {
|
||||
out.push(json_value_to_literal_inferred(key, value, mode)?);
|
||||
}
|
||||
Ok(Literal::List(out))
|
||||
}
|
||||
Value::Null => Err(match mode {
|
||||
JsonParamMode::Standard => {
|
||||
RunInputError::message(format!("param '{}': null is not supported", key))
|
||||
}
|
||||
JsonParamMode::JavaScript => RunInputError::message(format!(
|
||||
"param '{}': null values are not supported as query parameters",
|
||||
key
|
||||
)),
|
||||
}),
|
||||
Value::Object(_) => Err(match mode {
|
||||
JsonParamMode::Standard => {
|
||||
RunInputError::message(format!("param '{}': object is not supported", key))
|
||||
}
|
||||
JsonParamMode::JavaScript => RunInputError::message(format!(
|
||||
"param '{}': object values are not supported as query parameters",
|
||||
key
|
||||
)),
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_i64_param(key: &str, value: &Value, mode: JsonParamMode) -> RunInputResult<i64> {
|
||||
match mode {
|
||||
JsonParamMode::Standard => match value {
|
||||
Value::Number(number) => number.as_i64().ok_or_else(|| {
|
||||
RunInputError::message(format!("param '{}': expected integer number", key))
|
||||
}),
|
||||
Value::String(value) => value.parse::<i64>().map_err(|_| {
|
||||
RunInputError::message(format!(
|
||||
"param '{}': expected integer string, got '{}'",
|
||||
key, value
|
||||
))
|
||||
}),
|
||||
_ => Err(RunInputError::message(format!(
|
||||
"param '{}': expected integer",
|
||||
key
|
||||
))),
|
||||
},
|
||||
JsonParamMode::JavaScript => match value {
|
||||
Value::Number(number) => {
|
||||
let parsed = if let Some(parsed) = number.as_i64() {
|
||||
parsed
|
||||
} else if let Some(parsed) = number.as_f64() {
|
||||
if !parsed.is_finite() || parsed.fract() != 0.0 {
|
||||
return Err(RunInputError::message(format!(
|
||||
"param '{}': expected integer, got number",
|
||||
key
|
||||
)));
|
||||
}
|
||||
if parsed < i64::MIN as f64 || parsed > i64::MAX as f64 {
|
||||
return Err(RunInputError::message(format!(
|
||||
"param '{}': integer {} is outside i64 range",
|
||||
key, parsed
|
||||
)));
|
||||
}
|
||||
parsed as i64
|
||||
} else {
|
||||
return Err(RunInputError::message(format!(
|
||||
"param '{}': expected integer, got number",
|
||||
key
|
||||
)));
|
||||
};
|
||||
if !is_js_safe_integer_i64(parsed) {
|
||||
return Err(RunInputError::message(format!(
|
||||
"param '{}': integer {} exceeds JS safe integer range; pass a decimal string for exact values",
|
||||
key, parsed
|
||||
)));
|
||||
}
|
||||
Ok(parsed)
|
||||
}
|
||||
Value::String(value) => value.parse::<i64>().map_err(|_| {
|
||||
RunInputError::message(format!(
|
||||
"param '{}': expected integer string, got '{}'",
|
||||
key, value
|
||||
))
|
||||
}),
|
||||
other => Err(RunInputError::message(format!(
|
||||
"param '{}': expected integer, got {}",
|
||||
key,
|
||||
json_type_name(other)
|
||||
))),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_u64_param(key: &str, value: &Value, mode: JsonParamMode) -> RunInputResult<u64> {
|
||||
match mode {
|
||||
JsonParamMode::Standard => match value {
|
||||
Value::Number(number) => number.as_u64().ok_or_else(|| {
|
||||
RunInputError::message(format!("param '{}': expected unsigned integer number", key))
|
||||
}),
|
||||
Value::String(value) => value.parse::<u64>().map_err(|_| {
|
||||
RunInputError::message(format!(
|
||||
"param '{}': expected unsigned integer string, got '{}'",
|
||||
key, value
|
||||
))
|
||||
}),
|
||||
_ => Err(RunInputError::message(format!(
|
||||
"param '{}': expected unsigned integer",
|
||||
key
|
||||
))),
|
||||
},
|
||||
JsonParamMode::JavaScript => match value {
|
||||
Value::Number(number) => {
|
||||
let parsed = if let Some(parsed) = number.as_u64() {
|
||||
parsed
|
||||
} else if let Some(parsed) = number.as_f64() {
|
||||
if !parsed.is_finite() || parsed.fract() != 0.0 || parsed < 0.0 {
|
||||
return Err(RunInputError::message(format!(
|
||||
"param '{}': expected unsigned integer, got number",
|
||||
key
|
||||
)));
|
||||
}
|
||||
if parsed > u64::MAX as f64 {
|
||||
return Err(RunInputError::message(format!(
|
||||
"param '{}': integer {} is outside u64 range",
|
||||
key, parsed
|
||||
)));
|
||||
}
|
||||
parsed as u64
|
||||
} else {
|
||||
return Err(RunInputError::message(format!(
|
||||
"param '{}': expected unsigned integer, got number",
|
||||
key
|
||||
)));
|
||||
};
|
||||
if parsed > JS_MAX_SAFE_INTEGER_U64 {
|
||||
return Err(RunInputError::message(format!(
|
||||
"param '{}': integer {} exceeds JS safe integer range; pass a decimal string for exact values",
|
||||
key, parsed
|
||||
)));
|
||||
}
|
||||
Ok(parsed)
|
||||
}
|
||||
Value::String(value) => value.parse::<u64>().map_err(|_| {
|
||||
RunInputError::message(format!(
|
||||
"param '{}': expected unsigned integer string, got '{}'",
|
||||
key, value
|
||||
))
|
||||
}),
|
||||
other => Err(RunInputError::message(format!(
|
||||
"param '{}': expected unsigned integer, got {}",
|
||||
key,
|
||||
json_type_name(other)
|
||||
))),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Extracts `N` from a type name of the exact form `Vector(N)`.
///
/// Returns `None` when the prefix or closing parenthesis is missing, when the
/// text in between does not parse as `usize`, or when `N` is zero.
fn parse_vector_dim(type_name: &str) -> Option<usize> {
    let digits = type_name.strip_prefix("Vector(")?.strip_suffix(')')?;
    match digits.parse::<usize>() {
        Ok(dim) if dim != 0 => Some(dim),
        _ => None,
    }
}
|
||||
|
||||
/// Returns the item type of a list type written as `[T]`, with surrounding
/// whitespace inside the brackets trimmed.
///
/// Returns `None` unless `type_name` both starts with `[` and ends with `]`.
fn parse_list_item_type(type_name: &str) -> Option<&str> {
    let without_open = type_name.strip_prefix('[')?;
    let inner = without_open.strip_suffix(']')?;
    Some(inner.trim())
}
|
||||
|
||||
fn json_type_name(value: &Value) -> &'static str {
|
||||
match value {
|
||||
Value::Null => "null",
|
||||
Value::Bool(_) => "boolean",
|
||||
Value::Number(_) => "number",
|
||||
Value::String(_) => "string",
|
||||
Value::Array(_) => "array",
|
||||
Value::Object(_) => "object",
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use serde_json::json;

    use super::{JsonParamMode, ToParam, find_named_query, json_params_to_param_map};
    use crate::query::ast::Literal;

    /// True when `literal` is an integer literal equal to `expected`.
    fn is_integer(literal: Option<&Literal>, expected: i64) -> bool {
        matches!(literal, Some(Literal::Integer(found)) if *found == expected)
    }

    /// True when `literal` is a string literal equal to `expected`.
    fn is_string(literal: Option<&Literal>, expected: &str) -> bool {
        matches!(literal, Some(Literal::String(found)) if found == expected)
    }

    /// True when `literal` is a date literal equal to `expected`.
    fn is_date(literal: Option<&Literal>, expected: &str) -> bool {
        matches!(literal, Some(Literal::Date(found)) if found == expected)
    }

    /// True when `literal` is a datetime literal equal to `expected`.
    fn is_datetime(literal: Option<&Literal>, expected: &str) -> bool {
        matches!(literal, Some(Literal::DateTime(found)) if found == expected)
    }

    // A typed U64 parameter given as a JSON number above 2^53 - 1 must be rejected
    // in JavaScript mode.
    #[test]
    fn js_mode_rejects_unsafe_integer_numbers() {
        let decl = find_named_query(
            "query find($id: U64) { match { $u: User } return { $u } }",
            "find",
        )
        .expect("query should parse");
        let supplied = json!({ "id": 9_007_199_254_740_992u64 });

        let failure =
            json_params_to_param_map(Some(&supplied), &decl.params, JsonParamMode::JavaScript)
                .expect_err("unsafe integer should fail");

        assert_eq!(
            failure.to_string(),
            "param 'id': integer 9007199254740992 exceeds JS safe integer range; pass a decimal string for exact values"
        );
    }

    // Standard mode keeps its terse wording for non-object params.
    #[test]
    fn standard_mode_preserves_ffi_param_object_error() {
        let supplied = json!(["nope"]);

        let failure = json_params_to_param_map(Some(&supplied), &[], JsonParamMode::Standard)
            .expect_err("non-object params should fail");

        assert_eq!(failure.to_string(), "params must be a JSON object");
    }

    #[test]
    fn to_param_supports_lists_and_explicit_date_literals() {
        match vec![1_i32, 2_i32, 3_i32].to_param().expect("vector param") {
            Literal::List(values) => {
                assert!(is_integer(values.first(), 1));
                assert!(is_integer(values.get(1), 2));
                assert!(is_integer(values.get(2), 3));
            }
            other => panic!("expected list param, got {:?}", other),
        }

        let converted = Literal::Date("2026-03-06".to_string())
            .to_param()
            .expect("date param");
        assert!(is_date(Some(&converted), "2026-03-06"));
    }

    #[test]
    fn to_param_rejects_unsigned_values_outside_engine_range() {
        let failure = u64::MAX.to_param().expect_err("oversized u64 should fail");
        let expected = format!(
            "execution error: param value {} exceeds current engine range for numeric literals (max {})",
            u64::MAX,
            i64::MAX
        );

        assert_eq!(failure.to_string(), expected);
    }

    #[test]
    fn params_macro_builds_param_map() {
        let built = params! {
            "name" => "Alice",
            "age" => 41_i32,
            "scores" => [1_u8, 2_u8, 3_u8],
            "published_at" => Literal::DateTime("2026-03-06T12:00:00Z".to_string()),
        }
        .expect("params");

        assert!(is_string(built.get("name"), "Alice"));
        assert!(is_integer(built.get("age"), 41));
        match built.get("scores") {
            Some(Literal::List(values)) => {
                assert!(is_integer(values.first(), 1));
                assert!(is_integer(values.get(1), 2));
                assert!(is_integer(values.get(2), 3));
            }
            other => panic!("expected list param, got {:?}", other),
        }
        assert!(is_datetime(
            built.get("published_at"),
            "2026-03-06T12:00:00Z"
        ));
    }

    #[test]
    fn typed_json_params_support_list_and_datetime_types() {
        let decl = find_named_query(
            r#"
query q($tags: [String], $days: [Date]?, $due_at: DateTime) {
    match { $t: Task }
    return { $t.slug }
}
"#,
            "q",
        )
        .expect("query");
        let supplied = json!({
            "tags": ["launch", "priority"],
            "days": ["2026-04-01", "2026-04-02"],
            "due_at": "2026-04-03T10:15:00Z"
        });

        let converted =
            json_params_to_param_map(Some(&supplied), &decl.params, JsonParamMode::Standard)
                .expect("typed params");

        assert!(is_datetime(
            converted.get("due_at"),
            "2026-04-03T10:15:00Z"
        ));
        match converted.get("tags") {
            Some(Literal::List(values)) => {
                assert!(is_string(values.first(), "launch"));
                assert!(is_string(values.get(1), "priority"));
            }
            other => panic!("expected string list param, got {:?}", other),
        }
        match converted.get("days") {
            Some(Literal::List(values)) => {
                assert!(is_date(values.first(), "2026-04-01"));
                assert!(is_date(values.get(1), "2026-04-02"));
            }
            other => panic!("expected date list param, got {:?}", other),
        }
    }
}
|
||||
286
crates/omnigraph-compiler/src/result.rs
Normal file
286
crates/omnigraph-compiler/src/result.rs
Normal file
|
|
@ -0,0 +1,286 @@
|
|||
use std::sync::Arc;
|
||||
|
||||
use arrow_array::{RecordBatch, UInt64Array};
|
||||
use arrow_ipc::writer::StreamWriter;
|
||||
use arrow_schema::{DataType, Field, Schema, SchemaRef};
|
||||
use serde::de::DeserializeOwned;
|
||||
|
||||
use crate::error::{NanoError, Result};
|
||||
use crate::json_output::{record_batches_to_json_rows, record_batches_to_rust_json_rows};
|
||||
|
||||
/// Node and edge counts reported for a mutation.
///
/// Convertible into [`MutationResult`] via `From`; both counts default to zero.
#[derive(Debug, Clone, Copy, Default)]
pub struct MutationExecResult {
    /// Number of nodes affected.
    pub affected_nodes: usize,
    /// Number of edges affected.
    pub affected_edges: usize,
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct QueryResult {
|
||||
schema: SchemaRef,
|
||||
batches: Vec<RecordBatch>,
|
||||
}
|
||||
|
||||
impl QueryResult {
|
||||
pub fn new(schema: SchemaRef, batches: Vec<RecordBatch>) -> Self {
|
||||
Self { schema, batches }
|
||||
}
|
||||
|
||||
pub fn schema(&self) -> &SchemaRef {
|
||||
&self.schema
|
||||
}
|
||||
|
||||
pub fn batches(&self) -> &[RecordBatch] {
|
||||
&self.batches
|
||||
}
|
||||
|
||||
pub fn into_batches(self) -> Vec<RecordBatch> {
|
||||
self.batches
|
||||
}
|
||||
|
||||
pub fn num_rows(&self) -> usize {
|
||||
self.batches.iter().map(RecordBatch::num_rows).sum()
|
||||
}
|
||||
|
||||
pub fn concat_batches(&self) -> Result<RecordBatch> {
|
||||
if self.batches.is_empty() {
|
||||
return Ok(RecordBatch::new_empty(self.schema.clone()));
|
||||
}
|
||||
|
||||
arrow_select::concat::concat_batches(&self.schema, &self.batches)
|
||||
.map_err(|err| NanoError::Execution(err.to_string()))
|
||||
}
|
||||
|
||||
pub fn to_sdk_json(&self) -> serde_json::Value {
|
||||
serde_json::Value::Array(record_batches_to_json_rows(&self.batches))
|
||||
}
|
||||
|
||||
pub fn to_rust_json(&self) -> serde_json::Value {
|
||||
serde_json::Value::Array(record_batches_to_rust_json_rows(&self.batches))
|
||||
}
|
||||
|
||||
pub fn deserialize<T: DeserializeOwned>(&self) -> Result<T> {
|
||||
serde_json::from_value(self.to_rust_json()).map_err(|err| {
|
||||
NanoError::Execution(format!("failed to deserialize query result: {}", err))
|
||||
})
|
||||
}
|
||||
|
||||
pub fn to_arrow_ipc(&self) -> Result<Vec<u8>> {
|
||||
let mut buffer = Vec::new();
|
||||
let mut writer = StreamWriter::try_new(&mut buffer, &self.schema)?;
|
||||
for batch in &self.batches {
|
||||
writer.write(batch)?;
|
||||
}
|
||||
writer.finish()?;
|
||||
drop(writer);
|
||||
Ok(buffer)
|
||||
}
|
||||
}
|
||||
|
||||
/// Outcome of a mutation: the number of nodes and edges it affected.
///
/// Can be rendered as JSON or as a one-row Arrow record batch.
#[derive(Debug, Clone, Copy, Default)]
pub struct MutationResult {
    /// Number of nodes affected.
    pub affected_nodes: usize,
    /// Number of edges affected.
    pub affected_edges: usize,
}
|
||||
|
||||
impl MutationResult {
|
||||
pub fn to_sdk_json(&self) -> serde_json::Value {
|
||||
serde_json::json!({
|
||||
"affectedNodes": self.affected_nodes,
|
||||
"affectedEdges": self.affected_edges,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn to_record_batch(&self) -> Result<RecordBatch> {
|
||||
let schema = Arc::new(Schema::new(vec![
|
||||
Field::new("affected_nodes", DataType::UInt64, false),
|
||||
Field::new("affected_edges", DataType::UInt64, false),
|
||||
]));
|
||||
Ok(RecordBatch::try_new(
|
||||
schema,
|
||||
vec![
|
||||
Arc::new(UInt64Array::from(vec![self.affected_nodes as u64])),
|
||||
Arc::new(UInt64Array::from(vec![self.affected_edges as u64])),
|
||||
],
|
||||
)?)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<MutationExecResult> for MutationResult {
|
||||
fn from(value: MutationExecResult) -> Self {
|
||||
Self {
|
||||
affected_nodes: value.affected_nodes,
|
||||
affected_edges: value.affected_edges,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum RunResult {
|
||||
Query(QueryResult),
|
||||
Mutation(MutationResult),
|
||||
}
|
||||
|
||||
impl RunResult {
|
||||
pub fn to_sdk_json(&self) -> serde_json::Value {
|
||||
match self {
|
||||
Self::Query(result) => result.to_sdk_json(),
|
||||
Self::Mutation(result) => result.to_sdk_json(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn into_record_batches(self) -> Result<Vec<RecordBatch>> {
|
||||
match self {
|
||||
Self::Query(result) => Ok(result.into_batches()),
|
||||
Self::Mutation(result) => Ok(vec![result.to_record_batch()?]),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::io::Cursor;

    use arrow_array::Int64Array;
    use arrow_ipc::reader::StreamReader;
    use serde::Deserialize;

    use super::*;

    /// Schema with one non-nullable column.
    fn single_column_schema(name: &str, data_type: DataType) -> SchemaRef {
        Arc::new(Schema::new(vec![Field::new(name, data_type, false)]))
    }

    /// Schema with non-nullable `id: UInt64` and `age: Int64` columns.
    fn person_schema() -> SchemaRef {
        Arc::new(Schema::new(vec![
            Field::new("id", DataType::UInt64, false),
            Field::new("age", DataType::Int64, false),
        ]))
    }

    /// One-column `UInt64` batch; `label` is the panic message on failure.
    fn u64_batch(schema: &SchemaRef, values: Vec<u64>, label: &str) -> RecordBatch {
        RecordBatch::try_new(schema.clone(), vec![Arc::new(UInt64Array::from(values))])
            .expect(label)
    }

    /// Opens an IPC stream reader over encoded bytes.
    fn open_stream(encoded: Vec<u8>) -> StreamReader<Cursor<Vec<u8>>> {
        StreamReader::try_new(Cursor::new(encoded), None).expect("open stream")
    }

    #[test]
    fn query_result_arrow_ipc_round_trips_empty_schema() {
        let schema = single_column_schema("name", DataType::Utf8);
        let empty = QueryResult::new(schema.clone(), vec![]);

        let bytes = empty.to_arrow_ipc().expect("encode empty result");
        let stream = open_stream(bytes);

        assert_eq!(stream.schema().as_ref(), schema.as_ref());
        assert_eq!(stream.count(), 0);
    }

    #[test]
    fn query_result_arrow_ipc_round_trips_batches() {
        let schema = single_column_schema("id", DataType::UInt64);
        let rows = u64_batch(&schema, vec![1_u64, 2_u64], "batch");
        let result = QueryResult::new(schema.clone(), vec![rows]);

        let bytes = result.to_arrow_ipc().expect("encode result");
        let mut stream = open_stream(bytes);
        let first = stream.next().expect("first batch").expect("decode batch");

        assert_eq!(stream.schema().as_ref(), schema.as_ref());
        assert_eq!(first.num_rows(), 2);
        assert_eq!(first.schema().as_ref(), schema.as_ref());
    }

    #[test]
    fn query_result_num_rows_and_concat_cover_multiple_batches() {
        let schema = single_column_schema("id", DataType::UInt64);
        let head = u64_batch(&schema, vec![1_u64, 2_u64], "batch1");
        let tail = u64_batch(&schema, vec![3_u64], "batch2");
        let result = QueryResult::new(schema.clone(), vec![head, tail]);

        assert_eq!(result.num_rows(), 3);

        let merged = result.concat_batches().expect("concat batches");
        let ids = merged
            .column(0)
            .as_any()
            .downcast_ref::<UInt64Array>()
            .expect("u64 ids");
        assert_eq!(merged.schema().as_ref(), schema.as_ref());
        assert_eq!(ids.values(), &[1, 2, 3]);
    }

    #[test]
    fn query_result_concat_empty_batches_returns_empty_batch() {
        let schema = single_column_schema("id", DataType::UInt64);
        let empty = QueryResult::new(schema.clone(), vec![]);

        let merged = empty.concat_batches().expect("concat empty");

        assert_eq!(merged.schema().as_ref(), schema.as_ref());
        assert_eq!(merged.num_rows(), 0);
    }

    // Extreme 64-bit values must come through as JSON numbers, not be truncated.
    #[test]
    fn query_result_to_rust_json_preserves_wide_integers() {
        let schema = Arc::new(Schema::new(vec![
            Field::new("signed", DataType::Int64, false),
            Field::new("unsigned", DataType::UInt64, false),
        ]));
        let extremes = RecordBatch::try_new(
            schema.clone(),
            vec![
                Arc::new(Int64Array::from(vec![i64::MIN])),
                Arc::new(UInt64Array::from(vec![u64::MAX])),
            ],
        )
        .expect("batch");
        let result = QueryResult::new(schema, vec![extremes]);

        let expected = serde_json::json!([{
            "signed": i64::MIN,
            "unsigned": u64::MAX,
        }]);
        assert_eq!(result.to_rust_json(), expected);
    }

    #[derive(Debug, Deserialize, PartialEq)]
    struct PersonRow {
        id: u64,
        age: i64,
    }

    #[test]
    fn query_result_deserialize_decodes_rust_rows() {
        let schema = person_schema();
        let first = RecordBatch::try_new(
            schema.clone(),
            vec![
                Arc::new(UInt64Array::from(vec![1_u64])),
                Arc::new(Int64Array::from(vec![40_i64])),
            ],
        )
        .expect("batch1");
        let second = RecordBatch::try_new(
            schema,
            vec![
                Arc::new(UInt64Array::from(vec![u64::MAX])),
                Arc::new(Int64Array::from(vec![-5_i64])),
            ],
        )
        .expect("batch2");
        let result = QueryResult::new(first.schema(), vec![first, second]);

        let decoded: Vec<PersonRow> = result.deserialize().expect("deserialize rows");

        let expected = vec![
            PersonRow { id: 1, age: 40 },
            PersonRow {
                id: u64::MAX,
                age: -5,
            },
        ];
        assert_eq!(decoded, expected);
    }
}
|
||||
111
crates/omnigraph-compiler/src/schema/ast.rs
Normal file
111
crates/omnigraph-compiler/src/schema/ast.rs
Normal file
|
|
@ -0,0 +1,111 @@
|
|||
use crate::types::PropType;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct SchemaFile {
|
||||
pub declarations: Vec<SchemaDecl>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub enum SchemaDecl {
|
||||
Interface(InterfaceDecl),
|
||||
Node(NodeDecl),
|
||||
Edge(EdgeDecl),
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct InterfaceDecl {
|
||||
pub name: String,
|
||||
pub properties: Vec<PropDecl>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct NodeDecl {
|
||||
pub name: String,
|
||||
pub annotations: Vec<Annotation>,
|
||||
pub implements: Vec<String>,
|
||||
pub properties: Vec<PropDecl>,
|
||||
pub constraints: Vec<Constraint>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct EdgeDecl {
|
||||
pub name: String,
|
||||
pub from_type: String,
|
||||
pub to_type: String,
|
||||
pub cardinality: Cardinality,
|
||||
pub annotations: Vec<Annotation>,
|
||||
pub properties: Vec<PropDecl>,
|
||||
pub constraints: Vec<Constraint>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct PropDecl {
|
||||
pub name: String,
|
||||
pub prop_type: PropType,
|
||||
pub annotations: Vec<Annotation>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct Annotation {
|
||||
pub name: String,
|
||||
pub value: Option<String>,
|
||||
}
|
||||
|
||||
/// A typed constraint declared in a node or edge body.
|
||||
///
|
||||
/// Property-level annotations (`@key`, `@unique`, `@index`) are desugared
|
||||
/// into these during parsing, so both syntactic positions produce the same
|
||||
/// representation.
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub enum Constraint {
|
||||
Key(Vec<String>),
|
||||
Unique(Vec<String>),
|
||||
Index(Vec<String>),
|
||||
Range {
|
||||
property: String,
|
||||
min: Option<ConstraintBound>,
|
||||
max: Option<ConstraintBound>,
|
||||
},
|
||||
Check {
|
||||
property: String,
|
||||
pattern: String,
|
||||
},
|
||||
}
|
||||
|
||||
/// A numeric bound used in `@range` constraints.
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub enum ConstraintBound {
|
||||
Integer(i64),
|
||||
Float(f64),
|
||||
}
|
||||
|
||||
/// Edge cardinality: `@card(min..max)`. Default is `0..*`.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct Cardinality {
|
||||
pub min: u32,
|
||||
pub max: Option<u32>,
|
||||
}
|
||||
|
||||
impl Default for Cardinality {
|
||||
fn default() -> Self {
|
||||
Self { min: 0, max: None }
|
||||
}
|
||||
}
|
||||
|
||||
impl Cardinality {
|
||||
pub fn is_default(&self) -> bool {
|
||||
self.min == 0 && self.max.is_none()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn has_annotation(annotations: &[Annotation], name: &str) -> bool {
|
||||
annotations.iter().any(|ann| ann.name == name)
|
||||
}
|
||||
|
||||
pub fn annotation_value<'a>(annotations: &'a [Annotation], name: &str) -> Option<&'a str> {
|
||||
annotations
|
||||
.iter()
|
||||
.find(|ann| ann.name == name)
|
||||
.and_then(|ann| ann.value.as_deref())
|
||||
}
|
||||
2
crates/omnigraph-compiler/src/schema/mod.rs
Normal file
2
crates/omnigraph-compiler/src/schema/mod.rs
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
pub mod ast;
|
||||
pub mod parser;
|
||||
1950
crates/omnigraph-compiler/src/schema/parser.rs
Normal file
1950
crates/omnigraph-compiler/src/schema/parser.rs
Normal file
File diff suppressed because it is too large
Load diff
60
crates/omnigraph-compiler/src/schema/schema.pest
Normal file
60
crates/omnigraph-compiler/src/schema/schema.pest
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
// Omnigraph Schema Grammar (.pg files)
//
// Rules written with `{ ... }` skip WHITESPACE and COMMENT implicitly between
// their parts; rules written with `@{ ... }` are atomic and match their
// characters contiguously.

// ---- Trivia ---------------------------------------------------------------

WHITESPACE = _{ " " | "\t" | "\r" | "\n" }
COMMENT = _{ LINE_COMMENT | BLOCK_COMMENT }
LINE_COMMENT = _{ "//" ~ (!"\n" ~ ANY)* }
BLOCK_COMMENT = _{ "/*" ~ (!"*/" ~ ANY)* ~ "*/" }

// ---- Top level ------------------------------------------------------------

// A schema is a sequence of declarations spanning the whole input.
schema_file = { SOI ~ schema_decl* ~ EOI }

schema_decl = { interface_decl | node_decl | edge_decl }

// interface Named { name: String @key }
interface_decl = { "interface" ~ type_name ~ "{" ~ prop_decl* ~ "}" }

// node Person implements Named, Described { ... }
node_decl = { "node" ~ type_name ~ annotation* ~ implements_clause? ~ "{" ~ (prop_decl | body_constraint)* ~ "}" }
implements_clause = { "implements" ~ type_name ~ ("," ~ type_name)* }

// edge Knows: Person -> Person @card(0..1) { ... }
// edge Knows: Person -> Person
edge_decl = { "edge" ~ type_name ~ ":" ~ type_name ~ "->" ~ type_name ~ cardinality? ~ annotation* ~ ("{" ~ (prop_decl | body_constraint)* ~ "}")? }

// @card(0..1), @card(1..), @card(0..)
cardinality = { "@card" ~ "(" ~ integer ~ ".." ~ integer? ~ ")" }

// ---- Properties and constraints -------------------------------------------

prop_decl = { ident ~ ":" ~ type_ref ~ annotation* }

// Body-level constraints: @key(name), @unique(a, b), @index(a, b), @range(age, 0..200), @check(code, "regex")
body_constraint = { "@" ~ constraint_name ~ "(" ~ constraint_args ~ ")" }
constraint_name = { "key" | "unique" | "index" | "range" | "check" }
constraint_args = { constraint_arg ~ ("," ~ constraint_arg)* }
// Order matters: `range_bound` must be tried before `literal` so that `0..200`
// is not consumed as the integer `0`.
constraint_arg = { range_bound | literal | ident }
// `min..max`, `min..` or `..max`
range_bound = { (signed_float | signed_integer) ~ ".." ~ (signed_float | signed_integer)? | ".." ~ (signed_float | signed_integer) }

// ---- Types ----------------------------------------------------------------

// A trailing `?` marks the type as nullable.
type_ref = { core_type ~ "?"? }
core_type = { list_type | enum_type | vector_type | base_type }
list_type = { "[" ~ base_type ~ "]" }
enum_type = { "enum" ~ "(" ~ enum_value ~ ("," ~ enum_value)* ~ ")" }
vector_type = { "Vector" ~ "(" ~ integer ~ ")" }
enum_value = @{ (ASCII_ALPHANUMERIC | "_" | "-")+ }

// "DateTime" is listed before "Date" so the longer keyword wins.
base_type = { "String" | "Blob" | "Bool" | "I32" | "I64" | "U32" | "U64" | "F32" | "F64" | "DateTime" | "Date" }

// ---- Annotations ----------------------------------------------------------

// Annotation rule excludes constraint keywords followed by "(" — those are body_constraints
annotation = { "@" ~ !(constraint_name ~ "(") ~ ident ~ ("(" ~ annotation_arg ~ ")")? }
annotation_arg = { literal | ident }

// ---- Literals and identifiers ---------------------------------------------

literal = { string_lit | float_lit | integer | bool_lit }

string_lit = @{ "\"" ~ string_char* ~ "\"" }
string_char = @{ !("\"" | "\\") ~ ANY | "\\" ~ ANY }
float_lit = @{ ASCII_DIGIT+ ~ "." ~ ASCII_DIGIT+ }
integer = @{ ASCII_DIGIT+ }

signed_float = @{ "-"? ~ ASCII_DIGIT+ ~ "." ~ ASCII_DIGIT+ }
signed_integer = @{ "-"? ~ ASCII_DIGIT+ }
// Atomic, with a word-boundary lookahead: `literal` is tried before `ident`
// in `constraint_arg` and `annotation_arg`, so without the lookahead an
// identifier such as `true_positive` would be consumed as the literal `true`
// and the enclosing rule would then fail on the leftover `_positive`.
bool_lit = @{ ("true" | "false") ~ !(ASCII_ALPHANUMERIC | "_") }

type_name = @{ ASCII_ALPHA_UPPER ~ (ASCII_ALPHANUMERIC | "_")* }
ident = @{ (ASCII_ALPHA_LOWER | "_") ~ (ASCII_ALPHANUMERIC | "_")* }
|
||||
227
crates/omnigraph-compiler/src/types.rs
Normal file
227
crates/omnigraph-compiler/src/types.rs
Normal file
|
|
@ -0,0 +1,227 @@
|
|||
use arrow_schema::DataType;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
const MAX_VECTOR_DIM: u32 = i32::MAX as u32;
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
||||
pub enum ScalarType {
|
||||
String,
|
||||
Bool,
|
||||
I32,
|
||||
I64,
|
||||
U32,
|
||||
U64,
|
||||
F32,
|
||||
F64,
|
||||
Date,
|
||||
DateTime,
|
||||
Vector(u32),
|
||||
Blob,
|
||||
}
|
||||
|
||||
impl ScalarType {
|
||||
pub fn from_str_name(s: &str) -> Option<Self> {
|
||||
if let Some(inner) = s.strip_prefix("Vector(").and_then(|t| t.strip_suffix(')')) {
|
||||
let dim = inner.parse::<u32>().ok()?;
|
||||
if dim == 0 || dim > MAX_VECTOR_DIM {
|
||||
return None;
|
||||
}
|
||||
return Some(Self::Vector(dim));
|
||||
}
|
||||
|
||||
match s {
|
||||
"String" => Some(Self::String),
|
||||
"Bool" => Some(Self::Bool),
|
||||
"I32" => Some(Self::I32),
|
||||
"I64" => Some(Self::I64),
|
||||
"U32" => Some(Self::U32),
|
||||
"U64" => Some(Self::U64),
|
||||
"F32" => Some(Self::F32),
|
||||
"F64" => Some(Self::F64),
|
||||
"Date" => Some(Self::Date),
|
||||
"DateTime" => Some(Self::DateTime),
|
||||
"Blob" => Some(Self::Blob),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_arrow(&self) -> DataType {
|
||||
match self {
|
||||
Self::String => DataType::Utf8,
|
||||
Self::Bool => DataType::Boolean,
|
||||
Self::I32 => DataType::Int32,
|
||||
Self::I64 => DataType::Int64,
|
||||
Self::U32 => DataType::UInt32,
|
||||
Self::U64 => DataType::UInt64,
|
||||
Self::F32 => DataType::Float32,
|
||||
Self::F64 => DataType::Float64,
|
||||
Self::Date => DataType::Date32,
|
||||
Self::DateTime => DataType::Date64,
|
||||
Self::Blob => DataType::LargeBinary,
|
||||
Self::Vector(dim) => {
|
||||
let dim = i32::try_from(*dim)
|
||||
.expect("vector dimension exceeds Arrow FixedSizeList i32 bound");
|
||||
DataType::FixedSizeList(
|
||||
std::sync::Arc::new(arrow_schema::Field::new("item", DataType::Float32, true)),
|
||||
dim,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_numeric(&self) -> bool {
|
||||
matches!(
|
||||
self,
|
||||
Self::I32 | Self::I64 | Self::U32 | Self::U64 | Self::F32 | Self::F64
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for ScalarType {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let s = match self {
|
||||
Self::String => "String",
|
||||
Self::Bool => "Bool",
|
||||
Self::I32 => "I32",
|
||||
Self::I64 => "I64",
|
||||
Self::U32 => "U32",
|
||||
Self::U64 => "U64",
|
||||
Self::F32 => "F32",
|
||||
Self::F64 => "F64",
|
||||
Self::Date => "Date",
|
||||
Self::DateTime => "DateTime",
|
||||
Self::Blob => "Blob",
|
||||
Self::Vector(dim) => return write!(f, "Vector({})", dim),
|
||||
};
|
||||
write!(f, "{}", s)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
||||
pub struct PropType {
|
||||
pub scalar: ScalarType,
|
||||
pub nullable: bool,
|
||||
pub list: bool,
|
||||
pub enum_values: Option<Vec<String>>,
|
||||
}
|
||||
|
||||
impl PropType {
|
||||
pub fn from_param_type_name(s: &str, nullable: bool) -> Option<Self> {
|
||||
if let Some(inner) = s
|
||||
.strip_prefix('[')
|
||||
.and_then(|value| value.strip_suffix(']'))
|
||||
{
|
||||
let scalar = ScalarType::from_str_name(inner)?;
|
||||
return Some(Self::list_of(scalar, nullable));
|
||||
}
|
||||
|
||||
let scalar = ScalarType::from_str_name(s)?;
|
||||
Some(Self::scalar(scalar, nullable))
|
||||
}
|
||||
|
||||
pub fn scalar(scalar: ScalarType, nullable: bool) -> Self {
|
||||
Self {
|
||||
scalar,
|
||||
nullable,
|
||||
list: false,
|
||||
enum_values: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn list_of(scalar: ScalarType, nullable: bool) -> Self {
|
||||
Self {
|
||||
scalar,
|
||||
nullable,
|
||||
list: true,
|
||||
enum_values: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn enum_type(mut values: Vec<String>, nullable: bool) -> Self {
|
||||
values.sort();
|
||||
values.dedup();
|
||||
Self {
|
||||
scalar: ScalarType::String,
|
||||
nullable,
|
||||
list: false,
|
||||
enum_values: Some(values),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_enum(&self) -> bool {
|
||||
self.enum_values.is_some()
|
||||
}
|
||||
|
||||
pub fn to_arrow(&self) -> DataType {
|
||||
let scalar_dt = self.scalar.to_arrow();
|
||||
if self.list {
|
||||
DataType::List(std::sync::Arc::new(arrow_schema::Field::new(
|
||||
"item", scalar_dt, true,
|
||||
)))
|
||||
} else {
|
||||
scalar_dt
|
||||
}
|
||||
}
|
||||
|
||||
pub fn display_name(&self) -> String {
|
||||
let base = if let Some(values) = &self.enum_values {
|
||||
format!("enum({})", values.join(", "))
|
||||
} else {
|
||||
self.scalar.to_string()
|
||||
};
|
||||
let wrapped = if self.list {
|
||||
format!("[{}]", base)
|
||||
} else {
|
||||
base
|
||||
};
|
||||
if self.nullable {
|
||||
format!("{}?", wrapped)
|
||||
} else {
|
||||
wrapped
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// One of two directions, `Out` or `In`.
///
/// NOTE(review): presumably the direction in which an edge is followed
/// relative to a node; confirm against the code that consumes this type.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Direction {
    /// The outgoing direction.
    Out,
    /// The incoming direction.
    In,
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use arrow_schema::{DataType, Field};
    use std::sync::Arc;

    /// A vector maps to a fixed-size list whose child is a nullable
    /// `Float32` field named `"item"`.
    #[test]
    fn vector_to_arrow_uses_nullable_float32_child() {
        let expected_child = Arc::new(Field::new("item", DataType::Float32, true));
        let expected = DataType::FixedSizeList(expected_child, 4);

        assert_eq!(ScalarType::Vector(4).to_arrow(), expected);
    }

    /// `i32::MAX` is the largest accepted dimension; one past it is rejected.
    #[test]
    fn scalar_type_from_str_name_rejects_vector_dimensions_outside_arrow_bounds() {
        let first_rejected_dim = (i32::MAX as u64) + 1;
        let too_large = format!("Vector({})", first_rejected_dim);
        assert!(ScalarType::from_str_name(&too_large).is_none());

        let largest_accepted = ScalarType::from_str_name("Vector(2147483647)");
        assert_eq!(largest_accepted, Some(ScalarType::Vector(2147483647)));
    }

    /// Bracketed names parse as lists; the `nullable` flag is passed through.
    #[test]
    fn prop_type_from_param_type_name_supports_lists_and_nullable_scalars() {
        let list = PropType::from_param_type_name("[DateTime]", false);
        assert_eq!(list, Some(PropType::list_of(ScalarType::DateTime, false)));

        let nullable_scalar = PropType::from_param_type_name("DateTime", true);
        assert_eq!(
            nullable_scalar,
            Some(PropType::scalar(ScalarType::DateTime, true))
        );
    }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue